LLVM: lib/Transforms/Scalar/LoopStrengthReduce.cpp Source File

//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This transformation analyzes and transforms the induction variables (and

// computations derived from them) into forms suitable for efficient execution

// on the target.

//

// This pass performs a strength reduction on array references inside loops that

// have as one or more of their components the loop induction variable, it

// rewrites expressions to take advantage of scaled-index addressing modes

// available on the target, and it performs a variety of other optimizations

// related to loop induction variables.

//

// Terminology note: this code has a lot of handling for "post-increment" or

// "post-inc" users. This is not talking about post-increment addressing modes;

// it is instead talking about code like this:

//

//   %i = phi [ 0, %entry ], [ %i.next, %latch ]

//   ...

//   %i.next = add %i, 1

//   %c = icmp eq %i.next, %n

//

// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however

// it's useful to think about these as the same register, with some uses using

// the value of the register before the add and some using it after. In this

// example, the icmp is a post-increment user, since it uses %i.next, which is

// the value of the induction variable after the increment. The other common

// case of post-increment users is users outside the loop.

//

// TODO: More sophistication in the way Formulae are generated and filtered.

//

// TODO: Handle multiple loops at a time.

//

// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead

//       of a GlobalValue?

//

// TODO: When truncation is free, truncate ICmp users' operands to make it a

//       smaller encoding (on x86 at least).

//

// TODO: When a negated register is used by an add (such as in a list of

//       multiple base registers, or as the increment expression in an addrec),

//       we may not actually need both reg and (-1 * reg) in registers; the

//       negation can be implemented by using a sub instead of an add. The

//       lack of support for taking this into consideration when making

//       register pressure decisions is partly worked around by the "Special"

//       use kind.

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"

#include "llvm/ADT/APInt.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/DenseSet.h"

#include "llvm/ADT/PointerIntPair.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SetVector.h"

#include "llvm/ADT/SmallBitVector.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/ADT/iterator_range.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/DomTreeUpdater.h"

#include "llvm/Analysis/IVUsers.h"

#include "llvm/Analysis/LoopAnalysisManager.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/LoopPass.h"

#include "llvm/Analysis/MemorySSA.h"

#include "llvm/Analysis/MemorySSAUpdater.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

#include "llvm/Analysis/ScalarEvolutionNormalization.h"

#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/BinaryFormat/Dwarf.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constant.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DebugInfoMetadata.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/GlobalValue.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/Operator.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Use.h"

#include "llvm/IR/User.h"

#include "llvm/IR/Value.h"

#include "llvm/IR/ValueHandle.h"

#include "llvm/InitializePasses.h"

#include "llvm/Pass.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/MathExtras.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Scalar.h"

#include "llvm/Transforms/Utils.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/Local.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#include <algorithm>

#include <cassert>

#include <cstddef>

#include <cstdint>

#include <iterator>

#include <limits>

#include <map>

#include <numeric>

#include <optional>

#include <utility>


using namespace llvm;

using namespace SCEVPatternMatch;


#define DEBUG_TYPE "loop-reduce"


/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for

/// bail out. This threshold is far beyond the number of users that LSR can

/// conceivably solve, so it should not affect generated code, but catches the

/// worst cases before LSR burns too much compile time and stack space.

static const unsigned MaxIVUsers = 200;


/// Limit the size of expression that SCEV-based salvaging will attempt to

/// translate into a DIExpression.

/// Choose a maximum size such that debuginfo is not excessively increased and

/// the salvaging is not too expensive for the compiler.

static const unsigned MaxSCEVSalvageExpressionSize = 64;


// Cleanup congruent phis after LSR phi expansion.

static cl::opt<bool> EnablePhiElim(

  "enable-lsr-phielim", cl::Hidden, cl::init(true),

  cl::desc("Enable LSR phi elimination"));


// The flag adds instruction count to solutions cost comparison.

static cl::opt<bool> InsnsCost(

  "lsr-insns-cost", cl::Hidden, cl::init(true),

  cl::desc("Add instruction count to a LSR cost model"));


// Flag to choose how to narrow complex lsr solution

static cl::opt<bool> LSRExpNarrow(

  "lsr-exp-narrow", cl::Hidden, cl::init(false),

  cl::desc("Narrow LSR complex solution using"

           " expectation of registers number"));


// Flag to narrow search space by filtering non-optimal formulae with

// the same ScaledReg and Scale.

static cl::opt<bool> FilterSameScaledReg(

    "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"

             " with the same ScaledReg and Scale"));


static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(

    "lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),

    cl::desc("A flag that overrides the target's preferred addressing mode."),

    cl::values(

        clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"),

        clEnumValN(TTI::AMK_PreIndexed, "preindexed",

                   "Prefer pre-indexed addressing mode"),

        clEnumValN(TTI::AMK_PostIndexed, "postindexed",

                   "Prefer post-indexed addressing mode"),

        clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")));


static cl::opt<unsigned> ComplexityLimit(

  "lsr-complexity-limit", cl::Hidden,

  cl::init(std::numeric_limits<uint16_t>::max()),

  cl::desc("LSR search space complexity limit"));


static cl::opt<unsigned> SetupCostDepthLimit(

    "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),

    cl::desc("The limit on recursion depth for LSRs setup cost"));


static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(

    "lsr-drop-solution", cl::Hidden,

    cl::desc("Attempt to drop solution if it is less profitable"));


static cl::opt<bool> EnableVScaleImmediates(

    "lsr-enable-vscale-immediates", cl::Hidden, cl::init(true),

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));


static cl::opt<bool> DropScaledForVScale(

    "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));


#ifndef NDEBUG

// Stress test IV chain generation.

static cl::opt<bool> StressIVChain(

  "stress-ivchain", cl::Hidden, cl::init(false),

  cl::desc("Stress test LSR IV chains"));

#else

static bool StressIVChain = false;

#endif


namespace {


struct MemAccessTy {

  /// Used in situations where the accessed memory type is unknown.

  static const unsigned UnknownAddressSpace =

      std::numeric_limits<unsigned>::max();


  Type *MemTy = nullptr;

  unsigned AddrSpace = UnknownAddressSpace;


  MemAccessTy() = default;

  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}


  bool operator==(MemAccessTy Other) const {

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

  }


  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }


  static MemAccessTy getUnknown(LLVMContext &Ctx,

                                unsigned AS = UnknownAddressSpace) {

    return MemAccessTy(Type::getVoidTy(Ctx), AS);

  }


  Type *getType() { return MemTy; }

};


/// This class holds data which is used to order reuse candidates.

class RegSortData {

public:

  /// This represents the set of LSRUse indices which reference

  /// a particular register.

  SmallBitVector UsedByIndices;


  void print(raw_ostream &OS) const;

  void dump() const;

};


// An offset from an address that is either scalable or fixed. Used for

// per-target optimizations of addressing modes.

class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {

  constexpr Immediate(ScalarTy MinVal, bool Scalable)

      : FixedOrScalableQuantity(MinVal, Scalable) {}


  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)

      : FixedOrScalableQuantity(V) {}


public:

  constexpr Immediate() = delete;


  static constexpr Immediate getFixed(ScalarTy MinVal) {

    return {MinVal, false};

  }

  static constexpr Immediate getScalable(ScalarTy MinVal) {

    return {MinVal, true};

  }

  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {

    return {MinVal, Scalable};

  }

  static constexpr Immediate getZero() { return {0, false}; }

  static constexpr Immediate getFixedMin() {

    return {std::numeric_limits<int64_t>::min(), false};

  }

  static constexpr Immediate getFixedMax() {

    return {std::numeric_limits<int64_t>::max(), false};

  }

  static constexpr Immediate getScalableMin() {

    return {std::numeric_limits<int64_t>::min(), true};

  }

  static constexpr Immediate getScalableMax() {

    return {std::numeric_limits<int64_t>::max(), true};

  }


  constexpr bool isLessThanZero() const { return Quantity < 0; }


  constexpr bool isGreaterThanZero() const { return Quantity > 0; }


  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {

    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;

  }


  constexpr bool isMin() const {

    return Quantity == std::numeric_limits<ScalarTy>::min();

  }


  constexpr bool isMax() const {

    return Quantity == std::numeric_limits<ScalarTy>::max();

  }


  // Arithmetic 'operators' that cast to unsigned types first.

  constexpr Immediate addUnsigned(const Immediate &RHS) const {

    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");

    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();

    return {Value, Scalable || RHS.isScalable()};

  }


  constexpr Immediate subUnsigned(const Immediate &RHS) const {

    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");

    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();

    return {Value, Scalable || RHS.isScalable()};

  }


  // Scale the quantity by a constant without caring about runtime scalability.

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {

    ScalarTy Value = (uint64_t)Quantity * RHS;

    return {Value, Scalable};

  }


  // Helpers for generating SCEVs with vscale terms where needed.

  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {

    const SCEV *S = SE.getConstant(Ty, Quantity);

    if (Scalable)

      S = SE.getMulExpr(S, SE.getVScale(S->getType()));

    return S;

  }


  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {

    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);

    if (Scalable)

      NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));

    return NegS;

  }


  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {

    const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));

    if (Scalable)

      SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));

    return SU;

  }

};


// This is needed for the Compare type of std::map when Immediate is used

// as a key. We don't need it to be fully correct against any value of vscale,

// just to make sure that vscale-related terms in the map are considered against

// each other rather than being mixed up and potentially missing opportunities.

struct KeyOrderTargetImmediate {

  bool operator()(const Immediate &LHS, const Immediate &RHS) const {

    if (LHS.isScalable() && !RHS.isScalable())

      return false;

    if (!LHS.isScalable() && RHS.isScalable())

      return true;

    return LHS.getKnownMinValue() < RHS.getKnownMinValue();

  }

};


// This would be nicer if we could be generic instead of directly using size_t,

// but there doesn't seem to be a type trait for is_orderable or

// is_lessthan_comparable or similar.

struct KeyOrderSizeTAndImmediate {

  bool operator()(const std::pair<size_t, Immediate> &LHS,

                  const std::pair<size_t, Immediate> &RHS) const {

    size_t LSize = LHS.first;

    size_t RSize = RHS.first;

    if (LSize != RSize)

      return LSize < RSize;

    return KeyOrderTargetImmediate()(LHS.second, RHS.second);

  }

};

} // end anonymous namespace


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void RegSortData::print(raw_ostream &OS) const {

  OS << "[NumUses=" << UsedByIndices.count() << ']';

}


LLVM_DUMP_METHOD void RegSortData::dump() const {

  print(errs()); errs() << '\n';

}

#endif


namespace {


/// Map register candidates to information about how they are used.

class RegUseTracker {

  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;


  RegUsesTy RegUsesMap;

  SmallVector<const SCEV *, 16> RegSequence;


public:

  void countRegister(const SCEV *Reg, size_t LUIdx);

  void dropRegister(const SCEV *Reg, size_t LUIdx);

  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);


  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;


  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;


  void clear();


  using iterator = SmallVectorImpl<const SCEV *>::iterator;

  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;


  iterator begin() { return RegSequence.begin(); }

  iterator end()   { return RegSequence.end(); }

  const_iterator begin() const { return RegSequence.begin(); }

  const_iterator end() const   { return RegSequence.end(); }

};


} // end anonymous namespace


void

RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {

  std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);

  RegSortData &RSD = Pair.first->second;

  if (Pair.second)

    RegSequence.push_back(Reg);

  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));

  RSD.UsedByIndices.set(LUIdx);

}


void

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {

  RegUsesTy::iterator It = RegUsesMap.find(Reg);

  assert(It != RegUsesMap.end());

  RegSortData &RSD = It->second;

  assert(RSD.UsedByIndices.size() > LUIdx);

  RSD.UsedByIndices.reset(LUIdx);

}


void

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {

  assert(LUIdx <= LastLUIdx);


  // Update RegUses. The data structure is not optimized for this purpose;

  // we must iterate through it and update each of the bit vectors.

  for (auto &Pair : RegUsesMap) {

    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;

    if (LUIdx < UsedByIndices.size())

      UsedByIndices[LUIdx] =

        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;

    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));

  }

}


bool

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {

  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);

  if (I == RegUsesMap.end())

    return false;

  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;

  int i = UsedByIndices.find_first();

  if (i == -1) return false;

  if ((size_t)i != LUIdx) return true;

  return UsedByIndices.find_next(i) != -1;

}


const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {

  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);

  assert(I != RegUsesMap.end() && "Unknown register!");

  return I->second.UsedByIndices;

}


void RegUseTracker::clear() {

  RegUsesMap.clear();

  RegSequence.clear();

}


namespace {


/// This class holds information that describes a formula for computing

/// satisfying a use. It may include broken-out immediates and scaled registers.

struct Formula {

  /// Global base address used for complex addressing.

  GlobalValue *BaseGV = nullptr;


  /// Base offset for complex addressing.

  Immediate BaseOffset = Immediate::getZero();


  /// Whether any complex addressing has a base register.

  bool HasBaseReg = false;


  /// The scale of any complex addressing.

  int64_t Scale = 0;


  /// The list of "base" registers for this use. When this is non-empty. The

  /// canonical representation of a formula is

  /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and

  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().

  /// 3. The reg containing recurrent expr related with currect loop in the

  /// formula should be put in the ScaledReg.

  /// #1 enforces that the scaled register is always used when at least two

  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.

  /// #2 enforces that 1 * reg is reg.

  /// #3 ensures invariant regs with respect to current loop can be combined

  /// together in LSR codegen.

  /// This invariant can be temporarily broken while building a formula.

  /// However, every formula inserted into the LSRInstance must be in canonical

  /// form.

  SmallVector<const SCEV *, 4> BaseRegs;


  /// The 'scaled' register for this use. This should be non-null when Scale is

  /// not zero.

  const SCEV *ScaledReg = nullptr;


  /// An additional constant offset which added near the use. This requires a

  /// temporary register, but the offset itself can live in an add immediate

  /// field rather than a register.

  Immediate UnfoldedOffset = Immediate::getZero();


  Formula() = default;


  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);


  bool isCanonical(const Loop &L) const;


  void canonicalize(const Loop &L);


  bool unscale();


  bool hasZeroEnd() const;


  bool countsDownToZero() const;


  size_t getNumRegs() const;

  Type *getType() const;


  void deleteBaseReg(const SCEV *&S);


  bool referencesReg(const SCEV *S) const;

  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,

                                  const RegUseTracker &RegUses) const;


  void print(raw_ostream &OS) const;

  void dump() const;

};


} // end anonymous namespace


/// Recursion helper for initialMatch.


static void DoInitialMatch(const SCEV *S, Loop *L,

                           SmallVectorImpl<const SCEV *> &Good,

                           SmallVectorImpl<const SCEV *> &Bad,

                           ScalarEvolution &SE) {

  // Collect expressions which properly dominate the loop header.

  if (SE.properlyDominates(S, L->getHeader())) {

    Good.push_back(S);

    return;

  }


  // Look at add operands.

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {

    for (const SCEV *S : Add->operands())

      DoInitialMatch(S, L, Good, Bad, SE);

    return;

  }


  // Look at addrec operands.

  const SCEV *Start, *Step;

  const Loop *ARLoop;

  if (match(S,

            m_scev_AffineAddRec(m_SCEV(Start), m_SCEV(Step), m_Loop(ARLoop))) &&

      !Start->isZero()) {

    DoInitialMatch(Start, L, Good, Bad, SE);

    DoInitialMatch(SE.getAddRecExpr(SE.getConstant(S->getType(), 0), Step,

                                    // FIXME: AR->getNoWrapFlags()

                                    ARLoop, SCEV::FlagAnyWrap),

                   L, Good, Bad, SE);

    return;

  }


  // Handle a multiplication by -1 (negation) if it didn't fold.

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))

    if (Mul->getOperand(0)->isAllOnesValue()) {

      SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));

      const SCEV *NewMul = SE.getMulExpr(Ops);


      SmallVector<const SCEV *, 4> MyGood;

      SmallVector<const SCEV *, 4> MyBad;

      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);

      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(

        SE.getEffectiveSCEVType(NewMul->getType())));

      for (const SCEV *S : MyGood)

        Good.push_back(SE.getMulExpr(NegOne, S));

      for (const SCEV *S : MyBad)

        Bad.push_back(SE.getMulExpr(NegOne, S));

      return;

    }


  // Ok, we can't do anything interesting. Just stuff the whole thing into a

  // register and hope for the best.

  Bad.push_back(S);

}


/// Incorporate loop-variant parts of S into this Formula, attempting to keep

/// all loop-invariant and loop-computable values in a single base register.

void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {

  SmallVector<const SCEV *, 4> Good;

  SmallVector<const SCEV *, 4> Bad;

  DoInitialMatch(S, L, Good, Bad, SE);

  if (!Good.empty()) {

    const SCEV *Sum = SE.getAddExpr(Good);

    if (!Sum->isZero())

      BaseRegs.push_back(Sum);

    HasBaseReg = true;

  }

  if (!Bad.empty()) {

    const SCEV *Sum = SE.getAddExpr(Bad);

    if (!Sum->isZero())

      BaseRegs.push_back(Sum);

    HasBaseReg = true;

  }

  canonicalize(*L);

}


static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {

  return SCEVExprContains(S, [&L](const SCEV *S) {

    return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);

  });

}


/// Check whether or not this formula satisfies the canonical

/// representation.

/// \see Formula::BaseRegs.

bool Formula::isCanonical(const Loop &L) const {

  assert((Scale == 0 || ScaledReg) &&

         "ScaledReg must be non-null if Scale is non-zero");


  if (!ScaledReg)

    return BaseRegs.size() <= 1;


  if (Scale != 1)

    return true;


  if (Scale == 1 && BaseRegs.empty())

    return false;


  if (containsAddRecDependentOnLoop(ScaledReg, L))

    return true;


  // If ScaledReg is not a recurrent expr, or it is but its loop is not current

  // loop, meanwhile BaseRegs contains a recurrent expr reg related with current

  // loop, we want to swap the reg in BaseRegs with ScaledReg.

  return none_of(BaseRegs, [&L](const SCEV *S) {

    return containsAddRecDependentOnLoop(S, L);

  });

}


/// Helper method to morph a formula into its canonical representation.

/// \see Formula::BaseRegs.

/// Every formula having more than one base register, must use the ScaledReg

/// field. Otherwise, we would have to do special cases everywhere in LSR

/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...

/// On the other hand, 1*reg should be canonicalized into reg.

void Formula::canonicalize(const Loop &L) {

  if (isCanonical(L))

    return;


  if (BaseRegs.empty()) {

    // No base reg? Use scale reg with scale = 1 as such.

    assert(ScaledReg && "Expected 1*reg => reg");

    assert(Scale == 1 && "Expected 1*reg => reg");

    BaseRegs.push_back(ScaledReg);

    Scale = 0;

    ScaledReg = nullptr;

    return;

  }


  // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.

  if (!ScaledReg) {

    ScaledReg = BaseRegs.pop_back_val();

    Scale = 1;

  }


  // If ScaledReg is an invariant with respect to L, find the reg from

  // BaseRegs containing the recurrent expr related with Loop L. Swap the

  // reg with ScaledReg.

  if (!containsAddRecDependentOnLoop(ScaledReg, L)) {

    auto I = find_if(BaseRegs, [&L](const SCEV *S) {

      return containsAddRecDependentOnLoop(S, L);

    });

    if (I != BaseRegs.end())

      std::swap(ScaledReg, *I);

  }

  assert(isCanonical(L) && "Failed to canonicalize?");

}


/// Get rid of the scale in the formula.

/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2.

/// \return true if it was possible to get rid of the scale, false otherwise.

/// \note After this operation the formula may not be in the canonical form.

bool Formula::unscale() {

  if (Scale != 1)

    return false;

  Scale = 0;

  BaseRegs.push_back(ScaledReg);

  ScaledReg = nullptr;

  return true;

}


bool Formula::hasZeroEnd() const {

  if (UnfoldedOffset || BaseOffset)

    return false;

  if (BaseRegs.size() != 1 || ScaledReg)

    return false;

  return true;

}


bool Formula::countsDownToZero() const {

  if (!hasZeroEnd())

    return false;

  assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");

  const APInt *StepInt;

  if (!match(BaseRegs[0], m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StepInt))))

    return false;

  return StepInt->isNegative();

}


/// Return the total number of register operands used by this formula. This does

/// not include register uses implied by non-constant addrec strides.

size_t Formula::getNumRegs() const {

  return !!ScaledReg + BaseRegs.size();

}


/// Return the type of this formula, if it has one, or null otherwise. This type

/// is meaningless except for the bit size.

Type *Formula::getType() const {

  return !BaseRegs.empty() ? BaseRegs.front()->getType() :

         ScaledReg ? ScaledReg->getType() :

         BaseGV ? BaseGV->getType() :

         nullptr;

}


/// Delete the given base reg from the BaseRegs list.

void Formula::deleteBaseReg(const SCEV *&S) {

  if (&S != &BaseRegs.back())

    std::swap(S, BaseRegs.back());

  BaseRegs.pop_back();

}


/// Test if this formula references the given register.

bool Formula::referencesReg(const SCEV *S) const {

  return S == ScaledReg || is_contained(BaseRegs, S);

}


/// Test whether this formula uses registers which are used by uses other than

/// the use with the given index.

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,

                                         const RegUseTracker &RegUses) const {

  if (ScaledReg)

    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))

      return true;

  for (const SCEV *BaseReg : BaseRegs)

    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))

      return true;

  return false;

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void Formula::print(raw_ostream &OS) const {

  bool First = true;

  if (BaseGV) {

    if (!First) OS << " + "; else First = false;

    BaseGV->printAsOperand(OS, /*PrintType=*/false);

  }

  if (BaseOffset.isNonZero()) {

    if (!First) OS << " + "; else First = false;

    OS << BaseOffset;

  }

  for (const SCEV *BaseReg : BaseRegs) {

    if (!First) OS << " + "; else First = false;

    OS << "reg(" << *BaseReg << ')';

  }

  if (HasBaseReg && BaseRegs.empty()) {

    if (!First) OS << " + "; else First = false;

    OS << "**error: HasBaseReg**";

  } else if (!HasBaseReg && !BaseRegs.empty()) {

    if (!First) OS << " + "; else First = false;

    OS << "**error: !HasBaseReg**";

  }

  if (Scale != 0) {

    if (!First) OS << " + "; else First = false;

    OS << Scale << "*reg(";

    if (ScaledReg)

      OS << *ScaledReg;

    else

      OS << "<unknown>";

    OS << ')';

  }

  if (UnfoldedOffset.isNonZero()) {

    if (!First) OS << " + ";

    OS << "imm(" << UnfoldedOffset << ')';

  }

}


LLVM_DUMP_METHOD void Formula::dump() const {

  print(errs()); errs() << '\n';

}

#endif


/// Return true if the given addrec can be sign-extended without changing its

/// value.


static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {

  Type *WideTy =

    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);

  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));

}


/// Return true if the given add can be sign-extended without changing its

/// value.


static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {

  Type *WideTy =

    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);

  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));

}


/// Return true if the given mul can be sign-extended without changing its

/// value.


static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {

  Type *WideTy =

    IntegerType::get(SE.getContext(),

                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());

  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));

}


/// Return an expression for LHS /s RHS, if it can be determined and if the

/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits

/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that

/// the multiplication may overflow, which is useful when the result will be

/// used in a context where the most significant bits are ignored.


static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,

                                ScalarEvolution &SE,

                                bool IgnoreSignificantBits = false) {

  // Handle the trivial case, which works for any SCEV type.

  if (LHS == RHS)

    return SE.getConstant(LHS->getType(), 1);


  // Handle a few RHS special cases.

  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);

  if (RC) {

    const APInt &RA = RC->getAPInt();

    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do

    // some folding.

    if (RA.isAllOnes()) {

      if (LHS->getType()->isPointerTy())

        return nullptr;

      return SE.getMulExpr(LHS, RC);

    }

    // Handle x /s 1 as x.

    if (RA == 1)

      return LHS;

  }


  // Check for a division of a constant by a constant.

  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {

    if (!RC)

      return nullptr;

    const APInt &LA = C->getAPInt();

    const APInt &RA = RC->getAPInt();

    if (LA.srem(RA) != 0)

      return nullptr;

    return SE.getConstant(LA.sdiv(RA));

  }


  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.

  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {

    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {

      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,

                                      IgnoreSignificantBits);

      if (!Step) return nullptr;

      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,

                                       IgnoreSignificantBits);

      if (!Start) return nullptr;

      // FlagNW is independent of the start value, step direction, and is

      // preserved with smaller magnitude steps.

      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)

      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);

    }

    return nullptr;

  }


  // Distribute the sdiv over add operands, if the add doesn't overflow.

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {

    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {

      SmallVector<const SCEV *, 8> Ops;

      for (const SCEV *S : Add->operands()) {

        const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);

        if (!Op) return nullptr;

        Ops.push_back(Op);

      }

      return SE.getAddExpr(Ops);

    }

    return nullptr;

  }


  // Check for a multiply operand that we can pull RHS out of.

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {

    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {

      // Handle special case C1*X*Y /s C2*X*Y.

      if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {

        if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {

          const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));

          const SCEVConstant *RC =

              dyn_cast<SCEVConstant>(MulRHS->getOperand(0));

          if (LC && RC) {

            SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));

            SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));

            if (LOps == ROps)

              return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);

          }

        }

      }


      SmallVector<const SCEV *, 4> Ops;

      bool Found = false;

      for (const SCEV *S : Mul->operands()) {

        if (!Found)

          if (const SCEV *Q = getExactSDiv(S, RHS, SE,

                                           IgnoreSignificantBits)) {

            S = Q;

            Found = true;

          }

        Ops.push_back(S);

      }

      return Found ? SE.getMulExpr(Ops) : nullptr;

    }

    return nullptr;

  }


  // Otherwise we don't know.

  return nullptr;

}


/// If S involves the addition of a constant integer value, return that integer

/// value, and mutate S to point to a new SCEV with that value excluded.


static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {

  const APInt *C;

  if (match(S, m_scev_APInt(C))) {

    if (C->getSignificantBits() <= 64) {

      S = SE.getConstant(S->getType(), 0);

      return Immediate::getFixed(C->getSExtValue());

    }

  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {

    SmallVector<const SCEV *, 8> NewOps(Add->operands());

    Immediate Result = ExtractImmediate(NewOps.front(), SE);

    if (Result.isNonZero())

      S = SE.getAddExpr(NewOps);

    return Result;

  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

    SmallVector<const SCEV *, 8> NewOps(AR->operands());

    Immediate Result = ExtractImmediate(NewOps.front(), SE);

    if (Result.isNonZero())

      S = SE.getAddRecExpr(NewOps, AR->getLoop(),

                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)

                           SCEV::FlagAnyWrap);

    return Result;

  } else if (EnableVScaleImmediates &&

             match(S, m_scev_Mul(m_scev_APInt(C), m_SCEVVScale()))) {

    S = SE.getConstant(S->getType(), 0);

    return Immediate::getScalable(C->getSExtValue());

  }

  return Immediate::getZero();

}


/// If S involves the addition of a GlobalValue address, return that symbol, and

/// mutate S to point to a new SCEV with that value excluded.


static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {

    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {

      S = SE.getConstant(GV->getType(), 0);

      return GV;

    }

  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {

    SmallVector<const SCEV *, 8> NewOps(Add->operands());

    GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);

    if (Result)

      S = SE.getAddExpr(NewOps);

    return Result;

  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

    SmallVector<const SCEV *, 8> NewOps(AR->operands());

    GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);

    if (Result)

      S = SE.getAddRecExpr(NewOps, AR->getLoop(),

                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)

                           SCEV::FlagAnyWrap);

    return Result;

  }

  return nullptr;

}


/// Returns true if the specified instruction is using the specified value as an

/// address.


static bool isAddressUse(const TargetTransformInfo &TTI,

                         Instruction *Inst, Value *OperandVal) {

  bool isAddress = isa<LoadInst>(Inst);

  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {

    if (SI->getPointerOperand() == OperandVal)

      isAddress = true;

  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {

    // Addressing modes can also be folded into prefetches and a variety

    // of intrinsics.

    switch (II->getIntrinsicID()) {

    case Intrinsic::memset:

    case Intrinsic::prefetch:

    case Intrinsic::masked_load:

      if (II->getArgOperand(0) == OperandVal)

        isAddress = true;

      break;

    case Intrinsic::masked_store:

      if (II->getArgOperand(1) == OperandVal)

        isAddress = true;

      break;

    case Intrinsic::memmove:

    case Intrinsic::memcpy:

      if (II->getArgOperand(0) == OperandVal ||

          II->getArgOperand(1) == OperandVal)

        isAddress = true;

      break;

    default: {

      MemIntrinsicInfo IntrInfo;

      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {

        if (IntrInfo.PtrVal == OperandVal)

          isAddress = true;

      }

    }

    }

  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {

    if (RMW->getPointerOperand() == OperandVal)

      isAddress = true;

  } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {

    if (CmpX->getPointerOperand() == OperandVal)

      isAddress = true;

  }

  return isAddress;

}


/// Return the type of the memory being accessed.


static MemAccessTy getAccessType(const TargetTransformInfo &TTI,

                                 Instruction *Inst, Value *OperandVal) {

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());


  // First get the type of memory being accessed.

  if (Type *Ty = Inst->getAccessType())

    AccessTy.MemTy = Ty;


  // Then get the pointer address space.

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {

    AccessTy.AddrSpace = SI->getPointerAddressSpace();

  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {

    AccessTy.AddrSpace = LI->getPointerAddressSpace();

  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {

    AccessTy.AddrSpace = RMW->getPointerAddressSpace();

  } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {

    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();

  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {

    switch (II->getIntrinsicID()) {

    case Intrinsic::prefetch:

    case Intrinsic::memset:

      AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();

      AccessTy.MemTy = OperandVal->getType();

      break;

    case Intrinsic::memmove:

    case Intrinsic::memcpy:

      AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();

      AccessTy.MemTy = OperandVal->getType();

      break;

    case Intrinsic::masked_load:

      AccessTy.AddrSpace =

          II->getArgOperand(0)->getType()->getPointerAddressSpace();

      break;

    case Intrinsic::masked_store:

      AccessTy.AddrSpace =

          II->getArgOperand(1)->getType()->getPointerAddressSpace();

      break;

    default: {

      MemIntrinsicInfo IntrInfo;

      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {

        AccessTy.AddrSpace

          = IntrInfo.PtrVal->getType()->getPointerAddressSpace();

      }


      break;

    }

    }

  }


  return AccessTy;

}


/// Return true if this AddRec is already a phi in its loop.


static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {

  for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {

    if (SE.isSCEVable(PN.getType()) &&

        (SE.getEffectiveSCEVType(PN.getType()) ==

         SE.getEffectiveSCEVType(AR->getType())) &&

        SE.getSCEV(&PN) == AR)

      return true;

  }

  return false;

}


/// Check if expanding this expression is likely to incur significant cost. This

/// is tricky because SCEV doesn't track which expressions are actually computed

/// by the current IR.

///

/// We currently allow expansion of IV increments that involve adds,

/// multiplication by constants, and AddRecs from existing phis.

///

/// TODO: Allow UDivExpr if we can find an existing IV increment that is an

/// obvious multiple of the UDivExpr.


static bool isHighCostExpansion(const SCEV *S,

                                SmallPtrSetImpl<const SCEV*> &Processed,

                                ScalarEvolution &SE) {

  // Zero/One operand expressions

  switch (S->getSCEVType()) {

  case scUnknown:

  case scConstant:

  case scVScale:

    return false;

  case scTruncate:

    return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),

                               Processed, SE);

  case scZeroExtend:

    return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),

                               Processed, SE);

  case scSignExtend:

    return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),

                               Processed, SE);

  default:

    break;

  }


  if (!Processed.insert(S).second)

    return false;


  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {

    for (const SCEV *S : Add->operands()) {

      if (isHighCostExpansion(S, Processed, SE))

        return true;

    }

    return false;

  }


  const SCEV *Op0, *Op1;

  if (match(S, m_scev_Mul(m_SCEV(Op0), m_SCEV(Op1)))) {

    // Multiplication by a constant is ok

    if (isa<SCEVConstant>(Op0))

      return isHighCostExpansion(Op1, Processed, SE);


    // If we have the value of one operand, check if an existing

    // multiplication already generates this expression.

    if (const auto *U = dyn_cast<SCEVUnknown>(Op1)) {

      Value *UVal = U->getValue();

      for (User *UR : UVal->users()) {

        // If U is a constant, it may be used by a ConstantExpr.

        Instruction *UI = dyn_cast<Instruction>(UR);

        if (UI && UI->getOpcode() == Instruction::Mul &&

            SE.isSCEVable(UI->getType())) {

          return SE.getSCEV(UI) == S;

        }

      }

    }

  }


  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

    if (isExistingPhi(AR, SE))

      return false;

  }


  // Fow now, consider any other type of expression (div/mul/min/max) high cost.

  return true;

}


namespace {


class LSRUse;


} // end anonymous namespace


/// Check if the addressing mode defined by \p F is completely

/// folded in \p LU at isel time.

/// This includes address-mode folding and special icmp tricks.

/// This function returns true if \p LU can accommodate what \p F

/// defines and up to 1 base + 1 scaled + offset.

/// In other words, if \p F has several base registers, this function may

/// still return true. Therefore, users still need to account for

/// additional base registers and/or unfolded offsets to derive an

/// accurate cost model.

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,

                                 const LSRUse &LU, const Formula &F);


// Get the cost of the scaling factor used in F for LU.

static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,

                                            const LSRUse &LU, const Formula &F,

                                            const Loop &L);


namespace {


/// This class is used to measure and compare candidate formulae.

class Cost {

  const Loop *L = nullptr;

  ScalarEvolution *SE = nullptr;

  const TargetTransformInfo *TTI = nullptr;

  TargetTransformInfo::LSRCost C;

  TTI::AddressingModeKind AMK = TTI::AMK_None;


public:

  Cost() = delete;

  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,

       TTI::AddressingModeKind AMK) :

    L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    C.Insns = 0;

    C.NumRegs = 0;

    C.AddRecCost = 0;

    C.NumIVMuls = 0;

    C.NumBaseAdds = 0;

    C.ImmCost = 0;

    C.SetupCost = 0;

    C.ScaleCost = 0;

  }


  bool isLess(const Cost &Other) const;


  void Lose();


#ifndef NDEBUG

  // Once any of the metrics loses, they must all remain losers.

  bool isValid() {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds

             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)

      || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds

           & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

  }

#endif


  bool isLoser() {

    assert(isValid() && "invalid cost");

    return C.NumRegs == ~0u;

  }


  void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,

                   const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,

                   bool HardwareLoopProfitable,

                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);


  void print(raw_ostream &OS) const;

  void dump() const;


private:

  void RateRegister(const Formula &F, const SCEV *Reg,

                    SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,

                    bool HardwareLoopProfitable);

  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,

                           SmallPtrSetImpl<const SCEV *> &Regs,

                           const LSRUse &LU, bool HardwareLoopProfitable,

                           SmallPtrSetImpl<const SCEV *> *LoserRegs);

};


/// An operand value in an instruction which is to be replaced with some

/// equivalent, possibly strength-reduced, replacement.

struct LSRFixup {

  /// The instruction which will be updated.

  Instruction *UserInst = nullptr;


  /// The operand of the instruction which will be replaced. The operand may be

  /// used more than once; every instance will be replaced.

  Value *OperandValToReplace = nullptr;


  /// If this user is to use the post-incremented value of an induction

  /// variable, this set is non-empty and holds the loops associated with the

  /// induction variable.

  PostIncLoopSet PostIncLoops;


  /// A constant offset to be added to the LSRUse expression.  This allows

  /// multiple fixups to share the same LSRUse with different offsets, for

  /// example in an unrolled loop.

  Immediate Offset = Immediate::getZero();


  LSRFixup() = default;


  bool isUseFullyOutsideLoop(const Loop *L) const;


  void print(raw_ostream &OS) const;

  void dump() const;

};


/// This class holds the state that LSR keeps for each use in IVUsers, as well

/// as uses invented by LSR itself. It includes information about what kinds of

/// things can be folded into the user, information about the user itself, and

/// information about how the use may be satisfied.  TODO: Represent multiple

/// users of the same expression in common?

class LSRUse {

  DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;


public:

  /// An enum for a kind of use, indicating what types of scaled and immediate

  /// operands it might support.

  enum KindType {

    Basic,   ///< A normal use, with no folding.

    Special, ///< A special case of basic, allowing -1 scales.

    Address, ///< An address use; folding according to TargetLowering

    ICmpZero ///< An equality icmp with both operands folded into one.

    // TODO: Add a generic icmp too?

  };


  using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;


  KindType Kind;

  MemAccessTy AccessTy;


  /// The list of operands which are to be replaced.

  SmallVector<LSRFixup, 8> Fixups;


  /// Keep track of the min and max offsets of the fixups.

  Immediate MinOffset = Immediate::getFixedMax();

  Immediate MaxOffset = Immediate::getFixedMin();


  /// This records whether all of the fixups using this LSRUse are outside of

  /// the loop, in which case some special-case heuristics may be used.

  bool AllFixupsOutsideLoop = true;


  /// RigidFormula is set to true to guarantee that this use will be associated

  /// with a single formula--the one that initially matched. Some SCEV

  /// expressions cannot be expanded. This allows LSR to consider the registers

  /// used by those expressions without the need to expand them later after

  /// changing the formula.

  bool RigidFormula = false;


  /// This records the widest use type for any fixup using this

  /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max

  /// fixup widths to be equivalent, because the narrower one may be relying on

  /// the implicit truncation to truncate away bogus bits.

  Type *WidestFixupType = nullptr;


  /// A list of ways to build a value that can satisfy this user.  After the

  /// list is populated, one of these is selected heuristically and used to

  /// formulate a replacement for OperandValToReplace in UserInst.

  SmallVector<Formula, 12> Formulae;


  /// The set of register candidates used by all formulae in this LSRUse.

  SmallPtrSet<const SCEV *, 4> Regs;


  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}


  LSRFixup &getNewFixup() {

    Fixups.push_back(LSRFixup());

    return Fixups.back();

  }


  void pushFixup(LSRFixup &f) {

    Fixups.push_back(f);

    if (Immediate::isKnownGT(f.Offset, MaxOffset))

      MaxOffset = f.Offset;

    if (Immediate::isKnownLT(f.Offset, MinOffset))

      MinOffset = f.Offset;

  }


  bool HasFormulaWithSameRegs(const Formula &F) const;

  float getNotSelectedProbability(const SCEV *Reg) const;

  bool InsertFormula(const Formula &F, const Loop &L);

  void DeleteFormula(Formula &F);

  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);


  void print(raw_ostream &OS) const;

  void dump() const;

};


} // end anonymous namespace


static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,

                                 GlobalValue *BaseGV, Immediate BaseOffset,

                                 bool HasBaseReg, int64_t Scale,

                                 Instruction *Fixup = nullptr);


static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {

  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))

    return 1;

  if (Depth == 0)

    return 0;

  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))

    return getSetupCost(S->getStart(), Depth - 1);

  if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))

    return getSetupCost(S->getOperand(), Depth - 1);

  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))

    return std::accumulate(S->operands().begin(), S->operands().end(), 0,

                           [&](unsigned i, const SCEV *Reg) {

                             return i + getSetupCost(Reg, Depth - 1);

                           });

  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))

    return getSetupCost(S->getLHS(), Depth - 1) +

           getSetupCost(S->getRHS(), Depth - 1);

  return 0;

}


/// Tally up interesting quantities from the given register.

void Cost::RateRegister(const Formula &F, const SCEV *Reg,

                        SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,

                        bool HardwareLoopProfitable) {

  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {

    // If this is an addrec for another loop, it should be an invariant

    // with respect to L since L is the innermost loop (at least

    // for now LSR only handles innermost loops).

    if (AR->getLoop() != L) {

      // If the AddRec exists, consider it's register free and leave it alone.

      if (isExistingPhi(AR, *SE) && !(AMK & TTI::AMK_PostIndexed))

        return;


      // It is bad to allow LSR for current loop to add induction variables

      // for its sibling loops.

      if (!AR->getLoop()->contains(L)) {

        Lose();

        return;

      }


      // Otherwise, it will be an invariant with respect to Loop L.

      ++C.NumRegs;

      return;

    }


    unsigned LoopCost = 1;

    if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||

        TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {

      const SCEV *Start;

      const SCEVConstant *Step;

      if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))

        // If the step size matches the base offset, we could use pre-indexed

        // addressing.

        if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&

             Step->getAPInt() == F.BaseOffset.getFixedValue()) ||

            ((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&

             SE->isLoopInvariant(Start, L)))

          LoopCost = 0;

    }

    // If the loop counts down to zero and we'll be using a hardware loop then

    // the addrec will be combined into the hardware loop instruction.

    if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&

        HardwareLoopProfitable)

      LoopCost = 0;

    C.AddRecCost += LoopCost;


    // Add the step value register, if it needs one.

    // TODO: The non-affine case isn't precisely modeled here.

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {

      if (!Regs.count(AR->getOperand(1))) {

        RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);

        if (isLoser())

          return;

      }

    }

  }

  ++C.NumRegs;


  // Rough heuristic; favor registers which don't require extra setup

  // instructions in the preheader.

  C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);

  // Ensure we don't, even with the recusion limit, produce invalid costs.

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);


  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&

               SE->hasComputableLoopEvolution(Reg, L);

}


/// Record this register in the set. If we haven't seen it before, rate

/// it. Optional LoserRegs provides a way to declare any formula that refers to

/// one of those regs an instant loser.

void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,

                               SmallPtrSetImpl<const SCEV *> &Regs,

                               const LSRUse &LU, bool HardwareLoopProfitable,

                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {

  if (LoserRegs && LoserRegs->count(Reg)) {

    Lose();

    return;

  }

  if (Regs.insert(Reg).second) {

    RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);

    if (LoserRegs && isLoser())

      LoserRegs->insert(Reg);

  }

}


void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,

                       const DenseSet<const SCEV *> &VisitedRegs,

                       const LSRUse &LU, bool HardwareLoopProfitable,

                       SmallPtrSetImpl<const SCEV *> *LoserRegs) {

  if (isLoser())

    return;

  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");

  // Tally up the registers.

  unsigned PrevAddRecCost = C.AddRecCost;

  unsigned PrevNumRegs = C.NumRegs;

  unsigned PrevNumBaseAdds = C.NumBaseAdds;

  if (const SCEV *ScaledReg = F.ScaledReg) {

    if (VisitedRegs.count(ScaledReg)) {

      Lose();

      return;

    }

    RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,

                        LoserRegs);

    if (isLoser())

      return;

  }

  for (const SCEV *BaseReg : F.BaseRegs) {

    if (VisitedRegs.count(BaseReg)) {

      Lose();

      return;

    }

    RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,

                        LoserRegs);

    if (isLoser())

      return;

  }


  // Determine how many (unfolded) adds we'll need inside the loop.

  size_t NumBaseParts = F.getNumRegs();

  if (NumBaseParts > 1)

    // Do not count the base and a possible second register if the target

    // allows to fold 2 registers.

    C.NumBaseAdds +=

        NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));

  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());


  // Accumulate non-free scaling amounts.

  C.ScaleCost += getScalingFactorCost(*TTI, LU, F, *L).getValue();


  // Tally up the non-zero immediates.

  for (const LSRFixup &Fixup : LU.Fixups) {

    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {

      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);

      if (F.BaseGV)

        C.ImmCost += 64; // Handle symbolic values conservatively.

                         // TODO: This should probably be the pointer size.

      else if (Offset.isNonZero())

        C.ImmCost +=

            APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();


      // Check with target if this offset with this instruction is

      // specifically not supported.

      if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

          !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,

                                Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))

        C.NumBaseAdds++;

    } else {

      // Incompatible immediate type, increase cost to avoid using

      C.ImmCost += 2048;

    }

  }


  // If we don't count instruction cost exit here.

  if (!InsnsCost) {

    assert(isValid() && "invalid cost");

    return;

  }


  // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as

  // additional instruction (at least fill).

  // TODO: Need distinguish register class?

  unsigned TTIRegNum = TTI->getNumberOfRegisters(

                       TTI->getRegisterClassForType(false, F.getType())) - 1;

  if (C.NumRegs > TTIRegNum) {

    // Cost already exceeded TTIRegNum, then only newly added register can add

    // new instructions.

    if (PrevNumRegs > TTIRegNum)

      C.Insns += (C.NumRegs - PrevNumRegs);

    else

      C.Insns += (C.NumRegs - TTIRegNum);

  }


  // If ICmpZero formula ends with not 0, it could not be replaced by

  // just add or sub. We'll need to compare final result of AddRec.

  // That means we'll need an additional instruction. But if the target can

  // macro-fuse a compare with a branch, don't count this extra instruction.

  // For -10 + {0, +, 1}:

  // i = i + 1;

  // cmp i, 10

  //

  // For {-10, +, 1}:

  // i = i + 1;

  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&

      !TTI->canMacroFuseCmp())

    C.Insns++;

  // Each new AddRec adds 1 instruction to calculation.

  C.Insns += (C.AddRecCost - PrevAddRecCost);


  // BaseAdds adds instructions for unfolded registers.

  if (LU.Kind != LSRUse::ICmpZero)

    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  assert(isValid() && "invalid cost");

}


/// Set this cost to a losing value.

void Cost::Lose() {

  C.Insns = std::numeric_limits<unsigned>::max();

  C.NumRegs = std::numeric_limits<unsigned>::max();

  C.AddRecCost = std::numeric_limits<unsigned>::max();

  C.NumIVMuls = std::numeric_limits<unsigned>::max();

  C.NumBaseAdds = std::numeric_limits<unsigned>::max();

  C.ImmCost = std::numeric_limits<unsigned>::max();

  C.SetupCost = std::numeric_limits<unsigned>::max();

  C.ScaleCost = std::numeric_limits<unsigned>::max();

}


/// Choose the lower cost.

bool Cost::isLess(const Cost &Other) const {

  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&

      C.Insns != Other.C.Insns)

    return C.Insns < Other.C.Insns;

  return TTI->isLSRCostLess(C, Other.C);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void Cost::print(raw_ostream &OS) const {

  if (InsnsCost)

    OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");

  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");

  if (C.AddRecCost != 0)

    OS << ", with addrec cost " << C.AddRecCost;

  if (C.NumIVMuls != 0)

    OS << ", plus " << C.NumIVMuls << " IV mul"

       << (C.NumIVMuls == 1 ? "" : "s");

  if (C.NumBaseAdds != 0)

    OS << ", plus " << C.NumBaseAdds << " base add"

       << (C.NumBaseAdds == 1 ? "" : "s");

  if (C.ScaleCost != 0)

    OS << ", plus " << C.ScaleCost << " scale cost";

  if (C.ImmCost != 0)

    OS << ", plus " << C.ImmCost << " imm cost";

  if (C.SetupCost != 0)

    OS << ", plus " << C.SetupCost << " setup cost";

}


LLVM_DUMP_METHOD void Cost::dump() const {

  print(errs()); errs() << '\n';

}

#endif


/// Test whether this fixup always uses its value outside of the given loop.

bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {

  // PHI nodes use their value in their incoming blocks.

  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {

    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)

      if (PN->getIncomingValue(i) == OperandValToReplace &&

          L->contains(PN->getIncomingBlock(i)))

        return false;

    return true;

  }


  return !L->contains(UserInst);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void LSRFixup::print(raw_ostream &OS) const {

  OS << "UserInst=";

  // Store is common and interesting enough to be worth special-casing.

  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {

    OS << "store ";

    Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);

  } else if (UserInst->getType()->isVoidTy())

    OS << UserInst->getOpcodeName();

  else

    UserInst->printAsOperand(OS, /*PrintType=*/false);


  OS << ", OperandValToReplace=";

  OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);


  for (const Loop *PIL : PostIncLoops) {

    OS << ", PostIncLoop=";

    PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);

  }


  if (Offset.isNonZero())

    OS << ", Offset=" << Offset;

}


LLVM_DUMP_METHOD void LSRFixup::dump() const {

  print(errs()); errs() << '\n';

}

#endif


/// Test whether this use as a formula which has the same registers as the given

/// formula.

bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {

  SmallVector<const SCEV *, 4> Key = F.BaseRegs;

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  // Unstable sort by host order ok, because this is only used for uniquifying.

  llvm::sort(Key);

  return Uniquifier.count(Key);

}


/// The function returns a probability of selecting formula without Reg.

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {

  unsigned FNum = 0;

  for (const Formula &F : Formulae)

    if (F.referencesReg(Reg))

      FNum++;

  return ((float)(Formulae.size() - FNum)) / Formulae.size();

}


/// If the given formula has not yet been inserted, add it to the list, and

/// return true. Return false otherwise.  The formula must be in canonical form.

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {

  assert(F.isCanonical(L) && "Invalid canonical representation");


  if (!Formulae.empty() && RigidFormula)

    return false;


  SmallVector<const SCEV *, 4> Key = F.BaseRegs;

  if (F.ScaledReg) Key.push_back(F.ScaledReg);

  // Unstable sort by host order ok, because this is only used for uniquifying.

  llvm::sort(Key);


  if (!Uniquifier.insert(Key).second)

    return false;


  // Using a register to hold the value of 0 is not profitable.

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&

         "Zero allocated in a scaled register!");

#ifndef NDEBUG

  for (const SCEV *BaseReg : F.BaseRegs)

    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

#endif


  // Add the formula to the list.

  Formulae.push_back(F);


  // Record registers now being used by this use.

  Regs.insert_range(F.BaseRegs);

  if (F.ScaledReg)

    Regs.insert(F.ScaledReg);


  return true;

}


/// Remove the given formula from this use's list.

void LSRUse::DeleteFormula(Formula &F) {

  if (&F != &Formulae.back())

    std::swap(F, Formulae.back());

  Formulae.pop_back();

}


/// Recompute the Regs field, and update RegUses.

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {

  // Now that we've filtered out some formulae, recompute the Regs set.

  SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);

  Regs.clear();

  for (const Formula &F : Formulae) {

    if (F.ScaledReg) Regs.insert(F.ScaledReg);

    Regs.insert_range(F.BaseRegs);

  }


  // Update the RegTracker.

  for (const SCEV *S : OldRegs)

    if (!Regs.count(S))

      RegUses.dropRegister(S, LUIdx);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void LSRUse::print(raw_ostream &OS) const {

  OS << "LSR Use: Kind=";

  switch (Kind) {

  case Basic:    OS << "Basic"; break;

  case Special:  OS << "Special"; break;

  case ICmpZero: OS << "ICmpZero"; break;

  case Address:

    OS << "Address of ";

    if (AccessTy.MemTy->isPointerTy())

      OS << "pointer"; // the full pointer type could be really verbose

    else {

      OS << *AccessTy.MemTy;

    }


    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  }


  OS << ", Offsets={";

  bool NeedComma = false;

  for (const LSRFixup &Fixup : Fixups) {

    if (NeedComma) OS << ',';

    OS << Fixup.Offset;

    NeedComma = true;

  }

  OS << '}';


  if (AllFixupsOutsideLoop)

    OS << ", all-fixups-outside-loop";


  if (WidestFixupType)

    OS << ", widest fixup type: " << *WidestFixupType;

}


LLVM_DUMP_METHOD void LSRUse::dump() const {

  print(errs()); errs() << '\n';

}

#endif


static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,

                                 GlobalValue *BaseGV, Immediate BaseOffset,

                                 bool HasBaseReg, int64_t Scale,

                                 Instruction *Fixup /* = nullptr */) {

  switch (Kind) {

  case LSRUse::Address: {

    int64_t FixedOffset =

        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();

    int64_t ScalableOffset =

        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;

    return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,

                                     HasBaseReg, Scale, AccessTy.AddrSpace,

                                     Fixup, ScalableOffset);

  }

  case LSRUse::ICmpZero:

    // There's not even a target hook for querying whether it would be legal to

    // fold a GV into an ICmp.

    if (BaseGV)

      return false;


    // ICmp only has two operands; don't allow more than two non-trivial parts.

    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())

      return false;


    // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by

    // putting the scaled register in the other operand of the icmp.

    if (Scale != 0 && Scale != -1)

      return false;


    // If we have low-level target information, ask the target if it can fold an

    // integer immediate on an icmp.

    if (BaseOffset.isNonZero()) {

      // We don't have an interface to query whether the target supports

      // icmpzero against scalable quantities yet.

      if (BaseOffset.isScalable())

        return false;


      // We have one of:

      // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset

      // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset

      // Offs is the ICmp immediate.

      if (Scale == 0)

        // The cast does the right thing with

        // std::numeric_limits<int64_t>::min().

        BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());

      return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());

    }


    // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg

    return true;


  case LSRUse::Basic:

    // Only handle single-register values.

    return !BaseGV && Scale == 0 && BaseOffset.isZero();


  case LSRUse::Special:

    // Special case Basic to handle -1 scales.

    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();

  }


  llvm_unreachable("Invalid LSRUse Kind!");

}


static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,

                                 Immediate MinOffset, Immediate MaxOffset,

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,

                                 GlobalValue *BaseGV, Immediate BaseOffset,

                                 bool HasBaseReg, int64_t Scale) {

  if (BaseOffset.isNonZero() &&

      (BaseOffset.isScalable() != MinOffset.isScalable() ||

       BaseOffset.isScalable() != MaxOffset.isScalable()))

    return false;

  // Check for overflow.

  int64_t Base = BaseOffset.getKnownMinValue();

  int64_t Min = MinOffset.getKnownMinValue();

  int64_t Max = MaxOffset.getKnownMinValue();

  if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > 0))

    return false;

  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());

  if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > 0))

    return false;

  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());


  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,

                              HasBaseReg, Scale) &&

         isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,

                              HasBaseReg, Scale);

}


static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,

                                 Immediate MinOffset, Immediate MaxOffset,

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,

                                 const Formula &F, const Loop &L) {

  // For the purpose of isAMCompletelyFolded either having a canonical formula

  // or a scale not equal to zero is correct.

  // Problems may arise from non canonical formulae having a scale == 0.

  // Strictly speaking it would best to just rely on canonical formulae.

  // However, when we generate the scaled formulae, we first check that the

  // scaling factor is profitable before computing the actual ScaledReg for

  // compile time sake.

  assert((F.isCanonical(L) || F.Scale != 0));

  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,

                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

}


/// Test whether we know how to expand the current formula.


static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,

                       Immediate MaxOffset, LSRUse::KindType Kind,

                       MemAccessTy AccessTy, GlobalValue *BaseGV,

                       Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {

  // We know how to expand completely foldable formulae.

  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,

                              BaseOffset, HasBaseReg, Scale) ||

         // Or formulae that use a base register produced by a sum of base

         // registers.

         (Scale == 1 &&

          isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,

                               BaseGV, BaseOffset, true, 0));

}


static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,

                       Immediate MaxOffset, LSRUse::KindType Kind,

                       MemAccessTy AccessTy, const Formula &F) {

  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,

                    F.BaseOffset, F.HasBaseReg, F.Scale);

}


static bool isLegalAddImmediate(const TargetTransformInfo &TTI,

                                Immediate Offset) {

  if (Offset.isScalable())

    return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());


  return TTI.isLegalAddImmediate(Offset.getFixedValue());

}


static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,

                                 const LSRUse &LU, const Formula &F) {

  // Target may want to look at the user instructions.

  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {

    for (const LSRFixup &Fixup : LU.Fixups)

      if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,

                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,

                                F.Scale, Fixup.UserInst))

        return false;

    return true;

  }


  return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,

                              LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

                              F.Scale);

}


static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,

                                            const LSRUse &LU, const Formula &F,

                                            const Loop &L) {

  if (!F.Scale)

    return 0;


  // If the use is not completely folded in that instruction, we will have to

  // pay an extra cost only for scale != 1.

  if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,

                            LU.AccessTy, F, L))

    return F.Scale != 1;


  switch (LU.Kind) {

  case LSRUse::Address: {

    // Check the scaling factor cost with both the min and max offsets.

    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;

    if (F.BaseOffset.isScalable()) {

      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();

      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();

    } else {

      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();

      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();

    }

    InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(

        LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin),

        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);

    InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(

        LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax),

        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);


    assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&

           "Legal addressing mode has an illegal cost!");

    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);

  }

  case LSRUse::ICmpZero:

  case LSRUse::Basic:

  case LSRUse::Special:

    // The use is completely folded, i.e., everything is folded into the

    // instruction.

    return 0;

  }


  llvm_unreachable("Invalid LSRUse Kind!");

}


static bool isAlwaysFoldable(const TargetTransformInfo &TTI,

                             LSRUse::KindType Kind, MemAccessTy AccessTy,

                             GlobalValue *BaseGV, Immediate BaseOffset,

                             bool HasBaseReg) {

  // Fast-path: zero is always foldable.

  if (BaseOffset.isZero() && !BaseGV)

    return true;


  // Conservatively, create an address with an immediate and a

  // base and a scale.

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;


  // Canonicalize a scale of 1 to a base register if the formula doesn't

  // already have a base register.

  if (!HasBaseReg && Scale == 1) {

    Scale = 0;

    HasBaseReg = true;

  }


  // FIXME: Try with + without a scale? Maybe based on TTI?

  // I think basereg + scaledreg + immediateoffset isn't a good 'conservative'

  // default for many architectures, not just AArch64 SVE. More investigation

  // needed later to determine if this should be used more widely than just

  // on scalable types.

  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

      AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale)

    Scale = 0;


  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,

                              HasBaseReg, Scale);

}


static bool isAlwaysFoldable(const TargetTransformInfo &TTI,

                             ScalarEvolution &SE, Immediate MinOffset,

                             Immediate MaxOffset, LSRUse::KindType Kind,

                             MemAccessTy AccessTy, const SCEV *S,

                             bool HasBaseReg) {

  // Fast-path: zero is always foldable.

  if (S->isZero()) return true;


  // Conservatively, create an address with an immediate and a

  // base and a scale.

  Immediate BaseOffset = ExtractImmediate(S, SE);

  GlobalValue *BaseGV = ExtractSymbol(S, SE);


  // If there's anything else involved, it's not foldable.

  if (!S->isZero()) return false;


  // Fast-path: zero is always foldable.

  if (BaseOffset.isZero() && !BaseGV)

    return true;


  if (BaseOffset.isScalable())

    return false;


  // Conservatively, create an address with an immediate and a

  // base and a scale.

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;


  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,

                              BaseOffset, HasBaseReg, Scale);

}


namespace {


/// An individual increment in a Chain of IV increments.  Relate an IV user to

/// an expression that computes the IV it uses from the IV used by the previous

/// link in the Chain.

///

/// For the head of a chain, IncExpr holds the absolute SCEV expression for the

/// original IVOperand. The head of the chain's IVOperand is only valid during

/// chain collection, before LSR replaces IV users. During chain generation,

/// IncExpr can be used to find the new IVOperand that computes the same

/// expression.

struct IVInc {

  Instruction *UserInst;

  Value* IVOperand;

  const SCEV *IncExpr;


  IVInc(Instruction *U, Value *O, const SCEV *E)

      : UserInst(U), IVOperand(O), IncExpr(E) {}

};


// The list of IV increments in program order.  We typically add the head of a

// chain without finding subsequent links.

struct IVChain {

  SmallVector<IVInc, 1> Incs;

  const SCEV *ExprBase = nullptr;


  IVChain() = default;

  IVChain(const IVInc &Head, const SCEV *Base)

      : Incs(1, Head), ExprBase(Base) {}


  using const_iterator = SmallVectorImpl<IVInc>::const_iterator;


  // Return the first increment in the chain.

  const_iterator begin() const {

    assert(!Incs.empty());

    return std::next(Incs.begin());

  }

  const_iterator end() const {

    return Incs.end();

  }


  // Returns true if this chain contains any increments.

  bool hasIncs() const { return Incs.size() >= 2; }


  // Add an IVInc to the end of this chain.

  void add(const IVInc &X) { Incs.push_back(X); }


  // Returns the last UserInst in the chain.

  Instruction *tailUserInst() const { return Incs.back().UserInst; }


  // Returns true if IncExpr can be profitably added to this chain.

  bool isProfitableIncrement(const SCEV *OperExpr,

                             const SCEV *IncExpr,

                             ScalarEvolution&);

};


/// Helper for CollectChains to track multiple IV increment uses.  Distinguish

/// between FarUsers that definitely cross IV increments and NearUsers that may

/// be used between IV increments.

struct ChainUsers {

  SmallPtrSet<Instruction*, 4> FarUsers;

  SmallPtrSet<Instruction*, 4> NearUsers;

};


/// This class holds state for the main loop strength reduction logic.

class LSRInstance {

  IVUsers &IU;

  ScalarEvolution &SE;

  DominatorTree &DT;

  LoopInfo &LI;

  AssumptionCache &AC;

  TargetLibraryInfo &TLI;

  const TargetTransformInfo &TTI;

  Loop *const L;

  MemorySSAUpdater *MSSAU;

  TTI::AddressingModeKind AMK;

  mutable SCEVExpander Rewriter;

  bool Changed = false;

  bool HardwareLoopProfitable = false;


  /// This is the insert position that the current loop's induction variable

  /// increment should be placed. In simple loops, this is the latch block's

  /// terminator. But in more complicated cases, this is a position which will

  /// dominate all the in-loop post-increment users.

  Instruction *IVIncInsertPos = nullptr;


  /// Interesting factors between use strides.

  ///

  /// We explicitly use a SetVector which contains a SmallSet, instead of the

  /// default, a SmallDenseSet, because we need to use the full range of

  /// int64_ts, and there's currently no good way of doing that with

  /// SmallDenseSet.

  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;


  /// The cost of the current SCEV, the best solution by LSR will be dropped if

  /// the solution is not profitable.

  Cost BaselineCost;


  /// Interesting use types, to facilitate truncation reuse.

  SmallSetVector<Type *, 4> Types;


  /// The list of interesting uses.

  mutable SmallVector<LSRUse, 16> Uses;


  /// Track which uses use which register candidates.

  RegUseTracker RegUses;


  // Limit the number of chains to avoid quadratic behavior. We don't expect to

  // have more than a few IV increment chains in a loop. Missing a Chain falls

  // back to normal LSR behavior for those uses.

  static const unsigned MaxChains = 8;


  /// IV users can form a chain of IV increments.

  SmallVector<IVChain, MaxChains> IVChainVec;


  /// IV users that belong to profitable IVChains.

  SmallPtrSet<Use*, MaxChains> IVIncSet;


  /// Induction variables that were generated and inserted by the SCEV Expander.

  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;


  // Inserting instructions in the loop and using them as PHI's input could

  // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the

  // corresponding incoming block is not loop exiting). So collect all such

  // instructions to form LCSSA for them later.

  SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;


  void OptimizeShadowIV();

  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);

  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);

  void OptimizeLoopTermCond();


  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,

                        SmallVectorImpl<ChainUsers> &ChainUsersVec);

  void FinalizeChain(IVChain &Chain);

  void CollectChains();

  void GenerateIVChain(const IVChain &Chain,

                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);


  void CollectInterestingTypesAndFactors();

  void CollectFixupsAndInitialFormulae();


  // Support for sharing of LSRUses between LSRFixups.

  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;

  UseMapTy UseMap;


  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,

                          LSRUse::KindType Kind, MemAccessTy AccessTy);


  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,

                                      MemAccessTy AccessTy);


  void DeleteUse(LSRUse &LU, size_t LUIdx);


  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);


  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);

  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);

  void CountRegisters(const Formula &F, size_t LUIdx);

  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);


  void CollectLoopInvariantFixupsAndFormulae();


  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,

                              unsigned Depth = 0);


  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,

                                  const Formula &Base, unsigned Depth,

                                  size_t Idx, bool IsScaledReg = false);

  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);

  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,

                                   const Formula &Base, size_t Idx,

                                   bool IsScaledReg = false);

  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);

  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,

                                   const Formula &Base,

                                   const SmallVectorImpl<Immediate> &Worklist,

                                   size_t Idx, bool IsScaledReg = false);

  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);

  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);

  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);

  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);

  void GenerateCrossUseConstantOffsets();

  void GenerateAllReuseFormulae();


  void FilterOutUndesirableDedicatedRegisters();


  size_t EstimateSearchSpaceComplexity() const;

  void NarrowSearchSpaceByDetectingSupersets();

  void NarrowSearchSpaceByCollapsingUnrolledCode();

  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();

  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();

  void NarrowSearchSpaceByFilterPostInc();

  void NarrowSearchSpaceByDeletingCostlyFormulas();

  void NarrowSearchSpaceByPickingWinnerRegs();

  void NarrowSearchSpaceUsingHeuristics();


  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,

                    Cost &SolutionCost,

                    SmallVectorImpl<const Formula *> &Workspace,

                    const Cost &CurCost,

                    const SmallPtrSet<const SCEV *, 16> &CurRegs,

                    DenseSet<const SCEV *> &VisitedRegs) const;

  void Solve(SmallVectorImpl<const Formula *> &Solution) const;


  BasicBlock::iterator

  HoistInsertPosition(BasicBlock::iterator IP,

                      const SmallVectorImpl<Instruction *> &Inputs) const;

  BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,

                                                     const LSRFixup &LF,

                                                     const LSRUse &LU) const;


  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

                BasicBlock::iterator IP,

                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;

  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,

                     const Formula &F,

                     SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

               SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);


public:

  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,

              LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,

              TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);


  bool getChanged() const { return Changed; }

  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {

    return ScalarEvolutionIVs;

  }


  void print_factors_and_types(raw_ostream &OS) const;

  void print_fixups(raw_ostream &OS) const;

  void print_uses(raw_ostream &OS) const;

  void print(raw_ostream &OS) const;

  void dump() const;

};


} // end anonymous namespace


/// If IV is used in a int-to-float cast inside the loop then try to eliminate

/// the cast operation.

void LSRInstance::OptimizeShadowIV() {

  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);

  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    return;


  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();

       UI != E; /* empty */) {

    IVUsers::const_iterator CandidateUI = UI;

    ++UI;

    Instruction *ShadowUse = CandidateUI->getUser();

    Type *DestTy = nullptr;

    bool IsSigned = false;


    /* If shadow use is a int->float cast then insert a second IV

       to eliminate this cast.


         for (unsigned i = 0; i < n; ++i)

           foo((double)i);


       is transformed into


         double d = 0.0;

         for (unsigned i = 0; i < n; ++i, ++d)

           foo(d);

    */

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {

      IsSigned = false;

      DestTy = UCast->getDestTy();

    }

    else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {

      IsSigned = true;

      DestTy = SCast->getDestTy();

    }

    if (!DestTy) continue;


    // If target does not support DestTy natively then do not apply

    // this transformation.

    if (!TTI.isTypeLegal(DestTy)) continue;


    PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));

    if (!PH) continue;

    if (PH->getNumIncomingValues() != 2) continue;


    // If the calculation in integers overflows, the result in FP type will

    // differ. So we only can do this transformation if we are guaranteed to not

    // deal with overflowing values

    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));

    if (!AR) continue;

    if (IsSigned && !AR->hasNoSignedWrap()) continue;

    if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;


    Type *SrcTy = PH->getType();

    int Mantissa = DestTy->getFPMantissaWidth();

    if (Mantissa == -1) continue;

    if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)

      continue;


    unsigned Entry, Latch;

    if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {

      Entry = 0;

      Latch = 1;

    } else {

      Entry = 1;

      Latch = 0;

    }


    ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));

    if (!Init) continue;

    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

                                        (double)Init->getSExtValue() :

                                        (double)Init->getZExtValue());


    BinaryOperator *Incr =

      dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));

    if (!Incr) continue;

    if (Incr->getOpcode() != Instruction::Add

        && Incr->getOpcode() != Instruction::Sub)

      continue;


    /* Initialize new IV, double d = 0.0 in above example. */

    ConstantInt *C = nullptr;

    if (Incr->getOperand(0) == PH)

      C = dyn_cast<ConstantInt>(Incr->getOperand(1));

    else if (Incr->getOperand(1) == PH)

      C = dyn_cast<ConstantInt>(Incr->getOperand(0));

    else

      continue;


    if (!C) continue;


    // Ignore negative constants, as the code below doesn't handle them

    // correctly. TODO: Remove this restriction.

    if (!C->getValue().isStrictlyPositive())

      continue;


    /* Add new PHINode. */

    PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH->getIterator());

    NewPH->setDebugLoc(PH->getDebugLoc());


    /* create new increment. '++d' in above example. */

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());

    BinaryOperator *NewIncr = BinaryOperator::Create(

        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd

                                              : Instruction::FSub,

        NewPH, CFP, "IV.S.next.", Incr->getIterator());

    NewIncr->setDebugLoc(Incr->getDebugLoc());


    NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));

    NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));


    /* Remove cast operation */

    ShadowUse->replaceAllUsesWith(NewPH);

    ShadowUse->eraseFromParent();

    Changed = true;

    break;

  }

}


/// If Cond has an operand that is an expression of an IV, set the IV user and

/// stride information and return true, otherwise return false.

bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {

  for (IVStrideUse &U : IU)

    if (U.getUser() == Cond) {

      // NOTE: we could handle setcc instructions with multiple uses here, but

      // InstCombine does it as well for simple uses, it's not clear that it

      // occurs enough in real life to handle.

      CondUse = &U;

      return true;

    }

  return false;

}


/// Rewrite the loop's terminating condition if it uses a max computation.

///

/// This is a narrow solution to a specific, but acute, problem. For loops

/// like this:

///

///   i = 0;

///   do {

///     p[i] = 0.0;

///   } while (++i < n);

///

/// the trip count isn't just 'n', because 'n' might not be positive. And

/// unfortunately this can come up even for loops where the user didn't use

/// a C do-while loop. For example, seemingly well-behaved top-test loops

/// will commonly be lowered like this:

///

///   if (n > 0) {

///     i = 0;

///     do {

///       p[i] = 0.0;

///     } while (++i < n);

///   }

///

/// and then it's possible for subsequent optimization to obscure the if

/// test in such a way that indvars can't find it.

///

/// When indvars can't find the if test in loops like this, it creates a

/// max expression, which allows it to give the loop a canonical

/// induction variable:

///

///   i = 0;

///   max = n < 1 ? 1 : n;

///   do {

///     p[i] = 0.0;

///   } while (++i != max);

///

/// Canonical induction variables are necessary because the loop passes

/// are designed around them. The most obvious example of this is the

/// LoopInfo analysis, which doesn't remember trip count values. It

/// expects to be able to rediscover the trip count each time it is

/// needed, and it does this using a simple analysis that only succeeds if

/// the loop has a canonical induction variable.

///

/// However, when it comes time to generate code, the maximum operation

/// can be quite costly, especially if it's inside of an outer loop.

///

/// This function solves this problem by detecting this type of loop and

/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting

/// the instructions for the maximum computation.

ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {

  // Check that the loop matches the pattern we're looking for.

  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&

      Cond->getPredicate() != CmpInst::ICMP_NE)

    return Cond;


  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));

  if (!Sel || !Sel->hasOneUse()) return Cond;


  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);

  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    return Cond;

  const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);


  // Add one to the backedge-taken count to get the trip count.

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);

  if (IterationCount != SE.getSCEV(Sel)) return Cond;


  // Check for a max calculation that matches the pattern. There's no check

  // for ICMP_ULE here because the comparison would be with zero, which

  // isn't interesting.

  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;

  const SCEVNAryExpr *Max = nullptr;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {

    Pred = ICmpInst::ICMP_SLE;

    Max = S;

  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {

    Pred = ICmpInst::ICMP_SLT;

    Max = S;

  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {

    Pred = ICmpInst::ICMP_ULT;

    Max = U;

  } else {

    // No match; bail.

    return Cond;

  }


  // To handle a max with more than two operands, this optimization would

  // require additional checking and setup.

  if (Max->getNumOperands() != 2)

    return Cond;


  const SCEV *MaxLHS = Max->getOperand(0);

  const SCEV *MaxRHS = Max->getOperand(1);


  // ScalarEvolution canonicalizes constants to the left. For < and >, look

  // for a comparison with 1. For <= and >=, a comparison with zero.

  if (!MaxLHS ||

      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

    return Cond;


  // Check the relevant induction variable for conformance to

  // the pattern.

  const SCEV *IV = SE.getSCEV(Cond->getOperand(0));

  if (!match(IV,

             m_scev_AffineAddRec(m_scev_SpecificInt(1), m_scev_SpecificInt(1))))

    return Cond;


  assert(cast<SCEVAddRecExpr>(IV)->getLoop() == L &&

         "Loop condition operand is an addrec in a different loop!");


  // Check the right operand of the select, and remember it, as it will

  // be used in the new comparison instruction.

  Value *NewRHS = nullptr;

  if (ICmpInst::isTrueWhenEqual(Pred)) {

    // Look for n+1, and grab n.

    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))

      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))

         if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)

           NewRHS = BO->getOperand(0);

    if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))

      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))

        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)

          NewRHS = BO->getOperand(0);

    if (!NewRHS)

      return Cond;

  } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)

    NewRHS = Sel->getOperand(1);

  else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)

    NewRHS = Sel->getOperand(2);

  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))

    NewRHS = SU->getValue();

  else

    // Max doesn't match expected pattern.

    return Cond;


  // Determine the new comparison opcode. It may be signed or unsigned,

  // and the original comparison may be either equality or inequality.

  if (Cond->getPredicate() == CmpInst::ICMP_EQ)

    Pred = CmpInst::getInversePredicate(Pred);


  // Ok, everything looks ok to change the condition into an SLT or SGE and

  // delete the max calculation.

  ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,

                                   Cond->getOperand(0), NewRHS, "scmp");


  // Delete the max calculation instructions.

  NewCond->setDebugLoc(Cond->getDebugLoc());

  Cond->replaceAllUsesWith(NewCond);

  CondUse->setUser(NewCond);

  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));

  Cond->eraseFromParent();

  Sel->eraseFromParent();

  if (Cmp->use_empty()) {

    salvageDebugInfo(*Cmp);

    Cmp->eraseFromParent();

  }

  return NewCond;

}


/// Change loop terminating condition to use the postinc iv when possible.

void

LSRInstance::OptimizeLoopTermCond() {

  SmallPtrSet<Instruction *, 4> PostIncs;


  // We need a different set of heuristics for rotated and non-rotated loops.

  // If a loop is rotated then the latch is also the backedge, so inserting

  // post-inc expressions just before the latch is ideal. To reduce live ranges

  // it also makes sense to rewrite terminating conditions to use post-inc

  // expressions.

  //

  // If the loop is not rotated then the latch is not a backedge; the latch

  // check is done in the loop head. Adding post-inc expressions before the

  // latch will cause overlapping live-ranges of pre-inc and post-inc expressions

  // in the loop body. In this case we do *not* want to use post-inc expressions

  // in the latch check, and we want to insert post-inc expressions before

  // the backedge.

  BasicBlock *LatchBlock = L->getLoopLatch();

  SmallVector<BasicBlock*, 8> ExitingBlocks;

  L->getExitingBlocks(ExitingBlocks);

  if (!llvm::is_contained(ExitingBlocks, LatchBlock)) {

    // The backedge doesn't exit the loop; treat this as a head-tested loop.

    IVIncInsertPos = LatchBlock->getTerminator();

    return;

  }


  // Otherwise treat this as a rotated loop.

  for (BasicBlock *ExitingBlock : ExitingBlocks) {

    // Get the terminating condition for the loop if possible.  If we

    // can, we want to change it to use a post-incremented version of its

    // induction variable, to allow coalescing the live ranges for the IV into

    // one register value.


    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());

    if (!TermBr)

      continue;

    // FIXME: Overly conservative, termination condition could be an 'or' etc..

    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))

      continue;


    // Search IVUsesByStride to find Cond's IVUse if there is one.

    IVStrideUse *CondUse = nullptr;

    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());

    if (!FindIVUserForCond(Cond, CondUse))

      continue;


    // If the trip count is computed in terms of a max (due to ScalarEvolution

    // being unable to find a sufficient guard, for example), change the loop

    // comparison to use SLT or ULT instead of NE.

    // One consequence of doing this now is that it disrupts the count-down

    // optimization. That's not always a bad thing though, because in such

    // cases it may still be worthwhile to avoid a max.

    Cond = OptimizeMax(Cond, CondUse);


    // If this exiting block dominates the latch block, it may also use

    // the post-inc value if it won't be shared with other uses.

    // Check for dominance.

    if (!DT.dominates(ExitingBlock, LatchBlock))

      continue;


    // Conservatively avoid trying to use the post-inc value in non-latch

    // exits if there may be pre-inc users in intervening blocks.

    if (LatchBlock != ExitingBlock)

      for (const IVStrideUse &UI : IU)

        // Test if the use is reachable from the exiting block. This dominator

        // query is a conservative approximation of reachability.

        if (&UI != CondUse &&

            !DT.properlyDominates(UI.getUser()->getParent(), ExitingBlock)) {

          // Conservatively assume there may be reuse if the quotient of their

          // strides could be a legal scale.

          const SCEV *A = IU.getStride(*CondUse, L);

          const SCEV *B = IU.getStride(UI, L);

          if (!A || !B) continue;

          if (SE.getTypeSizeInBits(A->getType()) !=

              SE.getTypeSizeInBits(B->getType())) {

            if (SE.getTypeSizeInBits(A->getType()) >

                SE.getTypeSizeInBits(B->getType()))

              B = SE.getSignExtendExpr(B, A->getType());

            else

              A = SE.getSignExtendExpr(A, B->getType());

          }

          if (const SCEVConstant *D =

                dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {

            const ConstantInt *C = D->getValue();

            // Stride of one or negative one can have reuse with non-addresses.

            if (C->isOne() || C->isMinusOne())

              goto decline_post_inc;

            // Avoid weird situations.

            if (C->getValue().getSignificantBits() >= 64 ||

                C->getValue().isMinSignedValue())

              goto decline_post_inc;

            // Check for possible scaled-address reuse.

            if (isAddressUse(TTI, UI.getUser(), UI.getOperandValToReplace())) {

              MemAccessTy AccessTy =

                  getAccessType(TTI, UI.getUser(), UI.getOperandValToReplace());

              int64_t Scale = C->getSExtValue();

              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,

                                            /*BaseOffset=*/0,

                                            /*HasBaseReg=*/true, Scale,

                                            AccessTy.AddrSpace))

                goto decline_post_inc;

              Scale = -Scale;

              if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,

                                            /*BaseOffset=*/0,

                                            /*HasBaseReg=*/true, Scale,

                                            AccessTy.AddrSpace))

                goto decline_post_inc;

            }

          }

        }


    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

                      << *Cond << '\n');


    // It's possible for the setcc instruction to be anywhere in the loop, and

    // possible for it to have multiple users.  If it is not immediately before

    // the exiting block branch, move it.

    if (Cond->getNextNode() != TermBr) {

      if (Cond->hasOneUse()) {

        Cond->moveBefore(TermBr->getIterator());

      } else {

        // Clone the terminating condition and insert into the loopend.

        ICmpInst *OldCond = Cond;

        Cond = cast<ICmpInst>(Cond->clone());

        Cond->setName(L->getHeader()->getName() + ".termcond");

        Cond->insertInto(ExitingBlock, TermBr->getIterator());


        // Clone the IVUse, as the old use still exists!

        CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());

        TermBr->replaceUsesOfWith(OldCond, Cond);

      }

    }


    // If we get to here, we know that we can transform the setcc instruction to

    // use the post-incremented version of the IV, allowing us to coalesce the

    // live ranges for the IV correctly.

    CondUse->transformToPostInc(L);

    Changed = true;


    PostIncs.insert(Cond);

  decline_post_inc:;

  }


  // Determine an insertion point for the loop induction variable increment. It

  // must dominate all the post-inc comparisons we just set up, and it must

  // dominate the loop latch edge.

  IVIncInsertPos = L->getLoopLatch()->getTerminator();

  for (Instruction *Inst : PostIncs)

    IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);

}


/// Determine if the given use can accommodate a fixup at the given offset and

/// other details. If so, update the use and return true.

bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,

                                     bool HasBaseReg, LSRUse::KindType Kind,

                                     MemAccessTy AccessTy) {

  Immediate NewMinOffset = LU.MinOffset;

  Immediate NewMaxOffset = LU.MaxOffset;

  MemAccessTy NewAccessTy = AccessTy;


  // Check for a mismatched kind. It's tempting to collapse mismatched kinds to

  // something conservative, however this can pessimize in the case that one of

  // the uses will have all its uses outside the loop, for example.

  if (LU.Kind != Kind)

    return false;


  // Check for a mismatched access type, and fall back conservatively as needed.

  // TODO: Be less conservative when the type is similar and can use the same

  // addressing modes.

  if (Kind == LSRUse::Address) {

    if (AccessTy.MemTy != LU.AccessTy.MemTy) {

      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),

                                            AccessTy.AddrSpace);

    }

  }


  // Conservatively assume HasBaseReg is true for now.

  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {

    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,

                          LU.MaxOffset - NewOffset, HasBaseReg))

      return false;

    NewMinOffset = NewOffset;

  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {

    if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,

                          NewOffset - LU.MinOffset, HasBaseReg))

      return false;

    NewMaxOffset = NewOffset;

  }


  // FIXME: We should be able to handle some level of scalable offset support

  // for 'void', but in order to get basic support up and running this is

  // being left out.

  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&

      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))

    return false;


  // Update the use.

  LU.MinOffset = NewMinOffset;

  LU.MaxOffset = NewMaxOffset;

  LU.AccessTy = NewAccessTy;

  return true;

}


/// Return an LSRUse index and an offset value for a fixup which needs the given

/// expression, with the given kind and optional access type.  Either reuse an

/// existing use or create a new one, as needed.

std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,

                                                 LSRUse::KindType Kind,

                                                 MemAccessTy AccessTy) {

  const SCEV *Copy = Expr;

  Immediate Offset = ExtractImmediate(Expr, SE);


  // Basic uses can't accept any offset, for example.

  if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,

                        Offset, /*HasBaseReg=*/ true)) {

    Expr = Copy;

    Offset = Immediate::getFixed(0);

  }


  std::pair<UseMapTy::iterator, bool> P =

      UseMap.try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));

  if (!P.second) {

    // A use already existed with this base.

    size_t LUIdx = P.first->second;

    LSRUse &LU = Uses[LUIdx];

    if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))

      // Reuse this use.

      return std::make_pair(LUIdx, Offset);

  }


  // Create a new use.

  size_t LUIdx = Uses.size();

  P.first->second = LUIdx;

  Uses.push_back(LSRUse(Kind, AccessTy));

  LSRUse &LU = Uses[LUIdx];


  LU.MinOffset = Offset;

  LU.MaxOffset = Offset;

  return std::make_pair(LUIdx, Offset);

}


/// Delete the given use from the Uses list.

void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {

  if (&LU != &Uses.back())

    std::swap(LU, Uses.back());

  Uses.pop_back();


  // Update RegUses.

  RegUses.swapAndDropUse(LUIdx, Uses.size());

}


/// Look for a use distinct from OrigLU which is has a formula that has the same

/// registers as the given formula.

LSRUse *

LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,

                                       const LSRUse &OrigLU) {

  // Search all uses for the formula. This could be more clever.

  for (LSRUse &LU : Uses) {

    // Check whether this use is close enough to OrigLU, to see whether it's

    // worthwhile looking through its formulae.

    // Ignore ICmpZero uses because they may contain formulae generated by

    // GenerateICmpZeroScales, in which case adding fixup offsets may

    // be invalid.

    if (&LU != &OrigLU &&

        LU.Kind != LSRUse::ICmpZero &&

        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&

        LU.WidestFixupType == OrigLU.WidestFixupType &&

        LU.HasFormulaWithSameRegs(OrigF)) {

      // Scan through this use's formulae.

      for (const Formula &F : LU.Formulae) {

        // Check to see if this formula has the same registers and symbols

        // as OrigF.

        if (F.BaseRegs == OrigF.BaseRegs &&

            F.ScaledReg == OrigF.ScaledReg &&

            F.BaseGV == OrigF.BaseGV &&

            F.Scale == OrigF.Scale &&

            F.UnfoldedOffset == OrigF.UnfoldedOffset) {

          if (F.BaseOffset.isZero())

            return &LU;

          // This is the formula where all the registers and symbols matched;

          // there aren't going to be any others. Since we declined it, we

          // can skip the rest of the formulae and proceed to the next LSRUse.

          break;

        }

      }

    }

  }


  // Nothing looked good.

  return nullptr;

}


void LSRInstance::CollectInterestingTypesAndFactors() {

  SmallSetVector<const SCEV *, 4> Strides;


  // Collect interesting types and strides.

  SmallVector<const SCEV *, 4> Worklist;

  for (const IVStrideUse &U : IU) {

    const SCEV *Expr = IU.getExpr(U);

    if (!Expr)

      continue;


    // Collect interesting types.

    Types.insert(SE.getEffectiveSCEVType(Expr->getType()));


    // Add strides for mentioned loops.

    Worklist.push_back(Expr);

    do {

      const SCEV *S = Worklist.pop_back_val();

      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {

        if (AR->getLoop() == L)

          Strides.insert(AR->getStepRecurrence(SE));

        Worklist.push_back(AR->getStart());

      } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {

        append_range(Worklist, Add->operands());

      }

    } while (!Worklist.empty());

  }


  // Compute interesting factors from the set of interesting strides.

  for (SmallSetVector<const SCEV *, 4>::const_iterator

       I = Strides.begin(), E = Strides.end(); I != E; ++I)

    for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =

         std::next(I); NewStrideIter != E; ++NewStrideIter) {

      const SCEV *OldStride = *I;

      const SCEV *NewStride = *NewStrideIter;


      if (SE.getTypeSizeInBits(OldStride->getType()) !=

          SE.getTypeSizeInBits(NewStride->getType())) {

        if (SE.getTypeSizeInBits(OldStride->getType()) >

            SE.getTypeSizeInBits(NewStride->getType()))

          NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());

        else

          OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());

      }

      if (const SCEVConstant *Factor =

            dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,

                                                        SE, true))) {

        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())

          Factors.insert(Factor->getAPInt().getSExtValue());

      } else if (const SCEVConstant *Factor =

                   dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,

                                                               NewStride,

                                                               SE, true))) {

        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())

          Factors.insert(Factor->getAPInt().getSExtValue());

      }

    }


  // If all uses use the same type, don't bother looking for truncation-based

  // reuse.

  if (Types.size() == 1)

    Types.clear();


  LLVM_DEBUG(print_factors_and_types(dbgs()));

}


/// Helper for CollectChains that finds an IV operand (computed by an AddRec in

/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to

/// IVStrideUses, we could partially skip this.

static User::op_iterator


findIVOperand(User::op_iterator OI, User::op_iterator OE,

              Loop *L, ScalarEvolution &SE) {

  for(; OI != OE; ++OI) {

    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {

      if (!SE.isSCEVable(Oper->getType()))

        continue;


      if (const SCEVAddRecExpr *AR =

          dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {

        if (AR->getLoop() == L)

          break;

      }

    }

  }

  return OI;

}


/// IVChain logic must consistently peek base TruncInst operands, so wrap it in

/// a convenient helper.


static Value *getWideOperand(Value *Oper) {

  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))

    return Trunc->getOperand(0);

  return Oper;

}


/// Return an approximation of this SCEV expression's "base", or NULL for any

/// constant. Returning the expression itself is conservative. Returning a

/// deeper subexpression is more precise and valid as long as it isn't less

/// complex than another subexpression. For expressions involving multiple

/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids

/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],

/// IVInc==b-a.

///

/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost

/// SCEVUnknown, we simply return the rightmost SCEV operand.


static const SCEV *getExprBase(const SCEV *S) {

  switch (S->getSCEVType()) {

  default: // including scUnknown.

    return S;

  case scConstant:

  case scVScale:

    return nullptr;

  case scTruncate:

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());

  case scZeroExtend:

    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());

  case scSignExtend:

    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());

  case scAddExpr: {

    // Skip over scaled operands (scMulExpr) to follow add operands as long as

    // there's nothing more complex.

    // FIXME: not sure if we want to recognize negation.

    const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);

    for (const SCEV *SubExpr : reverse(Add->operands())) {

      if (SubExpr->getSCEVType() == scAddExpr)

        return getExprBase(SubExpr);


      if (SubExpr->getSCEVType() != scMulExpr)

        return SubExpr;

    }

    return S; // all operands are scaled, be conservative.

  }

  case scAddRecExpr:

    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());

  }

  llvm_unreachable("Unknown SCEV kind!");

}


/// Return true if the chain increment is profitable to expand into a loop

/// invariant value, which may require its own register. A profitable chain

/// increment will be an offset relative to the same base. We allow such offsets

/// to potentially be used as chain increment as long as it's not obviously

/// expensive to expand using real instructions.

bool IVChain::isProfitableIncrement(const SCEV *OperExpr,

                                    const SCEV *IncExpr,

                                    ScalarEvolution &SE) {

  // Aggressively form chains when -stress-ivchain.

  if (StressIVChain)

    return true;


  // Do not replace a constant offset from IV head with a nonconstant IV

  // increment.

  if (!isa<SCEVConstant>(IncExpr)) {

    const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));

    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))

      return false;

  }


  SmallPtrSet<const SCEV*, 8> Processed;

  return !isHighCostExpansion(IncExpr, Processed, SE);

}


/// Return true if the number of registers needed for the chain is estimated to

/// be less than the number required for the individual IV users. First prohibit

/// any IV users that keep the IV live across increments (the Users set should

/// be empty). Next count the number and type of increments in the chain.

///

/// Chaining IVs can lead to considerable code bloat if ISEL doesn't

/// effectively use postinc addressing modes. Only consider it profitable it the

/// increments can be computed in fewer registers when chained.

///

/// TODO: Consider IVInc free if it's already used in another chains.


static bool isProfitableChain(IVChain &Chain,

                              SmallPtrSetImpl<Instruction *> &Users,

                              ScalarEvolution &SE,

                              const TargetTransformInfo &TTI) {

  if (StressIVChain)

    return true;


  if (!Chain.hasIncs())

    return false;


  if (!Users.empty()) {

    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";

               for (Instruction *Inst

                    : Users) { dbgs() << "  " << *Inst << "\n"; });

    return false;

  }

  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");


  // The chain itself may require a register, so intialize cost to 1.

  int cost = 1;


  // A complete chain likely eliminates the need for keeping the original IV in

  // a register. LSR does not currently know how to form a complete chain unless

  // the header phi already exists.

  if (isa<PHINode>(Chain.tailUserInst())

      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

    --cost;

  }

  const SCEV *LastIncExpr = nullptr;

  unsigned NumConstIncrements = 0;

  unsigned NumVarIncrements = 0;

  unsigned NumReusedIncrements = 0;


  if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))

    return true;


  for (const IVInc &Inc : Chain) {

    if (TTI.isProfitableLSRChainElement(Inc.UserInst))

      return true;

    if (Inc.IncExpr->isZero())

      continue;


    // Incrementing by zero or some constant is neutral. We assume constants can

    // be folded into an addressing mode or an add's immediate operand.

    if (isa<SCEVConstant>(Inc.IncExpr)) {

      ++NumConstIncrements;

      continue;

    }


    if (Inc.IncExpr == LastIncExpr)

      ++NumReusedIncrements;

    else

      ++NumVarIncrements;


    LastIncExpr = Inc.IncExpr;

  }

  // An IV chain with a single increment is handled by LSR's postinc

  // uses. However, a chain with multiple increments requires keeping the IV's

  // value live longer than it needs to be if chained.

  if (NumConstIncrements > 1)

    --cost;


  // Materializing increment expressions in the preheader that didn't exist in

  // the original code may cost a register. For example, sign-extended array

  // indices can produce ridiculous increments like this:

  // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))

  cost += NumVarIncrements;


  // Reusing variable increments likely saves a register to hold the multiple of

  // the stride.

  cost -= NumReusedIncrements;


  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost

                    << "\n");


  return cost < 0;

}


/// Add this IV user to an existing chain or make it the head of a new chain.

void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,

                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {

  // When IVs are used as types of varying widths, they are generally converted

  // to a wider type with some uses remaining narrow under a (free) trunc.

  Value *const NextIV = getWideOperand(IVOper);

  const SCEV *const OperExpr = SE.getSCEV(NextIV);

  const SCEV *const OperExprBase = getExprBase(OperExpr);


  // Visit all existing chains. Check if its IVOper can be computed as a

  // profitable loop invariant increment from the last link in the Chain.

  unsigned ChainIdx = 0, NChains = IVChainVec.size();

  const SCEV *LastIncExpr = nullptr;

  for (; ChainIdx < NChains; ++ChainIdx) {

    IVChain &Chain = IVChainVec[ChainIdx];


    // Prune the solution space aggressively by checking that both IV operands

    // are expressions that operate on the same unscaled SCEVUnknown. This

    // "base" will be canceled by the subsequent getMinusSCEV call. Checking

    // first avoids creating extra SCEV expressions.

    if (!StressIVChain && Chain.ExprBase != OperExprBase)

      continue;


    Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);

    if (PrevIV->getType() != NextIV->getType())

      continue;


    // A phi node terminates a chain.

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

      continue;


    // The increment must be loop-invariant so it can be kept in a register.

    const SCEV *PrevExpr = SE.getSCEV(PrevIV);

    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);

    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))

      continue;


    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {

      LastIncExpr = IncExpr;

      break;

    }

  }

  // If we haven't found a chain, create a new one, unless we hit the max. Don't

  // bother for phi nodes, because they must be last in the chain.

  if (ChainIdx == NChains) {

    if (isa<PHINode>(UserInst))

      return;

    if (NChains >= MaxChains && !StressIVChain) {

      LLVM_DEBUG(dbgs() << "IV Chain Limit\n");

      return;

    }

    LastIncExpr = OperExpr;

    // IVUsers may have skipped over sign/zero extensions. We don't currently

    // attempt to form chains involving extensions unless they can be hoisted

    // into this loop's AddRec.

    if (!isa<SCEVAddRecExpr>(LastIncExpr))

      return;

    ++NChains;

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),

                                 OperExprBase));

    ChainUsersVec.resize(NChains);

    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst

                      << ") IV=" << *LastIncExpr << "\n");

  } else {

    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << "  Inc: (" << *UserInst

                      << ") IV+" << *LastIncExpr << "\n");

    // Add this IV user to the end of the chain.

    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));

  }

  IVChain &Chain = IVChainVec[ChainIdx];


  SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;

  // This chain's NearUsers become FarUsers.

  if (!LastIncExpr->isZero()) {

    ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);

    NearUsers.clear();

  }


  // All other uses of IVOperand become near uses of the chain.

  // We currently ignore intermediate values within SCEV expressions, assuming

  // they will eventually be used be the current chain, or can be computed

  // from one of the chain increments. To be more precise we could

  // transitively follow its user and only add leaf IV users to the set.

  for (User *U : IVOper->users()) {

    Instruction *OtherUse = dyn_cast<Instruction>(U);

    if (!OtherUse)

      continue;

    // Uses in the chain will no longer be uses if the chain is formed.

    // Include the head of the chain in this iteration (not Chain.begin()).

    IVChain::const_iterator IncIter = Chain.Incs.begin();

    IVChain::const_iterator IncEnd = Chain.Incs.end();

    for( ; IncIter != IncEnd; ++IncIter) {

      if (IncIter->UserInst == OtherUse)

        break;

    }

    if (IncIter != IncEnd)

      continue;


    if (SE.isSCEVable(OtherUse->getType())

        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))

        && IU.isIVUserOrOperand(OtherUse)) {

      continue;

    }

    NearUsers.insert(OtherUse);

  }


  // Since this user is part of the chain, it's no longer considered a use

  // of the chain.

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);

}


/// Populate the vector of Chains.

///

/// This decreases ILP at the architecture level. Targets with ample registers,

/// multiple memory ports, and no register renaming probably don't want

/// this. However, such targets should probably disable LSR altogether.

///

/// The job of LSR is to make a reasonable choice of induction variables across

/// the loop. Subsequent passes can easily "unchain" computation exposing more

/// ILP *within the loop* if the target wants it.

///

/// Finding the best IV chain is potentially a scheduling problem. Since LSR

/// will not reorder memory operations, it will recognize this as a chain, but

/// will generate redundant IV increments. Ideally this would be corrected later

/// by a smart scheduler:

///        = A[i]

///        = A[i+x]

/// A[i]   =

/// A[i+x] =

///

/// TODO: Walk the entire domtree within this loop, not just the path to the

/// loop latch. This will discover chains on side paths, but requires

/// maintaining multiple copies of the Chains state.

void LSRInstance::CollectChains() {

  LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");

  SmallVector<ChainUsers, 8> ChainUsersVec;


  SmallVector<BasicBlock *,8> LatchPath;

  BasicBlock *LoopHeader = L->getHeader();

  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());

       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

    LatchPath.push_back(Rung->getBlock());

  }

  LatchPath.push_back(LoopHeader);


  // Walk the instruction stream from the loop header to the loop latch.

  for (BasicBlock *BB : reverse(LatchPath)) {

    for (Instruction &I : *BB) {

      // Skip instructions that weren't seen by IVUsers analysis.

      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))

        continue;


      // Ignore users that are part of a SCEV expression. This way we only

      // consider leaf IV Users. This effectively rediscovers a portion of

      // IVUsers analysis but in program order this time.

      if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))

          continue;


      // Remove this instruction from any NearUsers set it may be in.

      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();

           ChainIdx < NChains; ++ChainIdx) {

        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      }

      // Search for operands that can be chained.

      SmallPtrSet<Instruction*, 4> UniqueOperands;

      User::op_iterator IVOpEnd = I.op_end();

      User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);

      while (IVOpIter != IVOpEnd) {

        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);

        if (UniqueOperands.insert(IVOpInst).second)

          ChainInstruction(&I, IVOpInst, ChainUsersVec);

        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

      }

    } // Continue walking down the instructions.

  } // Continue walking down the domtree.

  // Visit phi backedges to determine if the chain can generate the IV postinc.

  for (PHINode &PN : L->getHeader()->phis()) {

    if (!SE.isSCEVable(PN.getType()))

      continue;


    Instruction *IncV =

        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));

    if (IncV)

      ChainInstruction(&PN, IncV, ChainUsersVec);

  }

  // Remove any unprofitable chains.

  unsigned ChainIdx = 0;

  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();

       UsersIdx < NChains; ++UsersIdx) {

    if (!isProfitableChain(IVChainVec[UsersIdx],

                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))

      continue;

    // Preserve the chain at UsesIdx.

    if (ChainIdx != UsersIdx)

      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];

    FinalizeChain(IVChainVec[ChainIdx]);

    ++ChainIdx;

  }

  IVChainVec.resize(ChainIdx);

}


void LSRInstance::FinalizeChain(IVChain &Chain) {

  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");


  for (const IVInc &Inc : Chain) {

    LLVM_DEBUG(dbgs() << "        Inc: " << *Inc.UserInst << "\n");

    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);

    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");

    IVIncSet.insert(UseI);

  }

}


/// Return true if the IVInc can be folded into an addressing mode.


static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,

                             Value *Operand, const TargetTransformInfo &TTI) {

  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);

  Immediate IncOffset = Immediate::getZero();

  if (IncConst) {

    if (IncConst && IncConst->getAPInt().getSignificantBits() > 64)

      return false;

    IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());

  } else {

    // Look for mul(vscale, constant), to detect a scalable offset.

    const APInt *C;

    if (!match(IncExpr, m_scev_Mul(m_scev_APInt(C), m_SCEVVScale())) ||

        C->getSignificantBits() > 64)

      return false;

    IncOffset = Immediate::getScalable(C->getSExtValue());

  }


  if (!isAddressUse(TTI, UserInst, Operand))

    return false;


  MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);

  if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,

                        IncOffset, /*HasBaseReg=*/false))

    return false;


  return true;

}


/// Generate an add or subtract for each IVInc in a chain to materialize the IV

/// user's operand from the previous IV user's operand.

void LSRInstance::GenerateIVChain(const IVChain &Chain,

                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {

  // Find the new IVOperand for the head of the chain. It may have been replaced

  // by LSR.

  const IVInc &Head = Chain.Incs[0];

  User::op_iterator IVOpEnd = Head.UserInst->op_end();

  // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.

  User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),

                                             IVOpEnd, L, SE);

  Value *IVSrc = nullptr;

  while (IVOpIter != IVOpEnd) {

    IVSrc = getWideOperand(*IVOpIter);


    // If this operand computes the expression that the chain needs, we may use

    // it. (Check this after setting IVSrc which is used below.)

    //

    // Note that if Head.IncExpr is wider than IVSrc, then this phi is too

    // narrow for the chain, so we can no longer use it. We do allow using a

    // wider phi, assuming the LSR checked for free truncation. In that case we

    // should already have a truncate on this operand such that

    // getSCEV(IVSrc) == IncExpr.

    if (SE.getSCEV(*IVOpIter) == Head.IncExpr

        || SE.getSCEV(IVSrc) == Head.IncExpr) {

      break;

    }

    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  }

  if (IVOpIter == IVOpEnd) {

    // Gracefully give up on this chain.

    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");

    return;

  }

  assert(IVSrc && "Failed to find IV chain source");


  LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");

  Type *IVTy = IVSrc->getType();

  Type *IntTy = SE.getEffectiveSCEVType(IVTy);

  const SCEV *LeftOverExpr = nullptr;

  const SCEV *Accum = SE.getZero(IntTy);

  SmallVector<std::pair<const SCEV *, Value *>> Bases;

  Bases.emplace_back(Accum, IVSrc);


  for (const IVInc &Inc : Chain) {

    Instruction *InsertPt = Inc.UserInst;

    if (isa<PHINode>(InsertPt))

      InsertPt = L->getLoopLatch()->getTerminator();


    // IVOper will replace the current IV User's operand. IVSrc is the IV

    // value currently held in a register.

    Value *IVOper = IVSrc;

    if (!Inc.IncExpr->isZero()) {

      // IncExpr was the result of subtraction of two narrow values, so must

      // be signed.

      const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);

      Accum = SE.getAddExpr(Accum, IncExpr);

      LeftOverExpr = LeftOverExpr ?

        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

    }


    // Look through each base to see if any can produce a nice addressing mode.

    bool FoundBase = false;

    for (auto [MapScev, MapIVOper] : reverse(Bases)) {

      const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);

      if (canFoldIVIncExpr(Remainder, Inc.UserInst, Inc.IVOperand, TTI)) {

        if (!Remainder->isZero()) {

          Rewriter.clearPostInc();

          Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);

          const SCEV *IVOperExpr =

              SE.getAddExpr(SE.getUnknown(MapIVOper), SE.getUnknown(IncV));

          IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

        } else {

          IVOper = MapIVOper;

        }


        FoundBase = true;

        break;

      }

    }

    if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {

      // Expand the IV increment.

      Rewriter.clearPostInc();

      Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);

      const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),

                                             SE.getUnknown(IncV));

      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);


      // If an IV increment can't be folded, use it as the next IV value.

      if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {

        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");

        Bases.emplace_back(Accum, IVOper);

        IVSrc = IVOper;

        LeftOverExpr = nullptr;

      }

    }

    Type *OperTy = Inc.IVOperand->getType();

    if (IVTy != OperTy) {

      assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&

             "cannot extend a chained IV");

      IRBuilder<> Builder(InsertPt);

      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");

    }

    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

    if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))

      DeadInsts.emplace_back(OperandIsInstr);

  }

  // If LSR created a new, wider phi, we may also replace its postinc. We only

  // do this if we also found a wide value for the head of the chain.

  if (isa<PHINode>(Chain.tailUserInst())) {

    for (PHINode &Phi : L->getHeader()->phis()) {

      if (Phi.getType() != IVSrc->getType())

        continue;

      Instruction *PostIncV = dyn_cast<Instruction>(

          Phi.getIncomingValueForBlock(L->getLoopLatch()));

      if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))

        continue;

      Value *IVOper = IVSrc;

      Type *PostIncTy = PostIncV->getType();

      if (IVTy != PostIncTy) {

        assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");

        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());

        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());

        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");

      }

      Phi.replaceUsesOfWith(PostIncV, IVOper);

      DeadInsts.emplace_back(PostIncV);

    }

  }

}


void LSRInstance::CollectFixupsAndInitialFormulae() {

  BranchInst *ExitBranch = nullptr;

  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);


  // For calculating baseline cost

  SmallPtrSet<const SCEV *, 16> Regs;

  DenseSet<const SCEV *> VisitedRegs;

  DenseSet<size_t> VisitedLSRUse;


  for (const IVStrideUse &U : IU) {

    Instruction *UserInst = U.getUser();

    // Skip IV users that are part of profitable IV Chains.

    User::op_iterator UseI =

        find(UserInst->operands(), U.getOperandValToReplace());

    assert(UseI != UserInst->op_end() && "cannot find IV operand");

    if (IVIncSet.count(UseI)) {

      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

      continue;

    }


    LSRUse::KindType Kind = LSRUse::Basic;

    MemAccessTy AccessTy;

    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {

      Kind = LSRUse::Address;

      AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());

    }


    const SCEV *S = IU.getExpr(U);

    if (!S)

      continue;

    PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();


    // Equality (== and !=) ICmps are special. We can rewrite (i == N) as

    // (N - i == 0), and this allows (N - i) to be the expression that we work

    // with rather than just N or i, so we can consider the register

    // requirements for both N and i at the same time. Limiting this code to

    // equality icmps is not a problem because all interesting loops use

    // equality icmps, thanks to IndVarSimplify.

    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {

      // If CI can be saved in some target, like replaced inside hardware loop

      // in PowerPC, no need to generate initial formulae for it.

      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))

        continue;

      if (CI->isEquality()) {

        // Swap the operands if needed to put the OperandValToReplace on the

        // left, for consistency.

        Value *NV = CI->getOperand(1);

        if (NV == U.getOperandValToReplace()) {

          CI->setOperand(1, CI->getOperand(0));

          CI->setOperand(0, NV);

          NV = CI->getOperand(1);

          Changed = true;

        }


        // x == y  -->  x - y == 0

        const SCEV *N = SE.getSCEV(NV);

        if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&

            (!NV->getType()->isPointerTy() ||

             SE.getPointerBase(N) == SE.getPointerBase(S))) {

          // S is normalized, so normalize N before folding it into S

          // to keep the result normalized.

          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);

          if (!N)

            continue;

          Kind = LSRUse::ICmpZero;

          S = SE.getMinusSCEV(N, S);

        } else if (L->isLoopInvariant(NV) &&

                   (!isa<Instruction>(NV) ||

                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&

                   !NV->getType()->isPointerTy()) {

          // If we can't generally expand the expression (e.g. it contains

          // a divide), but it is already at a loop invariant point before the

          // loop, wrap it in an unknown (to prevent the expander from trying

          // to re-expand in a potentially unsafe way.)  The restriction to

          // integer types is required because the unknown hides the base, and

          // SCEV can't compute the difference of two unknown pointers.

          N = SE.getUnknown(NV);

          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);

          if (!N)

            continue;

          Kind = LSRUse::ICmpZero;

          S = SE.getMinusSCEV(N, S);

          assert(!isa<SCEVCouldNotCompute>(S));

        }


        // -1 and the negations of all interesting strides (except the negation

        // of -1) are now also interesting.

        for (size_t i = 0, e = Factors.size(); i != e; ++i)

          if (Factors[i] != -1)

            Factors.insert(-(uint64_t)Factors[i]);

        Factors.insert(-1);

      }

    }


    // Get or create an LSRUse.

    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);

    size_t LUIdx = P.first;

    Immediate Offset = P.second;

    LSRUse &LU = Uses[LUIdx];


    // Record the fixup.

    LSRFixup &LF = LU.getNewFixup();

    LF.UserInst = UserInst;

    LF.OperandValToReplace = U.getOperandValToReplace();

    LF.PostIncLoops = TmpPostIncLoops;

    LF.Offset = Offset;

    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);


    // Create SCEV as Formula for calculating baseline cost

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {

      Formula F;

      F.initialMatch(S, L, SE);

      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,

                               HardwareLoopProfitable);

      VisitedLSRUse.insert(LUIdx);

    }


    if (!LU.WidestFixupType ||

        SE.getTypeSizeInBits(LU.WidestFixupType) <

        SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))

      LU.WidestFixupType = LF.OperandValToReplace->getType();


    // If this is the first use of this LSRUse, give it a formula.

    if (LU.Formulae.empty()) {

      InsertInitialFormula(S, LU, LUIdx);

      CountRegisters(LU.Formulae.back(), LUIdx);

    }

  }


  LLVM_DEBUG(print_fixups(dbgs()));

}


/// Insert a formula for the given expression into the given use, separating out

/// loop-variant portions from loop-invariant and loop-computable portions.

void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,

                                       size_t LUIdx) {

  // Mark uses whose expressions cannot be expanded.

  if (!Rewriter.isSafeToExpand(S))

    LU.RigidFormula = true;


  Formula F;

  F.initialMatch(S, L, SE);

  bool Inserted = InsertFormula(LU, LUIdx, F);

  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

}


/// Insert a simple single-register formula for the given expression into the

/// given use.

void

LSRInstance::InsertSupplementalFormula(const SCEV *S,

                                       LSRUse &LU, size_t LUIdx) {

  Formula F;

  F.BaseRegs.push_back(S);

  F.HasBaseReg = true;

  bool Inserted = InsertFormula(LU, LUIdx, F);

  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

}


/// Note which registers are used by the given formula, updating RegUses.

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {

  if (F.ScaledReg)

    RegUses.countRegister(F.ScaledReg, LUIdx);

  for (const SCEV *BaseReg : F.BaseRegs)

    RegUses.countRegister(BaseReg, LUIdx);

}


/// If the given formula has not yet been inserted, add it to the list, and

/// return true. Return false otherwise.

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {

  // Do not insert formula that we will not be able to expand.

  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&

         "Formula is illegal");


  if (!LU.InsertFormula(F, *L))

    return false;


  CountRegisters(F, LUIdx);

  return true;

}


/// Check for other uses of loop-invariant values which we're tracking. These

/// other uses will pin these values in registers, making them less profitable

/// for elimination.

/// TODO: This currently misses non-constant addrec step registers.

/// TODO: Should this give more weight to users inside the loop?

void

LSRInstance::CollectLoopInvariantFixupsAndFormulae() {

  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());

  SmallPtrSet<const SCEV *, 32> Visited;


  // Don't collect outside uses if we are favoring postinc - the instructions in

  // the loop are more important than the ones outside of it.

  if (AMK == TTI::AMK_PostIndexed)

    return;


  while (!Worklist.empty()) {

    const SCEV *S = Worklist.pop_back_val();


    // Don't process the same SCEV twice

    if (!Visited.insert(S).second)

      continue;


    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))

      append_range(Worklist, N->operands());

    else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))

      Worklist.push_back(C->getOperand());

    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {

      Worklist.push_back(D->getLHS());

      Worklist.push_back(D->getRHS());

    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {

      const Value *V = US->getValue();

      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {

        // Look for instructions defined outside the loop.

        if (L->contains(Inst)) continue;

      } else if (isa<Constant>(V))

        // Constants can be re-materialized.

        continue;

      for (const Use &U : V->uses()) {

        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());

        // Ignore non-instructions.

        if (!UserInst)

          continue;

        // Don't bother if the instruction is an EHPad.

        if (UserInst->isEHPad())

          continue;

        // Ignore instructions in other functions (as can happen with

        // Constants).

        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())

          continue;

        // Ignore instructions not dominated by the loop.

        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?

          UserInst->getParent() :

          cast<PHINode>(UserInst)->getIncomingBlock(

            PHINode::getIncomingValueNumForOperand(U.getOperandNo()));

        if (!DT.dominates(L->getHeader(), UseBB))

          continue;

        // Don't bother if the instruction is in a BB which ends in an EHPad.

        if (UseBB->getTerminator()->isEHPad())

          continue;


        // Ignore cases in which the currently-examined value could come from

        // a basic block terminated with an EHPad. This checks all incoming

        // blocks of the phi node since it is possible that the same incoming

        // value comes from multiple basic blocks, only some of which may end

        // in an EHPad. If any of them do, a subsequent rewrite attempt by this

        // pass would try to insert instructions into an EHPad, hitting an

        // assertion.

        if (isa<PHINode>(UserInst)) {

          const auto *PhiNode = cast<PHINode>(UserInst);

          bool HasIncompatibleEHPTerminatedBlock = false;

          llvm::Value *ExpectedValue = U;

          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {

            if (PhiNode->getIncomingValue(I) == ExpectedValue) {

              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {

                HasIncompatibleEHPTerminatedBlock = true;

                break;

              }

            }

          }

          if (HasIncompatibleEHPTerminatedBlock) {

            continue;

          }

        }


        // Don't bother rewriting PHIs in catchswitch blocks.

        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))

          continue;

        // Ignore uses which are part of other SCEV expressions, to avoid

        // analyzing them multiple times.

        if (SE.isSCEVable(UserInst->getType())) {

          const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));

          // If the user is a no-op, look through to its uses.

          if (!isa<SCEVUnknown>(UserS))

            continue;

          if (UserS == US) {

            Worklist.push_back(

              SE.getUnknown(const_cast<Instruction *>(UserInst)));

            continue;

          }

        }

        // Ignore icmp instructions which are already being analyzed.

        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {

          unsigned OtherIdx = !U.getOperandNo();

          Value *OtherOp = ICI->getOperand(OtherIdx);

          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))

            continue;

        }


        // Do not consider uses inside lifetime intrinsics. These are not

        // actually materialized.

        if (UserInst->isLifetimeStartOrEnd())

          continue;


        std::pair<size_t, Immediate> P =

            getUse(S, LSRUse::Basic, MemAccessTy());

        size_t LUIdx = P.first;

        Immediate Offset = P.second;

        LSRUse &LU = Uses[LUIdx];

        LSRFixup &LF = LU.getNewFixup();

        LF.UserInst = const_cast<Instruction *>(UserInst);

        LF.OperandValToReplace = U;

        LF.Offset = Offset;

        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

        if (!LU.WidestFixupType ||

            SE.getTypeSizeInBits(LU.WidestFixupType) <

            SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))

          LU.WidestFixupType = LF.OperandValToReplace->getType();

        InsertSupplementalFormula(US, LU, LUIdx);

        CountRegisters(LU.Formulae.back(), Uses.size() - 1);

        break;

      }

    }

  }

}


/// Split S into subexpressions which can be pulled out into separate

/// registers. If C is non-null, multiply each subexpression by C.

///

/// Return remainder expression after factoring the subexpressions captured by

/// Ops. If Ops is complete, return NULL.


static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,

                                   SmallVectorImpl<const SCEV *> &Ops,

                                   const Loop *L,

                                   ScalarEvolution &SE,

                                   unsigned Depth = 0) {

  // Arbitrarily cap recursion to protect compile time.

  if (Depth >= 3)

    return S;


  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {

    // Break out add operands.

    for (const SCEV *S : Add->operands()) {

      const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);

      if (Remainder)

        Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);

    }

    return nullptr;

  }

  const SCEV *Start, *Step;

  const SCEVConstant *Op0;

  const SCEV *Op1;

  if (match(S, m_scev_AffineAddRec(m_SCEV(Start), m_SCEV(Step)))) {

    // Split a non-zero base out of an addrec.

    if (Start->isZero())

      return S;


    const SCEV *Remainder = CollectSubexprs(Start, C, Ops, L, SE, Depth + 1);

    // Split the non-zero AddRec unless it is part of a nested recurrence that

    // does not pertain to this loop.

    if (Remainder && (cast<SCEVAddRecExpr>(S)->getLoop() == L ||

                      !isa<SCEVAddRecExpr>(Remainder))) {

      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);

      Remainder = nullptr;

    }

    if (Remainder != Start) {

      if (!Remainder)

        Remainder = SE.getConstant(S->getType(), 0);

      return SE.getAddRecExpr(Remainder, Step,

                              cast<SCEVAddRecExpr>(S)->getLoop(),

                              // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)

                              SCEV::FlagAnyWrap);

    }

  } else if (match(S, m_scev_Mul(m_SCEVConstant(Op0), m_SCEV(Op1)))) {

    // Break (C * (a + b + c)) into C*a + C*b + C*c.

    C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;

    const SCEV *Remainder = CollectSubexprs(Op1, C, Ops, L, SE, Depth + 1);

    if (Remainder)

      Ops.push_back(SE.getMulExpr(C, Remainder));

    return nullptr;

  }

  return S;

}


/// Return true if the SCEV represents a value that may end up as a

/// post-increment operation.


static bool mayUsePostIncMode(const TargetTransformInfo &TTI,

                              LSRUse &LU, const SCEV *S, const Loop *L,

                              ScalarEvolution &SE) {

  if (LU.Kind != LSRUse::Address ||

      !LU.AccessTy.getType()->isIntOrIntVectorTy())

    return false;

  const SCEV *Start;

  if (!match(S, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant())))

    return false;

  // Check if a post-indexed load/store can be used.

  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, S->getType()) ||

      TTI.isIndexedStoreLegal(TTI.MIM_PostInc, S->getType())) {

    if (!isa<SCEVConstant>(Start) && SE.isLoopInvariant(Start, L))

      return true;

  }

  return false;

}


/// Helper function for LSRInstance::GenerateReassociations.

void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,

                                             const Formula &Base,

                                             unsigned Depth, size_t Idx,

                                             bool IsScaledReg) {

  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  // Don't generate reassociations for the base register of a value that

  // may generate a post-increment operator. The reason is that the

  // reassociations cause extra base+register formula to be created,

  // and possibly chosen, but the post-increment is more efficient.

  if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))

    return;

  SmallVector<const SCEV *, 8> AddOps;

  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);

  if (Remainder)

    AddOps.push_back(Remainder);


  if (AddOps.size() == 1)

    return;


  for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),

                                                     JE = AddOps.end();

       J != JE; ++J) {

    // Loop-variant "unknown" values are uninteresting; we won't be able to

    // do anything meaningful with them.

    if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))

      continue;


    // Don't pull a constant into a register if the constant could be folded

    // into an immediate field.

    if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,

                         LU.AccessTy, *J, Base.getNumRegs() > 1))

      continue;


    // Collect all operands except *J.

    SmallVector<const SCEV *, 8> InnerAddOps(std::as_const(AddOps).begin(), J);

    InnerAddOps.append(std::next(J), std::as_const(AddOps).end());


    // Don't leave just a constant behind in a register if the constant could

    // be folded into an immediate field.

    if (InnerAddOps.size() == 1 &&

        isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,

                         LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))

      continue;


    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);

    if (InnerSum->isZero())

      continue;

    Formula F = Base;


    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())

      continue;


    // Add the remaining pieces of the add back into the new formula.

    const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);

    if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&

        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +

                                InnerSumSC->getValue()->getZExtValue())) {

      F.UnfoldedOffset =

          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +

                              InnerSumSC->getValue()->getZExtValue());

      if (IsScaledReg) {

        F.ScaledReg = nullptr;

        F.Scale = 0;

      } else

        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);

    } else if (IsScaledReg)

      F.ScaledReg = InnerSum;

    else

      F.BaseRegs[Idx] = InnerSum;


    // Add J as its own register, or an unfolded immediate.

    const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);

    if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&

        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +

                                SC->getValue()->getZExtValue()))

      F.UnfoldedOffset =

          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +

                              SC->getValue()->getZExtValue());

    else

      F.BaseRegs.push_back(*J);

    // We may have changed the number of register in base regs, adjust the

    // formula accordingly.

    F.canonicalize(*L);


    if (InsertFormula(LU, LUIdx, F))

      // If that formula hadn't been seen before, recurse to find more like

      // it.

      // Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2)

      // Because just Depth is not enough to bound compile time.

      // This means that every time AddOps.size() is greater 16^x we will add

      // x to Depth.

      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),

                             Depth + 1 + (Log2_32(AddOps.size()) >> 2));

  }

}


/// Split out subexpressions from adds and the bases of addrecs.

void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,

                                         Formula Base, unsigned Depth) {

  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  // Arbitrarily cap recursion to protect compile time.

  if (Depth >= 3)

    return;


  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)

    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);


  if (Base.Scale == 1)

    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,

                               /* Idx */ -1, /* IsScaledReg */ true);

}


///  Generate a formula consisting of all of the loop-dominating registers added

/// into a single register.

void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,

                                       Formula Base) {

  // This method is only interesting on a plurality of registers.

  if (Base.BaseRegs.size() + (Base.Scale == 1) +

          (Base.UnfoldedOffset.isNonZero()) <=

      1)

    return;


  // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before

  // processing the formula.

  Base.unscale();

  SmallVector<const SCEV *, 4> Ops;

  Formula NewBase = Base;

  NewBase.BaseRegs.clear();

  Type *CombinedIntegerType = nullptr;

  for (const SCEV *BaseReg : Base.BaseRegs) {

    if (SE.properlyDominates(BaseReg, L->getHeader()) &&

        !SE.hasComputableLoopEvolution(BaseReg, L)) {

      if (!CombinedIntegerType)

        CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());

      Ops.push_back(BaseReg);

    }

    else

      NewBase.BaseRegs.push_back(BaseReg);

  }


  // If no register is relevant, we're done.

  if (Ops.size() == 0)

    return;


  // Utility function for generating the required variants of the combined

  // registers.

  auto GenerateFormula = [&](const SCEV *Sum) {

    Formula F = NewBase;


    // TODO: If Sum is zero, it probably means ScalarEvolution missed an

    // opportunity to fold something. For now, just ignore such cases

    // rather than proceed with zero in a register.

    if (Sum->isZero())

      return;


    F.BaseRegs.push_back(Sum);

    F.canonicalize(*L);

    (void)InsertFormula(LU, LUIdx, F);

  };


  // If we collected at least two registers, generate a formula combining them.

  if (Ops.size() > 1) {

    SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.

    GenerateFormula(SE.getAddExpr(OpsCopy));

  }


  // If we have an unfolded offset, generate a formula combining it with the

  // registers collected.

  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {

    assert(CombinedIntegerType && "Missing a type for the unfolded offset");

    Ops.push_back(SE.getConstant(CombinedIntegerType,

                                 NewBase.UnfoldedOffset.getFixedValue(), true));

    NewBase.UnfoldedOffset = Immediate::getFixed(0);

    GenerateFormula(SE.getAddExpr(Ops));

  }

}


/// Helper function for LSRInstance::GenerateSymbolicOffsets.

void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,

                                              const Formula &Base, size_t Idx,

                                              bool IsScaledReg) {

  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  GlobalValue *GV = ExtractSymbol(G, SE);

  if (G->isZero() || !GV)

    return;

  Formula F = Base;

  F.BaseGV = GV;

  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    return;

  if (IsScaledReg)

    F.ScaledReg = G;

  else

    F.BaseRegs[Idx] = G;

  (void)InsertFormula(LU, LUIdx, F);

}


/// Generate reuse formulae using symbolic offsets.

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,

                                          Formula Base) {

  // We can't add a symbolic offset if the address already contains one.

  if (Base.BaseGV) return;


  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)

    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);

  if (Base.Scale == 1)

    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,

                                /* IsScaledReg */ true);

}


/// Helper function for LSRInstance::GenerateConstantOffsets.

void LSRInstance::GenerateConstantOffsetsImpl(

    LSRUse &LU, unsigned LUIdx, const Formula &Base,

    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {


  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {

    Formula F = Base;

    if (!Base.BaseOffset.isCompatibleImmediate(Offset))

      return;

    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);


    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {

      // Add the offset to the base register.

      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());

      const SCEV *NewG = SE.getAddExpr(NewOffset, G);

      // If it cancelled out, drop the base register, otherwise update it.

      if (NewG->isZero()) {

        if (IsScaledReg) {

          F.Scale = 0;

          F.ScaledReg = nullptr;

        } else

          F.deleteBaseReg(F.BaseRegs[Idx]);

        F.canonicalize(*L);

      } else if (IsScaledReg)

        F.ScaledReg = NewG;

      else

        F.BaseRegs[Idx] = NewG;


      (void)InsertFormula(LU, LUIdx, F);

    }

  };


  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];


  // With constant offsets and constant steps, we can generate pre-inc

  // accesses by having the offset equal the step. So, for access #0 with a

  // step of 8, we generate a G - 8 base which would require the first access

  // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer

  // for itself and hopefully becomes the base for other accesses. This means

  // means that a single pre-indexed access can be generated to become the new

  // base pointer for each iteration of the loop, resulting in no extra add/sub

  // instructions for pointer updating.

  if ((AMK & TTI::AMK_PreIndexed) && LU.Kind == LSRUse::Address) {

    const APInt *StepInt;

    if (match(G, m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StepInt)))) {

      int64_t Step = StepInt->isNegative() ? StepInt->getSExtValue()

                                           : StepInt->getZExtValue();


      for (Immediate Offset : Worklist) {

        if (Offset.isFixed()) {

          Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

          GenerateOffset(G, Offset);

        }

      }

    }

  }

  for (Immediate Offset : Worklist)

    GenerateOffset(G, Offset);


  Immediate Imm = ExtractImmediate(G, SE);

  if (G->isZero() || Imm.isZero() ||

      !Base.BaseOffset.isCompatibleImmediate(Imm))

    return;

  Formula F = Base;

  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);

  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

    return;

  if (IsScaledReg) {

    F.ScaledReg = G;

  } else {

    F.BaseRegs[Idx] = G;

    // We may generate non canonical Formula if G is a recurrent expr reg

    // related with current loop while F.ScaledReg is not.

    F.canonicalize(*L);

  }

  (void)InsertFormula(LU, LUIdx, F);

}


/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,

                                          Formula Base) {

  // TODO: For now, just add the min and max offset, because it usually isn't

  // worthwhile looking at everything inbetween.

  SmallVector<Immediate, 2> Worklist;

  Worklist.push_back(LU.MinOffset);

  if (LU.MaxOffset != LU.MinOffset)

    Worklist.push_back(LU.MaxOffset);


  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)

    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);

  if (Base.Scale == 1)

    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,

                                /* IsScaledReg */ true);

}


/// For ICmpZero, check to see if we can scale up the comparison. For example, x

/// == y -> x*c == y*c.

void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,

                                         Formula Base) {

  if (LU.Kind != LSRUse::ICmpZero) return;


  // Determine the integer type for the base formula.

  Type *IntTy = Base.getType();

  if (!IntTy) return;

  if (SE.getTypeSizeInBits(IntTy) > 64) return;


  // Don't do this if there is more than one offset.

  if (LU.MinOffset != LU.MaxOffset) return;


  // Check if transformation is valid. It is illegal to multiply pointer.

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())

    return;

  for (const SCEV *BaseReg : Base.BaseRegs)

    if (BaseReg->getType()->isPointerTy())

      return;

  assert(!Base.BaseGV && "ICmpZero use is not legal!");


  // Check each interesting stride.

  for (int64_t Factor : Factors) {

    // Check that Factor can be represented by IntTy

    if (!ConstantInt::isValueValidForType(IntTy, Factor))

      continue;

    // Check that the multiplication doesn't overflow.

    if (Base.BaseOffset.isMin() && Factor == -1)

      continue;

    // Not supporting scalable immediates.

    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())

      continue;

    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);

    assert(Factor != 0 && "Zero factor not expected!");

    if (NewBaseOffset.getFixedValue() / Factor !=

        Base.BaseOffset.getFixedValue())

      continue;

    // If the offset will be truncated at this use, check that it is in bounds.

    if (!IntTy->isPointerTy() &&

        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset.getFixedValue()))

      continue;


    // Check that multiplying with the use offset doesn't overflow.

    Immediate Offset = LU.MinOffset;

    if (Offset.isMin() && Factor == -1)

      continue;

    Offset = Offset.mulUnsigned(Factor);

    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

      continue;

    // If the offset will be truncated at this use, check that it is in bounds.

    if (!IntTy->isPointerTy() &&

        !ConstantInt::isValueValidForType(IntTy, Offset.getFixedValue()))

      continue;


    Formula F = Base;

    F.BaseOffset = NewBaseOffset;


    // Check that this scale is legal.

    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))

      continue;


    // Compensate for the use having MinOffset built into it.

    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);


    const SCEV *FactorS = SE.getConstant(IntTy, Factor);


    // Check that multiplying with each base register doesn't overflow.

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);

      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])

        goto next;

    }


    // Check that multiplying with the scaled register doesn't overflow.

    if (F.ScaledReg) {

      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);

      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)

        continue;

    }


    // Check that multiplying with the unfolded offset doesn't overflow.

    if (F.UnfoldedOffset.isNonZero()) {

      if (F.UnfoldedOffset.isMin() && Factor == -1)

        continue;

      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);

      if (F.UnfoldedOffset.getFixedValue() / Factor !=

          Base.UnfoldedOffset.getFixedValue())

        continue;

      // If the offset will be truncated, check that it is in bounds.

      if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(

                                       IntTy, F.UnfoldedOffset.getFixedValue()))

        continue;

    }


    // If we make it here and it's legal, add it.

    (void)InsertFormula(LU, LUIdx, F);

  next:;

  }

}


/// Generate stride factor reuse formulae by making use of scaled-offset address

/// modes, for example.

void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {

  // Determine the integer type for the base formula.

  Type *IntTy = Base.getType();

  if (!IntTy) return;


  // If this Formula already has a scaled register, we can't add another one.

  // Try to unscale the formula to generate a better scale.

  if (Base.Scale != 0 && !Base.unscale())

    return;


  assert(Base.Scale == 0 && "unscale did not did its job!");


  // Check each interesting stride.

  for (int64_t Factor : Factors) {

    Base.Scale = Factor;

    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    // Check whether this scale is going to be legal.

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,

                    Base)) {

      // As a special-case, handle special out-of-loop Basic users specially.

      // TODO: Reconsider this special case.

      if (LU.Kind == LSRUse::Basic &&

          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,

                     LU.AccessTy, Base) &&

          LU.AllFixupsOutsideLoop)

        LU.Kind = LSRUse::Special;

      else

        continue;

    }

    // For an ICmpZero, negating a solitary base register won't lead to

    // new solutions.

    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&

        Base.BaseOffset.isZero() && !Base.BaseGV)

      continue;

    // For each addrec base reg, if its loop is current loop, apply the scale.

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {

      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);

      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {

        const SCEV *FactorS = SE.getConstant(IntTy, Factor);

        if (FactorS->isZero())

          continue;

        // Divide out the factor, ignoring high bits, since we'll be

        // scaling the value back up in the end.

        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))

          if (!Quotient->isZero()) {

            // TODO: This could be optimized to avoid all the copying.

            Formula F = Base;

            F.ScaledReg = Quotient;

            F.deleteBaseReg(F.BaseRegs[i]);

            // The canonical representation of 1*reg is reg, which is already in

            // Base. In that case, do not try to insert the formula, it will be

            // rejected anyway.

            if (F.Scale == 1 && (F.BaseRegs.empty() ||

                                 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))

              continue;

            // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate

            // non canonical Formula with ScaledReg's loop not being L.

            if (F.Scale == 1 && LU.AllFixupsOutsideLoop)

              F.canonicalize(*L);

            (void)InsertFormula(LU, LUIdx, F);

          }

      }

    }

  }

}


/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.

/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then

/// perform the extension/truncate and normalize again, as the normalized form

/// can result in folds that are not valid in the post-inc use contexts. The

/// expressions for all PostIncLoopSets must match, otherwise return nullptr.

static const SCEV *


getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,

                                   const SCEV *Expr, Type *ToTy,

                                   ScalarEvolution &SE) {

  const SCEV *Result = nullptr;

  for (auto &L : Loops) {

    auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE);

    const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy);

    const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE);

    if (!New || (Result && New != Result))

      return nullptr;

    Result = New;

  }


  assert(Result && "failed to create expression");

  return Result;

}


/// Generate reuse formulae from different IV types.

void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {

  // Don't bother truncating symbolic values.

  if (Base.BaseGV) return;


  // Determine the integer type for the base formula.

  Type *DstTy = Base.getType();

  if (!DstTy) return;

  if (DstTy->isPointerTy())

    return;


  // It is invalid to extend a pointer type so exit early if ScaledReg or

  // any of the BaseRegs are pointers.

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())

    return;

  if (any_of(Base.BaseRegs,

             [](const SCEV *S) { return S->getType()->isPointerTy(); }))

    return;


  SmallVector<PostIncLoopSet> Loops;

  for (auto &LF : LU.Fixups)

    Loops.push_back(LF.PostIncLoops);


  for (Type *SrcTy : Types) {

    if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {

      Formula F = Base;


      // Sometimes SCEV is able to prove zero during ext transform. It may

      // happen if SCEV did not do all possible transforms while creating the

      // initial node (maybe due to depth limitations), but it can do them while

      // taking ext.

      if (F.ScaledReg) {

        const SCEV *NewScaledReg =

            getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE);

        if (!NewScaledReg || NewScaledReg->isZero())

          continue;

        F.ScaledReg = NewScaledReg;

      }

      bool HasZeroBaseReg = false;

      for (const SCEV *&BaseReg : F.BaseRegs) {

        const SCEV *NewBaseReg =

            getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE);

        if (!NewBaseReg || NewBaseReg->isZero()) {

          HasZeroBaseReg = true;

          break;

        }

        BaseReg = NewBaseReg;

      }

      if (HasZeroBaseReg)

        continue;


      // TODO: This assumes we've done basic processing on all uses and

      // have an idea what the register usage is.

      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

        continue;


      F.canonicalize(*L);

      (void)InsertFormula(LU, LUIdx, F);

    }

  }

}


namespace {


/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer

/// modifications so that the search phase doesn't have to worry about the data

/// structures moving underneath it.

struct WorkItem {

  size_t LUIdx;

  Immediate Imm;

  const SCEV *OrigReg;


  WorkItem(size_t LI, Immediate I, const SCEV *R)

      : LUIdx(LI), Imm(I), OrigReg(R) {}


  void print(raw_ostream &OS) const;

  void dump() const;

};


} // end anonymous namespace


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void WorkItem::print(raw_ostream &OS) const {

  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx

     << " , add offset " << Imm;

}


LLVM_DUMP_METHOD void WorkItem::dump() const {

  print(errs()); errs() << '\n';

}

#endif


/// Look for registers which are a constant distance apart and try to form reuse

/// opportunities between them.

void LSRInstance::GenerateCrossUseConstantOffsets() {

  // Group the registers by their value without any added constant offset.

  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;


  DenseMap<const SCEV *, ImmMapTy> Map;

  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;

  SmallVector<const SCEV *, 8> Sequence;

  for (const SCEV *Use : RegUses) {

    const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.

    Immediate Imm = ExtractImmediate(Reg, SE);

    auto Pair = Map.try_emplace(Reg);

    if (Pair.second)

      Sequence.push_back(Reg);

    Pair.first->second.insert(std::make_pair(Imm, Use));

    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

  }


  // Now examine each set of registers with the same base value. Build up

  // a list of work to do and do the work in a separate step so that we're

  // not adding formulae and register counts while we're searching.

  SmallVector<WorkItem, 32> WorkItems;

  SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>

      UniqueItems;

  for (const SCEV *Reg : Sequence) {

    const ImmMapTy &Imms = Map.find(Reg)->second;


    // It's not worthwhile looking for reuse if there's only one offset.

    if (Imms.size() == 1)

      continue;


    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';

               for (const auto &Entry

                    : Imms) dbgs()

               << ' ' << Entry.first;

               dbgs() << '\n');


    // Examine each offset.

    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();

         J != JE; ++J) {

      const SCEV *OrigReg = J->second;


      Immediate JImm = J->first;

      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);


      if (!isa<SCEVConstant>(OrigReg) &&

          UsedByIndicesMap[Reg].count() == 1) {

        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg

                          << '\n');

        continue;

      }


      // Conservatively examine offsets between this orig reg a few selected

      // other orig regs.

      Immediate First = Imms.begin()->first;

      Immediate Last = std::prev(Imms.end())->first;

      if (!First.isCompatibleImmediate(Last)) {

        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg

                          << "\n");

        continue;

      }

      // Only scalable if both terms are scalable, or if one is scalable and

      // the other is 0.

      bool Scalable = First.isScalable() || Last.isScalable();

      int64_t FI = First.getKnownMinValue();

      int64_t LI = Last.getKnownMinValue();

      // Compute (First + Last)  / 2 without overflow using the fact that

      // First + Last = 2 * (First + Last) + (First ^ Last).

      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);

      // If the result is negative and FI is odd and LI even (or vice versa),

      // we rounded towards -inf. Add 1 in that case, to round towards 0.

      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));

      ImmMapTy::const_iterator OtherImms[] = {

          Imms.begin(), std::prev(Imms.end()),

          Imms.lower_bound(Immediate::get(Avg, Scalable))};

      for (const auto &M : OtherImms) {

        if (M == J || M == JE) continue;

        if (!JImm.isCompatibleImmediate(M->first))

          continue;


        // Compute the difference between the two.

        Immediate Imm = JImm.subUnsigned(M->first);

        for (unsigned LUIdx : UsedByIndices.set_bits())

          // Make a memo of this use, offset, and register tuple.

          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)

            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));

      }

    }

  }


  Map.clear();

  Sequence.clear();

  UsedByIndicesMap.clear();

  UniqueItems.clear();


  // Now iterate through the worklist and add new formulae.

  for (const WorkItem &WI : WorkItems) {

    size_t LUIdx = WI.LUIdx;

    LSRUse &LU = Uses[LUIdx];

    Immediate Imm = WI.Imm;

    const SCEV *OrigReg = WI.OrigReg;


    Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());

    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);

    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);


    // TODO: Use a more targeted data structure.

    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {

      Formula F = LU.Formulae[L];

      // FIXME: The code for the scaled and unscaled registers looks

      // very similar but slightly different. Investigate if they

      // could be merged. That way, we would not have to unscale the

      // Formula.

      F.unscale();

      // Use the immediate in the scaled register.

      if (F.ScaledReg == OrigReg) {

        if (!F.BaseOffset.isCompatibleImmediate(Imm))

          continue;

        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));

        // Don't create 50 + reg(-50).

        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);

        if (F.referencesReg(S))

          continue;

        Formula NewF = F;

        NewF.BaseOffset = Offset;

        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,

                        NewF))

          continue;

        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);


        // If the new scale is a constant in a register, and adding the constant

        // value to the immediate would produce a value closer to zero than the

        // immediate itself, then the formula isn't worthwhile.

        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {

          // FIXME: Do we need to do something for scalable immediates here?

          //        A scalable SCEV won't be constant, but we might still have

          //        something in the offset? Bail out for now to be safe.

          if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())

            continue;

          if (C->getValue()->isNegative() !=

                  (NewF.BaseOffset.isLessThanZero()) &&

              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))

                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))

            continue;

        }


        // OK, looks good.

        NewF.canonicalize(*this->L);

        (void)InsertFormula(LU, LUIdx, NewF);

      } else {

        // Use the immediate in a base register.

        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {

          const SCEV *BaseReg = F.BaseRegs[N];

          if (BaseReg != OrigReg)

            continue;

          Formula NewF = F;

          if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||

              !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||

              !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))

            continue;

          NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);

          if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,

                          LU.Kind, LU.AccessTy, NewF)) {

            if (AMK == TTI::AMK_PostIndexed &&

                mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))

              continue;

            Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);

            if (!isLegalAddImmediate(TTI, NewUnfoldedOffset))

              continue;

            NewF = F;

            NewF.UnfoldedOffset = NewUnfoldedOffset;

          }

          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);


          // If the new formula has a constant in a register, and adding the

          // constant value to the immediate would produce a value closer to

          // zero than the immediate itself, then the formula isn't worthwhile.

          for (const SCEV *NewReg : NewF.BaseRegs)

            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {

              if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())

                goto skip_formula;

              if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())

                      .abs()

                      .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&

                  (C->getAPInt() + NewF.BaseOffset.getFixedValue())

                          .countr_zero() >=

                      (unsigned)llvm::countr_zero<uint64_t>(

                          NewF.BaseOffset.getFixedValue()))

                goto skip_formula;

            }


          // Ok, looks good.

          NewF.canonicalize(*this->L);

          (void)InsertFormula(LU, LUIdx, NewF);

          break;

        skip_formula:;

        }

      }

    }

  }

}


/// Generate formulae for each use.

void

LSRInstance::GenerateAllReuseFormulae() {

  // This is split into multiple loops so that hasRegsUsedByUsesOtherThan

  // queries are more precise.

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

    LSRUse &LU = Uses[LUIdx];

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);

  }

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

    LSRUse &LU = Uses[LUIdx];

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

      GenerateScales(LU, LUIdx, LU.Formulae[i]);

  }

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

    LSRUse &LU = Uses[LUIdx];

    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  }


  GenerateCrossUseConstantOffsets();


  LLVM_DEBUG(dbgs() << "\n"

                       "After generating reuse formulae:\n";

             print_uses(dbgs()));

}


/// If there are multiple formulae with the same set of registers used

/// by other uses, pick the best one and delete the others.

void LSRInstance::FilterOutUndesirableDedicatedRegisters() {

  DenseSet<const SCEV *> VisitedRegs;

  SmallPtrSet<const SCEV *, 16> Regs;

  SmallPtrSet<const SCEV *, 16> LoserRegs;

#ifndef NDEBUG

  bool ChangedFormulae = false;

#endif


  // Collect the best formula for each unique set of shared registers. This

  // is reset for each use.

  using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>, size_t>;


  BestFormulaeTy BestFormulae;


  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

    LSRUse &LU = Uses[LUIdx];

    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());

               dbgs() << '\n');


    bool Any = false;

    for (size_t FIdx = 0, NumForms = LU.Formulae.size();

         FIdx != NumForms; ++FIdx) {

      Formula &F = LU.Formulae[FIdx];


      // Some formulas are instant losers. For example, they may depend on

      // nonexistent AddRecs from other loops. These need to be filtered

      // immediately, otherwise heuristics could choose them over others leading

      // to an unsatisfactory solution. Passing LoserRegs into RateFormula here

      // avoids the need to recompute this information across formulae using the

      // same bad AddRec. Passing LoserRegs is also essential unless we remove

      // the corresponding bad register from the Regs set.

      Cost CostF(L, SE, TTI, AMK);

      Regs.clear();

      CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,

                        &LoserRegs);

      if (CostF.isLoser()) {

        // During initial formula generation, undesirable formulae are generated

        // by uses within other loops that have some non-trivial address mode or

        // use the postinc form of the IV. LSR needs to provide these formulae

        // as the basis of rediscovering the desired formula that uses an AddRec

        // corresponding to the existing phi. Once all formulae have been

        // generated, these initial losers may be pruned.

        LLVM_DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());

                   dbgs() << "\n");

      }

      else {

        SmallVector<const SCEV *, 4> Key;

        for (const SCEV *Reg : F.BaseRegs) {

          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))

            Key.push_back(Reg);

        }

        if (F.ScaledReg &&

            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))

          Key.push_back(F.ScaledReg);

        // Unstable sort by host order ok, because this is only used for

        // uniquifying.

        llvm::sort(Key);


        std::pair<BestFormulaeTy::const_iterator, bool> P =

          BestFormulae.insert(std::make_pair(Key, FIdx));

        if (P.second)

          continue;


        Formula &Best = LU.Formulae[P.first->second];


        Cost CostBest(L, SE, TTI, AMK);

        Regs.clear();

        CostBest.RateFormula(Best, Regs, VisitedRegs, LU,

                             HardwareLoopProfitable);

        if (CostF.isLess(CostBest))

          std::swap(F, Best);

        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());

                   dbgs() << "\n"

                             "    in favor of formula ";

                   Best.print(dbgs()); dbgs() << '\n');

      }

#ifndef NDEBUG

      ChangedFormulae = true;

#endif

      LU.DeleteFormula(F);

      --FIdx;

      --NumForms;

      Any = true;

    }


    // Now that we've filtered out some formulae, recompute the Regs set.

    if (Any)

      LU.RecomputeRegs(LUIdx, RegUses);


    // Reset this to prepare for the next use.

    BestFormulae.clear();

  }


  LLVM_DEBUG(if (ChangedFormulae) {

    dbgs() << "\n"

              "After filtering out undesirable candidates:\n";

    print_uses(dbgs());

  });

}


/// Estimate the worst-case number of solutions the solver might have to

/// consider. It almost never considers this many solutions because it prune the

/// search space, but the pruning isn't always sufficient.

size_t LSRInstance::EstimateSearchSpaceComplexity() const {

  size_t Power = 1;

  for (const LSRUse &LU : Uses) {

    size_t FSize = LU.Formulae.size();

    if (FSize >= ComplexityLimit) {

      Power = ComplexityLimit;

      break;

    }

    Power *= FSize;

    if (Power >= ComplexityLimit)

      break;

  }

  return Power;

}


/// When one formula uses a superset of the registers of another formula, it

/// won't help reduce register pressure (though it may not necessarily hurt

/// register pressure); remove it to simplify the system.

void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {

  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {

    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");


    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "

                         "which use a superset of registers used by other "

                         "formulae.\n");


    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

      LSRUse &LU = Uses[LUIdx];

      bool Any = false;

      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {

        Formula &F = LU.Formulae[i];

        if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())

          continue;

        // Look for a formula with a constant or GV in a register. If the use

        // also has a formula with that same value in an immediate field,

        // delete the one that uses a register.

        for (SmallVectorImpl<const SCEV *>::const_iterator

             I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {

          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {

            Formula NewF = F;

            //FIXME: Formulas should store bitwidth to do wrapping properly.

            //       See PR41034.

            NewF.BaseOffset =

                Immediate::getFixed(NewF.BaseOffset.getFixedValue() +

                                    (uint64_t)C->getValue()->getSExtValue());

            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +

                                (I - F.BaseRegs.begin()));

            if (LU.HasFormulaWithSameRegs(NewF)) {

              LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());

                         dbgs() << '\n');

              LU.DeleteFormula(F);

              --i;

              --e;

              Any = true;

              break;

            }

          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {

            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))

              if (!F.BaseGV) {

                Formula NewF = F;

                NewF.BaseGV = GV;

                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +

                                    (I - F.BaseRegs.begin()));

                if (LU.HasFormulaWithSameRegs(NewF)) {

                  LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());

                             dbgs() << '\n');

                  LU.DeleteFormula(F);

                  --i;

                  --e;

                  Any = true;

                  break;

                }

              }

          }

        }

      }

      if (Any)

        LU.RecomputeRegs(LUIdx, RegUses);

    }


    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));

  }

}


/// When there are many registers for expressions like A, A+1, A+2, etc.,

/// allocate a single register for them.

void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {

  if (EstimateSearchSpaceComplexity() < ComplexityLimit)

    return;


  LLVM_DEBUG(

      dbgs() << "The search space is too complex.\n"

                "Narrowing the search space by assuming that uses separated "

                "by a constant offset will use the same registers.\n");


  // This is especially useful for unrolled loops.


  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

    LSRUse &LU = Uses[LUIdx];

    for (const Formula &F : LU.Formulae) {

      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))

        continue;


      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);

      if (!LUThatHas)

        continue;


      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,

                              LU.Kind, LU.AccessTy))

        continue;


      LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');


      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;


      // Transfer the fixups of LU to LUThatHas.

      for (LSRFixup &Fixup : LU.Fixups) {

        Fixup.Offset += F.BaseOffset;

        LUThatHas->pushFixup(Fixup);

        LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');

      }


      // Delete formulae from the new use which are no longer legal.

      bool Any = false;

      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {

        Formula &F = LUThatHas->Formulae[i];

        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,

                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {

          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');

          LUThatHas->DeleteFormula(F);

          --i;

          --e;

          Any = true;

        }

      }


      if (Any)

        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);


      // Delete the old use.

      DeleteUse(LU, LUIdx);

      --LUIdx;

      --NumUses;

      break;

    }

  }


  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));

}


/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that

/// we've done more filtering, as it may be able to find more formulae to

/// eliminate.

void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){

  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {

    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");


    LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "

                         "undesirable dedicated registers.\n");


    FilterOutUndesirableDedicatedRegisters();


    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));

  }

}


/// If a LSRUse has multiple formulae with the same ScaledReg and Scale.

/// Pick the best one and delete the others.

/// This narrowing heuristic is to keep as many formulae with different

/// Scale and ScaledReg pair as possible while narrowing the search space.

/// The benefit is that it is more likely to find out a better solution

/// from a formulae set with more Scale and ScaledReg variations than

/// a formulae set with the same Scale and ScaledReg. The picking winner

/// reg heuristic will often keep the formulae with the same Scale and

/// ScaledReg and filter others, and we want to avoid that if possible.

void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {

  if (EstimateSearchSpaceComplexity() < ComplexityLimit)

    return;


  LLVM_DEBUG(

      dbgs() << "The search space is too complex.\n"

                "Narrowing the search space by choosing the best Formula "

                "from the Formulae with the same Scale and ScaledReg.\n");


  // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.

  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;


  BestFormulaeTy BestFormulae;

#ifndef NDEBUG

  bool ChangedFormulae = false;

#endif

  DenseSet<const SCEV *> VisitedRegs;

  SmallPtrSet<const SCEV *, 16> Regs;


  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

    LSRUse &LU = Uses[LUIdx];

    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());

               dbgs() << '\n');


    // Return true if Formula FA is better than Formula FB.

    auto IsBetterThan = [&](Formula &FA, Formula &FB) {

      // First we will try to choose the Formula with fewer new registers.

      // For a register used by current Formula, the more the register is

      // shared among LSRUses, the less we increase the register number

      // counter of the formula.

      size_t FARegNum = 0;

      for (const SCEV *Reg : FA.BaseRegs) {

        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);

        FARegNum += (NumUses - UsedByIndices.count() + 1);

      }

      size_t FBRegNum = 0;

      for (const SCEV *Reg : FB.BaseRegs) {

        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);

        FBRegNum += (NumUses - UsedByIndices.count() + 1);

      }

      if (FARegNum != FBRegNum)

        return FARegNum < FBRegNum;


      // If the new register numbers are the same, choose the Formula with

      // less Cost.

      Cost CostFA(L, SE, TTI, AMK);

      Cost CostFB(L, SE, TTI, AMK);

      Regs.clear();

      CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);

      Regs.clear();

      CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);

      return CostFA.isLess(CostFB);

    };


    bool Any = false;

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;

         ++FIdx) {

      Formula &F = LU.Formulae[FIdx];

      if (!F.ScaledReg)

        continue;

      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});

      if (P.second)

        continue;


      Formula &Best = LU.Formulae[P.first->second];

      if (IsBetterThan(F, Best))

        std::swap(F, Best);

      LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());

                 dbgs() << "\n"

                           "    in favor of formula ";

                 Best.print(dbgs()); dbgs() << '\n');

#ifndef NDEBUG

      ChangedFormulae = true;

#endif

      LU.DeleteFormula(F);

      --FIdx;

      --NumForms;

      Any = true;

    }

    if (Any)

      LU.RecomputeRegs(LUIdx, RegUses);


    // Reset this to prepare for the next use.

    BestFormulae.clear();

  }


  LLVM_DEBUG(if (ChangedFormulae) {

    dbgs() << "\n"

              "After filtering out undesirable candidates:\n";

    print_uses(dbgs());

  });

}


/// If we are over the complexity limit, filter out any post-inc prefering

/// variables to only post-inc values.

void LSRInstance::NarrowSearchSpaceByFilterPostInc() {

  if (AMK != TTI::AMK_PostIndexed)

    return;

  if (EstimateSearchSpaceComplexity() < ComplexityLimit)

    return;


  LLVM_DEBUG(dbgs() << "The search space is too complex.\n"

                       "Narrowing the search space by choosing the lowest "

                       "register Formula for PostInc Uses.\n");


  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

    LSRUse &LU = Uses[LUIdx];


    if (LU.Kind != LSRUse::Address)

      continue;

    if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&

        !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))

      continue;


    size_t MinRegs = std::numeric_limits<size_t>::max();

    for (const Formula &F : LU.Formulae)

      MinRegs = std::min(F.getNumRegs(), MinRegs);


    bool Any = false;

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;

         ++FIdx) {

      Formula &F = LU.Formulae[FIdx];

      if (F.getNumRegs() > MinRegs) {

        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());

                   dbgs() << "\n");

        LU.DeleteFormula(F);

        --FIdx;

        --NumForms;

        Any = true;

      }

    }

    if (Any)

      LU.RecomputeRegs(LUIdx, RegUses);


    if (EstimateSearchSpaceComplexity() < ComplexityLimit)

      break;

  }


  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));

}


/// The function delete formulas with high registers number expectation.

/// Assuming we don't know the value of each formula (already delete

/// all inefficient), generate probability of not selecting for each

/// register.

/// For example,

/// Use1:

///  reg(a) + reg({0,+,1})

///  reg(a) + reg({-1,+,1}) + 1

///  reg({a,+,1})

/// Use2:

///  reg(b) + reg({0,+,1})

///  reg(b) + reg({-1,+,1}) + 1

///  reg({b,+,1})

/// Use3:

///  reg(c) + reg(b) + reg({0,+,1})

///  reg(c) + reg({b,+,1})

///

/// Probability of not selecting

///                 Use1   Use2    Use3

/// reg(a)         (1/3) *   1   *   1

/// reg(b)           1   * (1/3) * (1/2)

/// reg({0,+,1})   (2/3) * (2/3) * (1/2)

/// reg({-1,+,1})  (2/3) * (2/3) *   1

/// reg({a,+,1})   (2/3) *   1   *   1

/// reg({b,+,1})     1   * (2/3) * (2/3)

/// reg(c)           1   *   1   *   0

///

/// Now count registers number mathematical expectation for each formula:

/// Note that for each use we exclude probability if not selecting for the use.

/// For example for Use1 probability for reg(a) would be just 1 * 1 (excluding

/// probabilty 1/3 of not selecting for Use1).

/// Use1:

///  reg(a) + reg({0,+,1})          1 + 1/3       -- to be deleted

///  reg(a) + reg({-1,+,1}) + 1     1 + 4/9       -- to be deleted

///  reg({a,+,1})                   1

/// Use2:

///  reg(b) + reg({0,+,1})          1/2 + 1/3     -- to be deleted

///  reg(b) + reg({-1,+,1}) + 1     1/2 + 2/3     -- to be deleted

///  reg({b,+,1})                   2/3

/// Use3:

///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted

///  reg(c) + reg({b,+,1})          1 + 2/3

void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {

  if (EstimateSearchSpaceComplexity() < ComplexityLimit)

    return;

  // Ok, we have too many of formulae on our hands to conveniently handle.

  // Use a rough heuristic to thin out the list.


  // Set of Regs wich will be 100% used in final solution.

  // Used in each formula of a solution (in example above this is reg(c)).

  // We can skip them in calculations.

  SmallPtrSet<const SCEV *, 4> UniqRegs;

  LLVM_DEBUG(dbgs() << "The search space is too complex.\n");


  // Map each register to probability of not selecting

  DenseMap <const SCEV *, float> RegNumMap;

  for (const SCEV *Reg : RegUses) {

    if (UniqRegs.count(Reg))

      continue;

    float PNotSel = 1;

    for (const LSRUse &LU : Uses) {

      if (!LU.Regs.count(Reg))

        continue;

      float P = LU.getNotSelectedProbability(Reg);

      if (P != 0.0)

        PNotSel *= P;

      else

        UniqRegs.insert(Reg);

    }

    RegNumMap.insert(std::make_pair(Reg, PNotSel));

  }


  LLVM_DEBUG(

      dbgs() << "Narrowing the search space by deleting costly formulas\n");


  // Delete formulas where registers number expectation is high.

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

    LSRUse &LU = Uses[LUIdx];

    // If nothing to delete - continue.

    if (LU.Formulae.size() < 2)

      continue;

    // This is temporary solution to test performance. Float should be

    // replaced with round independent type (based on integers) to avoid

    // different results for different target builds.

    float FMinRegNum = LU.Formulae[0].getNumRegs();

    float FMinARegNum = LU.Formulae[0].getNumRegs();

    size_t MinIdx = 0;

    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {

      Formula &F = LU.Formulae[i];

      float FRegNum = 0;

      float FARegNum = 0;

      for (const SCEV *BaseReg : F.BaseRegs) {

        if (UniqRegs.count(BaseReg))

          continue;

        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);

        if (isa<SCEVAddRecExpr>(BaseReg))

          FARegNum +=

              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);

      }

      if (const SCEV *ScaledReg = F.ScaledReg) {

        if (!UniqRegs.count(ScaledReg)) {

          FRegNum +=

              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);

          if (isa<SCEVAddRecExpr>(ScaledReg))

            FARegNum +=

                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);

        }

      }

      if (FMinRegNum > FRegNum ||

          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {

        FMinRegNum = FRegNum;

        FMinARegNum = FARegNum;

        MinIdx = i;

      }

    }

    LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());

               dbgs() << " with min reg num " << FMinRegNum << '\n');

    if (MinIdx != 0)

      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);

    while (LU.Formulae.size() != 1) {

      LLVM_DEBUG(dbgs() << "  Deleting "; LU.Formulae.back().print(dbgs());

                 dbgs() << '\n');

      LU.Formulae.pop_back();

    }

    LU.RecomputeRegs(LUIdx, RegUses);

    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");

    Formula &F = LU.Formulae[0];

    LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');

    // When we choose the formula, the regs become unique.

    UniqRegs.insert_range(F.BaseRegs);

    if (F.ScaledReg)

      UniqRegs.insert(F.ScaledReg);

  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));

}


// Check if Best and Reg are SCEVs separated by a constant amount C, and if so

// would the addressing offset +C would be legal where the negative offset -C is

// not.


static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,

                                       ScalarEvolution &SE, const SCEV *Best,

                                       const SCEV *Reg,

                                       MemAccessTy AccessType) {

  if (Best->getType() != Reg->getType() ||

      (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&

       cast<SCEVAddRecExpr>(Best)->getLoop() !=

           cast<SCEVAddRecExpr>(Reg)->getLoop()))

    return false;

  std::optional<APInt> Diff = SE.computeConstantDifference(Best, Reg);

  if (!Diff)

    return false;


  return TTI.isLegalAddressingMode(

             AccessType.MemTy, /*BaseGV=*/nullptr,

             /*BaseOffset=*/Diff->getSExtValue(),

             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&

         !TTI.isLegalAddressingMode(

             AccessType.MemTy, /*BaseGV=*/nullptr,

             /*BaseOffset=*/-Diff->getSExtValue(),

             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);

}


/// Pick a register which seems likely to be profitable, and then in any use

/// which has any reference to that register, delete all formulae which do not

/// reference that register.

void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {

  // With all other options exhausted, loop until the system is simple

  // enough to handle.

  SmallPtrSet<const SCEV *, 4> Taken;

  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {

    // Ok, we have too many of formulae on our hands to conveniently handle.

    // Use a rough heuristic to thin out the list.

    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");


    // Pick the register which is used by the most LSRUses, which is likely

    // to be a good reuse register candidate.

    const SCEV *Best = nullptr;

    unsigned BestNum = 0;

    for (const SCEV *Reg : RegUses) {

      if (Taken.count(Reg))

        continue;

      if (!Best) {

        Best = Reg;

        BestNum = RegUses.getUsedByIndices(Reg).count();

      } else {

        unsigned Count = RegUses.getUsedByIndices(Reg).count();

        if (Count > BestNum) {

          Best = Reg;

          BestNum = Count;

        }


        // If the scores are the same, but the Reg is simpler for the target

        // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can

        // handle +C but not -C), opt for the simpler formula.

        if (Count == BestNum) {

          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();

          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&

              IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,

                                         Uses[LUIdx].AccessTy)) {

            Best = Reg;

            BestNum = Count;

          }

        }

      }

    }

    assert(Best && "Failed to find best LSRUse candidate");


    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best

                      << " will yield profitable reuse.\n");

    Taken.insert(Best);


    // In any use with formulae which references this register, delete formulae

    // which don't reference it.

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

      LSRUse &LU = Uses[LUIdx];

      if (!LU.Regs.count(Best)) continue;


      bool Any = false;

      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {

        Formula &F = LU.Formulae[i];

        if (!F.referencesReg(Best)) {

          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');

          LU.DeleteFormula(F);

          --e;

          --i;

          Any = true;

          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");

          continue;

        }

      }


      if (Any)

        LU.RecomputeRegs(LUIdx, RegUses);

    }


    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));

  }

}


/// If there are an extraordinary number of formulae to choose from, use some

/// rough heuristics to prune down the number of formulae. This keeps the main

/// solver from taking an extraordinary amount of time in some worst-case

/// scenarios.

void LSRInstance::NarrowSearchSpaceUsingHeuristics() {

  NarrowSearchSpaceByDetectingSupersets();

  NarrowSearchSpaceByCollapsingUnrolledCode();

  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();

  if (FilterSameScaledReg)

    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();

  NarrowSearchSpaceByFilterPostInc();

  if (LSRExpNarrow)

    NarrowSearchSpaceByDeletingCostlyFormulas();

  else

    NarrowSearchSpaceByPickingWinnerRegs();

}


/// This is the recursive solver.

void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,

                               Cost &SolutionCost,

                               SmallVectorImpl<const Formula *> &Workspace,

                               const Cost &CurCost,

                               const SmallPtrSet<const SCEV *, 16> &CurRegs,

                               DenseSet<const SCEV *> &VisitedRegs) const {

  // Some ideas:

  //  - prune more:

  //    - use more aggressive filtering

  //    - sort the formula so that the most profitable solutions are found first

  //    - sort the uses too

  //  - search faster:

  //    - don't compute a cost, and then compare. compare while computing a cost

  //      and bail early.

  //    - track register sets with SmallBitVector


  const LSRUse &LU = Uses[Workspace.size()];


  // If this use references any register that's already a part of the

  // in-progress solution, consider it a requirement that a formula must

  // reference that register in order to be considered. This prunes out

  // unprofitable searching.

  SmallSetVector<const SCEV *, 4> ReqRegs;

  for (const SCEV *S : CurRegs)

    if (LU.Regs.count(S))

      ReqRegs.insert(S);


  SmallPtrSet<const SCEV *, 16> NewRegs;

  Cost NewCost(L, SE, TTI, AMK);

  for (const Formula &F : LU.Formulae) {

    // Ignore formulae which may not be ideal in terms of register reuse of

    // ReqRegs.  The formula should use all required registers before

    // introducing new ones.

    // This can sometimes (notably when trying to favour postinc) lead to

    // sub-optimial decisions. There it is best left to the cost modelling to

    // get correct.

    if (!(AMK & TTI::AMK_PostIndexed) || LU.Kind != LSRUse::Address) {

      int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());

      for (const SCEV *Reg : ReqRegs) {

        if ((F.ScaledReg && F.ScaledReg == Reg) ||

            is_contained(F.BaseRegs, Reg)) {

          --NumReqRegsToFind;

          if (NumReqRegsToFind == 0)

            break;

        }

      }

      if (NumReqRegsToFind != 0) {

        // If none of the formulae satisfied the required registers, then we could

        // clear ReqRegs and try again. Currently, we simply give up in this case.

        continue;

      }

    }


    // Evaluate the cost of the current formula. If it's already worse than

    // the current best, prune the search at that point.

    NewCost = CurCost;

    NewRegs = CurRegs;

    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);

    if (NewCost.isLess(SolutionCost)) {

      Workspace.push_back(&F);

      if (Workspace.size() != Uses.size()) {

        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,

                     NewRegs, VisitedRegs);

        if (F.getNumRegs() == 1 && Workspace.size() == 1)

          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);

      } else {

        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());

                   dbgs() << ".\nRegs:\n";

                   for (const SCEV *S : NewRegs) dbgs()

                      << "- " << *S << "\n";

                   dbgs() << '\n');


        SolutionCost = NewCost;

        Solution = Workspace;

      }

      Workspace.pop_back();

    }

  }

}


/// Choose one formula from each use. Return the results in the given Solution

/// vector.

void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {

  SmallVector<const Formula *, 8> Workspace;

  Cost SolutionCost(L, SE, TTI, AMK);

  SolutionCost.Lose();

  Cost CurCost(L, SE, TTI, AMK);

  SmallPtrSet<const SCEV *, 16> CurRegs;

  DenseSet<const SCEV *> VisitedRegs;

  Workspace.reserve(Uses.size());


  // SolveRecurse does all the work.

  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,

               CurRegs, VisitedRegs);

  if (Solution.empty()) {

    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");

    return;

  }


  // Ok, we've now made all our decisions.

  LLVM_DEBUG(dbgs() << "\n"

                       "The chosen solution requires ";

             SolutionCost.print(dbgs()); dbgs() << ":\n";

             for (size_t i = 0, e = Uses.size(); i != e; ++i) {

               dbgs() << "  ";

               Uses[i].print(dbgs());

               dbgs() << "\n"

                         "    ";

               Solution[i]->print(dbgs());

               dbgs() << '\n';

             });


  assert(Solution.size() == Uses.size() && "Malformed solution!");


  const bool EnableDropUnprofitableSolution = [&] {

    switch (AllowDropSolutionIfLessProfitable) {

    case cl::BOU_TRUE:

      return true;

    case cl::BOU_FALSE:

      return false;

    case cl::BOU_UNSET:

      return TTI.shouldDropLSRSolutionIfLessProfitable();

    }

    llvm_unreachable("Unhandled cl::boolOrDefault enum");

  }();


  if (BaselineCost.isLess(SolutionCost)) {

    if (!EnableDropUnprofitableSolution)

      LLVM_DEBUG(

          dbgs() << "Baseline is more profitable than chosen solution, "

                    "add option 'lsr-drop-solution' to drop LSR solution.\n");

    else {

      LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "

                           "solution, dropping LSR solution.\n";);

      Solution.clear();

    }

  }

}


/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as

/// we can go while still being dominated by the input positions. This helps

/// canonicalize the insert position, which encourages sharing.

BasicBlock::iterator

LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,

                                 const SmallVectorImpl<Instruction *> &Inputs)

                                                                         const {

  Instruction *Tentative = &*IP;

  while (true) {

    bool AllDominate = true;

    Instruction *BetterPos = nullptr;

    // Don't bother attempting to insert before a catchswitch, their basic block

    // cannot have other non-PHI instructions.

    if (isa<CatchSwitchInst>(Tentative))

      return IP;


    for (Instruction *Inst : Inputs) {

      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {

        AllDominate = false;

        break;

      }

      // Attempt to find an insert position in the middle of the block,

      // instead of at the end, so that it can be used for other expansions.

      if (Tentative->getParent() == Inst->getParent() &&

          (!BetterPos || !DT.dominates(Inst, BetterPos)))

        BetterPos = &*std::next(BasicBlock::iterator(Inst));

    }

    if (!AllDominate)

      break;

    if (BetterPos)

      IP = BetterPos->getIterator();

    else

      IP = Tentative->getIterator();


    const Loop *IPLoop = LI.getLoopFor(IP->getParent());

    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;


    BasicBlock *IDom;

    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {

      if (!Rung) return IP;

      Rung = Rung->getIDom();

      if (!Rung) return IP;

      IDom = Rung->getBlock();


      // Don't climb into a loop though.

      const Loop *IDomLoop = LI.getLoopFor(IDom);

      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;

      if (IDomDepth <= IPLoopDepth &&

          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))

        break;

    }


    Tentative = IDom->getTerminator();

  }


  return IP;

}


/// Determine an input position which will be dominated by the operands and

/// which will dominate the result.

BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(

    BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {

  // Collect some instructions which must be dominated by the

  // expanding replacement. These must be dominated by any operands that

  // will be required in the expansion.

  SmallVector<Instruction *, 4> Inputs;

  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))

    Inputs.push_back(I);

  if (LU.Kind == LSRUse::ICmpZero)

    if (Instruction *I =

          dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))

      Inputs.push_back(I);

  if (LF.PostIncLoops.count(L)) {

    if (LF.isUseFullyOutsideLoop(L))

      Inputs.push_back(L->getLoopLatch()->getTerminator());

    else

      Inputs.push_back(IVIncInsertPos);

  }

  // The expansion must also be dominated by the increment positions of any

  // loops it for which it is using post-inc mode.

  for (const Loop *PIL : LF.PostIncLoops) {

    if (PIL == L) continue;


    // Be dominated by the loop exit.

    SmallVector<BasicBlock *, 4> ExitingBlocks;

    PIL->getExitingBlocks(ExitingBlocks);

    if (!ExitingBlocks.empty()) {

      BasicBlock *BB = ExitingBlocks[0];

      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)

        BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);

      Inputs.push_back(BB->getTerminator());

    }

  }


  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() &&

         "Insertion point must be a normal instruction");


  // Then, climb up the immediate dominator tree as far as we can go while

  // still being dominated by the input positions.

  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);


  // Don't insert instructions before PHI nodes.

  while (isa<PHINode>(IP)) ++IP;


  // Ignore landingpad instructions.

  while (IP->isEHPad()) ++IP;


  // Set IP below instructions recently inserted by SCEVExpander. This keeps the

  // IP consistent across expansions and allows the previously inserted

  // instructions to be reused by subsequent expansion.

  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)

    ++IP;


  return IP;

}


/// Emit instructions for the leading candidate expression for this LSRUse (this

/// is called "expanding").

Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,

                           const Formula &F, BasicBlock::iterator IP,

                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {

  if (LU.RigidFormula)

    return LF.OperandValToReplace;


  // Determine an input position which will be dominated by the operands and

  // which will dominate the result.

  IP = AdjustInsertPositionForExpand(IP, LF, LU);

  Rewriter.setInsertPoint(&*IP);


  // Inform the Rewriter if we have a post-increment use, so that it can

  // perform an advantageous expansion.

  Rewriter.setPostInc(LF.PostIncLoops);


  // This is the type that the user actually needs.

  Type *OpTy = LF.OperandValToReplace->getType();

  // This will be the type that we'll initially expand to.

  Type *Ty = F.getType();

  if (!Ty)

    // No type known; just expand directly to the ultimate type.

    Ty = OpTy;

  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))

    // Expand directly to the ultimate type if it's the right size.

    Ty = OpTy;

  // This is the type to do integer arithmetic in.

  Type *IntTy = SE.getEffectiveSCEVType(Ty);


  // Build up a list of operands to add together to form the full base.

  SmallVector<const SCEV *, 8> Ops;


  // Expand the BaseRegs portion.

  for (const SCEV *Reg : F.BaseRegs) {

    assert(!Reg->isZero() && "Zero allocated in a base register!");


    // If we're expanding for a post-inc user, make the post-inc adjustment.

    Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);

    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));

  }


  // Expand the ScaledReg portion.

  Value *ICmpScaledV = nullptr;

  if (F.Scale != 0) {

    const SCEV *ScaledS = F.ScaledReg;


    // If we're expanding for a post-inc user, make the post-inc adjustment.

    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);

    ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);


    if (LU.Kind == LSRUse::ICmpZero) {

      // Expand ScaleReg as if it was part of the base regs.

      if (F.Scale == 1)

        Ops.push_back(

            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));

      else {

        // An interesting way of "folding" with an icmp is to use a negated

        // scale, which we'll implement by inserting it into the other operand

        // of the icmp.

        assert(F.Scale == -1 &&

               "The only scale supported by ICmpZero uses is -1!");

        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);

      }

    } else {

      // Otherwise just expand the scaled register and an explicit scale,

      // which is expected to be matched as part of the address.


      // Flush the operand list to suppress SCEVExpander hoisting address modes.

      // Unless the addressing mode will not be folded.

      if (!Ops.empty() && LU.Kind == LSRUse::Address &&

          isAMCompletelyFolded(TTI, LU, F)) {

        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);

        Ops.clear();

        Ops.push_back(SE.getUnknown(FullV));

      }

      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));

      if (F.Scale != 1)

        ScaledS =

            SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));

      Ops.push_back(ScaledS);

    }

  }


  // Expand the GV portion.

  if (F.BaseGV) {

    // Flush the operand list to suppress SCEVExpander hoisting.

    if (!Ops.empty()) {

      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy);

      Ops.clear();

      Ops.push_back(SE.getUnknown(FullV));

    }

    Ops.push_back(SE.getUnknown(F.BaseGV));

  }


  // Flush the operand list to suppress SCEVExpander hoisting of both folded and

  // unfolded offsets. LSR assumes they both live next to their uses.

  if (!Ops.empty()) {

    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);

    Ops.clear();

    Ops.push_back(SE.getUnknown(FullV));

  }


  // FIXME: Are we sure we won't get a mismatch here? Is there a way to bail

  // out at this point, or should we generate a SCEV adding together mixed

  // offsets?

  assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&

         "Expanding mismatched offsets\n");

  // Expand the immediate portion.

  Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);

  if (Offset.isNonZero()) {

    if (LU.Kind == LSRUse::ICmpZero) {

      // The other interesting way of "folding" with an ICmpZero is to use a

      // negated immediate.

      if (!ICmpScaledV)

        ICmpScaledV =

            ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());

      else {

        Ops.push_back(SE.getUnknown(ICmpScaledV));

        ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());

      }

    } else {

      // Just add the immediate values. These again are expected to be matched

      // as part of the address.

      Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));

    }

  }


  // Expand the unfolded offset portion.

  Immediate UnfoldedOffset = F.UnfoldedOffset;

  if (UnfoldedOffset.isNonZero()) {

    // Just add the immediate values.

    Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));

  }


  // Emit instructions summing all the operands.

  const SCEV *FullS = Ops.empty() ?

                      SE.getConstant(IntTy, 0) :

                      SE.getAddExpr(Ops);

  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);


  // We're done expanding now, so reset the rewriter.

  Rewriter.clearPostInc();


  // An ICmpZero Formula represents an ICmp which we're handling as a

  // comparison against zero. Now that we've expanded an expression for that

  // form, update the ICmp's other operand.

  if (LU.Kind == LSRUse::ICmpZero) {

    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);

    if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))

      DeadInsts.emplace_back(OperandIsInstr);

    assert(!F.BaseGV && "ICmp does not support folding a global value and "

                           "a scale at the same time!");

    if (F.Scale == -1) {

      if (ICmpScaledV->getType() != OpTy) {

        Instruction *Cast = CastInst::Create(

            CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),

            ICmpScaledV, OpTy, "tmp", CI->getIterator());

        ICmpScaledV = Cast;

      }

      CI->setOperand(1, ICmpScaledV);

    } else {

      // A scale of 1 means that the scale has been expanded as part of the

      // base regs.

      assert((F.Scale == 0 || F.Scale == 1) &&

             "ICmp does not support folding a global value and "

             "a scale at the same time!");

      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),

                                           -(uint64_t)Offset.getFixedValue());

      if (C->getType() != OpTy) {

        C = ConstantFoldCastOperand(

            CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,

            CI->getDataLayout());

        assert(C && "Cast of ConstantInt should have folded");

      }


      CI->setOperand(1, C);

    }

  }


  return FullV;

}


/// Helper for Rewrite. PHI nodes are special because the use of their operands

/// effectively happens in their predecessor blocks, so the expression may need

/// to be expanded in multiple places.

void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,

                                const LSRFixup &LF, const Formula &F,

                                SmallVectorImpl<WeakTrackingVH> &DeadInsts) {

  DenseMap<BasicBlock *, Value *> Inserted;


  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)

    if (PN->getIncomingValue(i) == LF.OperandValToReplace) {

      bool needUpdateFixups = false;

      BasicBlock *BB = PN->getIncomingBlock(i);


      // If this is a critical edge, split the edge so that we do not insert

      // the code on all predecessor/successor paths.  We do this unless this

      // is the canonical backedge for this loop, which complicates post-inc

      // users.

      if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&

          !isa<IndirectBrInst>(BB->getTerminator()) &&

          !isa<CatchSwitchInst>(BB->getTerminator())) {

        BasicBlock *Parent = PN->getParent();

        Loop *PNLoop = LI.getLoopFor(Parent);

        if (!PNLoop || Parent != PNLoop->getHeader()) {

          // Split the critical edge.

          BasicBlock *NewBB = nullptr;

          if (!Parent->isLandingPad()) {

            NewBB =

                SplitCriticalEdge(BB, Parent,

                                  CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)

                                      .setMergeIdenticalEdges()

                                      .setKeepOneInputPHIs());

          } else {

            SmallVector<BasicBlock*, 2> NewBBs;

            DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

            SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DTU, &LI);

            NewBB = NewBBs[0];

          }

          // If NewBB==NULL, then SplitCriticalEdge refused to split because all

          // phi predecessors are identical. The simple thing to do is skip

          // splitting in this case rather than complicate the API.

          if (NewBB) {

            // If PN is outside of the loop and BB is in the loop, we want to

            // move the block to be immediately before the PHI block, not

            // immediately after BB.

            if (L->contains(BB) && !L->contains(PN))

              NewBB->moveBefore(PN->getParent());


            // Splitting the edge can reduce the number of PHI entries we have.

            e = PN->getNumIncomingValues();

            BB = NewBB;

            i = PN->getBasicBlockIndex(BB);


            needUpdateFixups = true;

          }

        }

      }


      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =

          Inserted.try_emplace(BB);

      if (!Pair.second)

        PN->setIncomingValue(i, Pair.first->second);

      else {

        Value *FullV =

            Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);


        // If this is reuse-by-noop-cast, insert the noop cast.

        Type *OpTy = LF.OperandValToReplace->getType();

        if (FullV->getType() != OpTy)

          FullV = CastInst::Create(

              CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,

              LF.OperandValToReplace->getType(), "tmp",

              BB->getTerminator()->getIterator());


        // If the incoming block for this value is not in the loop, it means the

        // current PHI is not in a loop exit, so we must create a LCSSA PHI for

        // the inserted value.

        if (auto *I = dyn_cast<Instruction>(FullV))

          if (L->contains(I) && !L->contains(BB))

            InsertedNonLCSSAInsts.insert(I);


        PN->setIncomingValue(i, FullV);

        Pair.first->second = FullV;

      }


      // If LSR splits critical edge and phi node has other pending

      // fixup operands, we need to update those pending fixups. Otherwise

      // formulae will not be implemented completely and some instructions

      // will not be eliminated.

      if (needUpdateFixups) {

        for (LSRUse &LU : Uses)

          for (LSRFixup &Fixup : LU.Fixups)

            // If fixup is supposed to rewrite some operand in the phi

            // that was just updated, it may be already moved to

            // another phi node. Such fixup requires update.

            if (Fixup.UserInst == PN) {

              // Check if the operand we try to replace still exists in the

              // original phi.

              bool foundInOriginalPHI = false;

              for (const auto &val : PN->incoming_values())

                if (val == Fixup.OperandValToReplace) {

                  foundInOriginalPHI = true;

                  break;

                }


              // If fixup operand found in original PHI - nothing to do.

              if (foundInOriginalPHI)

                continue;


              // Otherwise it might be moved to another PHI and requires update.

              // If fixup operand not found in any of the incoming blocks that

              // means we have already rewritten it - nothing to do.

              for (const auto &Block : PN->blocks())

                for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);

                     ++I) {

                  PHINode *NewPN = cast<PHINode>(I);

                  for (const auto &val : NewPN->incoming_values())

                    if (val == Fixup.OperandValToReplace)

                      Fixup.UserInst = NewPN;

                }

            }

      }

    }

}


/// Emit instructions for the leading candidate expression for this LSRUse (this

/// is called "expanding"), and update the UserInst to reference the newly

/// expanded value.

void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,

                          const Formula &F,

                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) {

  // First, find an insertion point that dominates UserInst. For PHI nodes,

  // find the nearest block which dominates all the relevant uses.

  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {

    RewriteForPHI(PN, LU, LF, F, DeadInsts);

  } else {

    Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);


    // If this is reuse-by-noop-cast, insert the noop cast.

    Type *OpTy = LF.OperandValToReplace->getType();

    if (FullV->getType() != OpTy) {

      Instruction *Cast =

          CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),

                           FullV, OpTy, "tmp", LF.UserInst->getIterator());

      FullV = Cast;

    }


    // Update the user. ICmpZero is handled specially here (for now) because

    // Expand may have updated one of the operands of the icmp already, and

    // its new value may happen to be equal to LF.OperandValToReplace, in

    // which case doing replaceUsesOfWith leads to replacing both operands

    // with the same value. TODO: Reorganize this.

    if (LU.Kind == LSRUse::ICmpZero)

      LF.UserInst->setOperand(0, FullV);

    else

      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);

  }


  if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))

    DeadInsts.emplace_back(OperandIsInstr);

}


// Trying to hoist the IVInc to loop header if all IVInc users are in

// the loop header. It will help backend to generate post index load/store

// when the latch block is different from loop header block.


static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,

                          const LSRUse &LU, Instruction *IVIncInsertPos,

                          Loop *L) {

  if (LU.Kind != LSRUse::Address)

    return false;


  // For now this code do the conservative optimization, only work for

  // the header block. Later we can hoist the IVInc to the block post

  // dominate all users.

  BasicBlock *LHeader = L->getHeader();

  if (IVIncInsertPos->getParent() == LHeader)

    return false;


  if (!Fixup.OperandValToReplace ||

      any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {

        Instruction *UI = cast<Instruction>(U);

        return UI->getParent() != LHeader;

      }))

    return false;


  Instruction *I = Fixup.UserInst;

  Type *Ty = I->getType();

  return (isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||

         (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty));

}


/// Rewrite all the fixup locations with new values, following the chosen

/// solution.

void LSRInstance::ImplementSolution(

    const SmallVectorImpl<const Formula *> &Solution) {

  // Keep track of instructions we may have made dead, so that

  // we can remove them after we are done working.

  SmallVector<WeakTrackingVH, 16> DeadInsts;


  // Mark phi nodes that terminate chains so the expander tries to reuse them.

  for (const IVChain &Chain : IVChainVec) {

    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))

      Rewriter.setChainedPhi(PN);

  }


  // Expand the new value definitions and update the users.

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)

    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {

      Instruction *InsertPos =

          canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)

              ? L->getHeader()->getTerminator()

              : IVIncInsertPos;

      Rewriter.setIVIncInsertPos(L, InsertPos);

      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);

      Changed = true;

    }


  auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();

  formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);


  for (const IVChain &Chain : IVChainVec) {

    GenerateIVChain(Chain, DeadInsts);

    Changed = true;

  }


  for (const WeakVH &IV : Rewriter.getInsertedIVs())

    if (IV && dyn_cast<Instruction>(&*IV)->getParent())

      ScalarEvolutionIVs.push_back(IV);


  // Clean up after ourselves. This must be done before deleting any

  // instructions.

  Rewriter.clear();


  Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,

                                                                  &TLI, MSSAU);


  // In our cost analysis above, we assume that each addrec consumes exactly

  // one register, and arrange to have increments inserted just before the

  // latch to maximimize the chance this is true.  However, if we reused

  // existing IVs, we now need to move the increments to match our

  // expectations.  Otherwise, our cost modeling results in us having a

  // chosen a non-optimal result for the actual schedule.  (And yes, this

  // scheduling decision does impact later codegen.)

  for (PHINode &PN : L->getHeader()->phis()) {

    BinaryOperator *BO = nullptr;

    Value *Start = nullptr, *Step = nullptr;

    if (!matchSimpleRecurrence(&PN, BO, Start, Step))

      continue;


    switch (BO->getOpcode()) {

    case Instruction::Sub:

      if (BO->getOperand(0) != &PN)

        // sub is non-commutative - match handling elsewhere in LSR

        continue;

      break;

    case Instruction::Add:

      break;

    default:

      continue;

    };


    if (!isa<Constant>(Step))

      // If not a constant step, might increase register pressure

      // (We assume constants have been canonicalized to RHS)

      continue;


    if (BO->getParent() == IVIncInsertPos->getParent())

      // Only bother moving across blocks.  Isel can handle block local case.

      continue;


    // Can we legally schedule inc at the desired point?

    if (!llvm::all_of(BO->uses(),

                      [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))

      continue;

    BO->moveBefore(IVIncInsertPos->getIterator());

    Changed = true;

  }


}


LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,

                         DominatorTree &DT, LoopInfo &LI,

                         const TargetTransformInfo &TTI, AssumptionCache &AC,

                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)

    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),

      MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0

                            ? PreferredAddresingMode

                            : TTI.getPreferredAddressingMode(L, &SE)),

      Rewriter(SE, L->getHeader()->getDataLayout(), "lsr", false),

      BaselineCost(L, SE, TTI, AMK) {

  // If LoopSimplify form is not available, stay out of trouble.

  if (!L->isLoopSimplifyForm())

    return;


  // If there's no interesting work to be done, bail early.

  if (IU.empty()) return;


  // If there's too much analysis to be done, bail early. We won't be able to

  // model the problem anyway.

  unsigned NumUsers = 0;

  for (const IVStrideUse &U : IU) {

    if (++NumUsers > MaxIVUsers) {

      (void)U;

      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U

                        << "\n");

      return;

    }

    // Bail out if we have a PHI on an EHPad that gets a value from a

    // CatchSwitchInst.  Because the CatchSwitchInst cannot be split, there is

    // no good place to stick any instructions.

    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {

       auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();

       if (isa<FuncletPadInst>(FirstNonPHI) ||

           isa<CatchSwitchInst>(FirstNonPHI))

         for (BasicBlock *PredBB : PN->blocks())

           if (isa<CatchSwitchInst>(PredBB->getFirstNonPHIIt()))

             return;

    }

  }


  LLVM_DEBUG(dbgs() << "\nLSR on loop ";

             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);

             dbgs() << ":\n");


  // Check if we expect this loop to use a hardware loop instruction, which will

  // be used when calculating the costs of formulas.

  HardwareLoopInfo HWLoopInfo(L);

  HardwareLoopProfitable =

      TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);


  // Configure SCEVExpander already now, so the correct mode is used for

  // isSafeToExpand() checks.

#if LLVM_ENABLE_ABI_BREAKING_CHECKS

  Rewriter.setDebugType(DEBUG_TYPE);

#endif

  Rewriter.disableCanonicalMode();

  Rewriter.enableLSRMode();


  // First, perform some low-level loop optimizations.

  OptimizeShadowIV();

  OptimizeLoopTermCond();


  // If loop preparation eliminates all interesting IV users, bail.

  if (IU.empty()) return;


  // Skip nested loops until we can model them better with formulae.

  if (!L->isInnermost()) {

    LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");

    return;

  }


  // Start collecting data and preparing for the solver.

  // If number of registers is not the major cost, we cannot benefit from the

  // current profitable chain optimization which is based on number of

  // registers.

  // FIXME: add profitable chain optimization for other kinds major cost, for

  // example number of instructions.

  if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)

    CollectChains();

  CollectInterestingTypesAndFactors();

  CollectFixupsAndInitialFormulae();

  CollectLoopInvariantFixupsAndFormulae();


  if (Uses.empty())

    return;


  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";

             print_uses(dbgs()));

  LLVM_DEBUG(dbgs() << "The baseline solution requires ";

             BaselineCost.print(dbgs()); dbgs() << "\n");


  // Now use the reuse data to generate a bunch of interesting ways

  // to formulate the values needed for the uses.

  GenerateAllReuseFormulae();


  FilterOutUndesirableDedicatedRegisters();

  NarrowSearchSpaceUsingHeuristics();


  SmallVector<const Formula *, 8> Solution;

  Solve(Solution);


  // Release memory that is no longer needed.

  Factors.clear();

  Types.clear();

  RegUses.clear();


  if (Solution.empty())

    return;


#ifndef NDEBUG

  // Formulae should be legal.

  for (const LSRUse &LU : Uses) {

    for (const Formula &F : LU.Formulae)

      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,

                        F) && "Illegal formula generated!");

  };

#endif


  // Now that we've decided what we want, make it so.

  ImplementSolution(Solution);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void LSRInstance::print_factors_and_types(raw_ostream &OS) const {

  if (Factors.empty() && Types.empty()) return;


  OS << "LSR has identified the following interesting factors and types: ";

  bool First = true;


  for (int64_t Factor : Factors) {

    if (!First) OS << ", ";

    First = false;

    OS << '*' << Factor;

  }


  for (Type *Ty : Types) {

    if (!First) OS << ", ";

    First = false;

    OS << '(' << *Ty << ')';

  }

  OS << '\n';

}


void LSRInstance::print_fixups(raw_ostream &OS) const {

  OS << "LSR is examining the following fixup sites:\n";

  for (const LSRUse &LU : Uses)

    for (const LSRFixup &LF : LU.Fixups) {

      dbgs() << "  ";

      LF.print(OS);

      OS << '\n';

    }

}


void LSRInstance::print_uses(raw_ostream &OS) const {

  OS << "LSR is examining the following uses:\n";

  for (const LSRUse &LU : Uses) {

    dbgs() << "  ";

    LU.print(OS);

    OS << '\n';

    for (const Formula &F : LU.Formulae) {

      OS << "    ";

      F.print(OS);

      OS << '\n';

    }

  }

}


void LSRInstance::print(raw_ostream &OS) const {

  print_factors_and_types(OS);

  print_fixups(OS);

  print_uses(OS);

}


LLVM_DUMP_METHOD void LSRInstance::dump() const {

  print(errs()); errs() << '\n';

}

#endif


namespace {


class LoopStrengthReduce : public LoopPass {

public:

  static char ID; // Pass ID, replacement for typeid


  LoopStrengthReduce();


private:

  bool runOnLoop(Loop *L, LPPassManager &LPM) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

};


} // end anonymous namespace


LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {

  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());

}


void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {

  // We split critical edges, so we change the CFG.  However, we do update

  // many analyses if they are around.

  AU.addPreservedID(LoopSimplifyID);


  AU.addRequired<LoopInfoWrapperPass>();

  AU.addPreserved<LoopInfoWrapperPass>();

  AU.addRequiredID(LoopSimplifyID);

  AU.addRequired<DominatorTreeWrapperPass>();

  AU.addPreserved<DominatorTreeWrapperPass>();

  AU.addRequired<ScalarEvolutionWrapperPass>();

  AU.addPreserved<ScalarEvolutionWrapperPass>();

  AU.addRequired<AssumptionCacheTracker>();

  AU.addRequired<TargetLibraryInfoWrapperPass>();

  // Requiring LoopSimplify a second time here prevents IVUsers from running

  // twice, since LoopSimplify was invalidated by running ScalarEvolution.

  AU.addRequiredID(LoopSimplifyID);

  AU.addRequired<IVUsersWrapperPass>();

  AU.addPreserved<IVUsersWrapperPass>();

  AU.addRequired<TargetTransformInfoWrapperPass>();

  AU.addPreserved<MemorySSAWrapperPass>();

}


namespace {


/// Enables more convenient iteration over a DWARF expression vector.

static iterator_range<llvm::DIExpression::expr_op_iterator>

ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {

  llvm::DIExpression::expr_op_iterator Begin =

      llvm::DIExpression::expr_op_iterator(Expr.begin());

  llvm::DIExpression::expr_op_iterator End =

      llvm::DIExpression::expr_op_iterator(Expr.end());

  return {Begin, End};

}


struct SCEVDbgValueBuilder {

  SCEVDbgValueBuilder() = default;

  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }


  void clone(const SCEVDbgValueBuilder &Base) {

    LocationOps = Base.LocationOps;

    Expr = Base.Expr;

  }


  void clear() {

    LocationOps.clear();

    Expr.clear();

  }


  /// The DIExpression as we translate the SCEV.

  SmallVector<uint64_t, 6> Expr;

  /// The location ops of the DIExpression.

  SmallVector<Value *, 2> LocationOps;


  void pushOperator(uint64_t Op) { Expr.push_back(Op); }

  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }


  /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value

  /// in the set of values referenced by the expression.

  void pushLocation(llvm::Value *V) {

    Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);

    auto *It = llvm::find(LocationOps, V);

    unsigned ArgIndex = 0;

    if (It != LocationOps.end()) {

      ArgIndex = std::distance(LocationOps.begin(), It);

    } else {

      ArgIndex = LocationOps.size();

      LocationOps.push_back(V);

    }

    Expr.push_back(ArgIndex);

  }


  void pushValue(const SCEVUnknown *U) {

    llvm::Value *V = cast<SCEVUnknown>(U)->getValue();

    pushLocation(V);

  }


  bool pushConst(const SCEVConstant *C) {

    if (C->getAPInt().getSignificantBits() > 64)

      return false;

    Expr.push_back(llvm::dwarf::DW_OP_consts);

    Expr.push_back(C->getAPInt().getSExtValue());

    return true;

  }


  // Iterating the expression as DWARF ops is convenient when updating

  // DWARF_OP_LLVM_args.

  iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {

    return ToDwarfOpIter(Expr);

  }


  /// Several SCEV types are sequences of the same arithmetic operator applied

  /// to constants and values that may be extended or truncated.

  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,

                          uint64_t DwarfOp) {

    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&

           "Expected arithmetic SCEV type");

    bool Success = true;

    unsigned EmitOperator = 0;

    for (const auto &Op : CommExpr->operands()) {

      Success &= pushSCEV(Op);


      if (EmitOperator >= 1)

        pushOperator(DwarfOp);

      ++EmitOperator;

    }

    return Success;

  }


  // TODO: Identify and omit noop casts.

  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {

    const llvm::SCEV *Inner = C->getOperand(0);

    const llvm::Type *Type = C->getType();

    uint64_t ToWidth = Type->getIntegerBitWidth();

    bool Success = pushSCEV(Inner);

    uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,

                          IsSigned ? llvm::dwarf::DW_ATE_signed

                                   : llvm::dwarf::DW_ATE_unsigned};

    for (const auto &Op : CastOps)

      pushOperator(Op);

    return Success;

  }


  // TODO: MinMax - although these haven't been encountered in the test suite.

  bool pushSCEV(const llvm::SCEV *S) {

    bool Success = true;

    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {

      Success &= pushConst(StartInt);


    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {

      if (!U->getValue())

        return false;

      pushLocation(U->getValue());


    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {

      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);


    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {

      Success &= pushSCEV(UDiv->getLHS());

      Success &= pushSCEV(UDiv->getRHS());

      pushOperator(llvm::dwarf::DW_OP_div);


    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {

      // Assert if a new and unknown SCEVCastEXpr type is encountered.

      assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||

              isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&

             "Unexpected cast type in SCEV.");

      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));


    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {

      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);


    } else if (isa<SCEVAddRecExpr>(S)) {

      // Nested SCEVAddRecExpr are generated by nested loops and are currently

      // unsupported.

      return false;


    } else {

      return false;

    }

    return Success;

  }


  /// Return true if the combination of arithmetic operator and underlying

  /// SCEV constant value is an identity function.

  bool isIdentityFunction(uint64_t Op, const SCEV *S) {

    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {

      if (C->getAPInt().getSignificantBits() > 64)

        return false;

      int64_t I = C->getAPInt().getSExtValue();

      switch (Op) {

      case llvm::dwarf::DW_OP_plus:

      case llvm::dwarf::DW_OP_minus:

        return I == 0;

      case llvm::dwarf::DW_OP_mul:

      case llvm::dwarf::DW_OP_div:

        return I == 1;

      }

    }

    return false;

  }


  /// Convert a SCEV of a value to a DIExpression that is pushed onto the

  /// builder's expression stack. The stack should already contain an

  /// expression for the iteration count, so that it can be multiplied by

  /// the stride and added to the start.

  /// Components of the expression are omitted if they are an identity function.

  /// Chain (non-affine) SCEVs are not supported.

  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {

    assert(SAR.isAffine() && "Expected affine SCEV");

    const SCEV *Start = SAR.getStart();

    const SCEV *Stride = SAR.getStepRecurrence(SE);


    // Skip pushing arithmetic noops.

    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {

      if (!pushSCEV(Stride))

        return false;

      pushOperator(llvm::dwarf::DW_OP_mul);

    }

    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {

      if (!pushSCEV(Start))

        return false;

      pushOperator(llvm::dwarf::DW_OP_plus);

    }

    return true;

  }


  /// Create an expression that is an offset from a value (usually the IV).

  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {

    pushLocation(OffsetValue);

    DIExpression::appendOffset(Expr, Offset);

    LLVM_DEBUG(

        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "

               << std::to_string(Offset) << "\n");

  }


  /// Combine a translation of the SCEV and the IV to create an expression that

  /// recovers a location's value.

  /// returns true if an expression was created.

  bool createIterCountExpr(const SCEV *S,

                           const SCEVDbgValueBuilder &IterationCount,

                           ScalarEvolution &SE) {

    // SCEVs for SSA values are most frquently of the form

    // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).

    // This is because %a is a PHI node that is not the IV. However, these

    // SCEVs have not been observed to result in debuginfo-lossy optimisations,

    // so its not expected this point will be reached.

    if (!isa<SCEVAddRecExpr>(S))

      return false;


    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S

                      << '\n');


    const auto *Rec = cast<SCEVAddRecExpr>(S);

    if (!Rec->isAffine())

      return false;


    if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)

      return false;


    // Initialise a new builder with the iteration count expression. In

    // combination with the value's SCEV this enables recovery.

    clone(IterationCount);

    if (!SCEVToValueExpr(*Rec, SE))

      return false;


    return true;

  }


  /// Convert a SCEV of a value to a DIExpression that is pushed onto the

  /// builder's expression stack. The stack should already contain an

  /// expression for the iteration count, so that it can be multiplied by

  /// the stride and added to the start.

  /// Components of the expression are omitted if they are an identity function.

  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,

                           ScalarEvolution &SE) {

    assert(SAR.isAffine() && "Expected affine SCEV");

    const SCEV *Start = SAR.getStart();

    const SCEV *Stride = SAR.getStepRecurrence(SE);


    // Skip pushing arithmetic noops.

    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {

      if (!pushSCEV(Start))

        return false;

      pushOperator(llvm::dwarf::DW_OP_minus);

    }

    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {

      if (!pushSCEV(Stride))

        return false;

      pushOperator(llvm::dwarf::DW_OP_div);

    }

    return true;

  }


  // Append the current expression and locations to a location list and an

  // expression list. Modify the DW_OP_LLVM_arg indexes to account for

  // the locations already present in the destination list.

  void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,

                       SmallVectorImpl<Value *> &DestLocations) {

    assert(!DestLocations.empty() &&

           "Expected the locations vector to contain the IV");

    // The DWARF_OP_LLVM_arg arguments of the expression being appended must be

    // modified to account for the locations already in the destination vector.

    // All builders contain the IV as the first location op.

    assert(!LocationOps.empty() &&

           "Expected the location ops to contain the IV.");

    // DestIndexMap[n] contains the index in DestLocations for the nth

    // location in this SCEVDbgValueBuilder.

    SmallVector<uint64_t, 2> DestIndexMap;

    for (const auto &Op : LocationOps) {

      auto It = find(DestLocations, Op);

      if (It != DestLocations.end()) {

        // Location already exists in DestLocations, reuse existing ArgIndex.

        DestIndexMap.push_back(std::distance(DestLocations.begin(), It));

        continue;

      }

      // Location is not in DestLocations, add it.

      DestIndexMap.push_back(DestLocations.size());

      DestLocations.push_back(Op);

    }


    for (const auto &Op : expr_ops()) {

      if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {

        Op.appendToVector(DestExpr);

        continue;

      }


      DestExpr.push_back(dwarf::DW_OP_LLVM_arg);

      // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,

      // DestIndexMap[n] contains its new index in DestLocations.

      uint64_t NewIndex = DestIndexMap[Op.getArg(0)];

      DestExpr.push_back(NewIndex);

    }

  }

};


/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs

/// and DIExpression.

struct DVIRecoveryRec {

  DVIRecoveryRec(DbgVariableRecord *DVR)

      : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}


  DbgVariableRecord *DbgRef;

  DIExpression *Expr;

  bool HadLocationArgList;

  SmallVector<WeakVH, 2> LocationOps;

  SmallVector<const llvm::SCEV *, 2> SCEVs;

  SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;


  void clear() {

    for (auto &RE : RecoveryExprs)

      RE.reset();

    RecoveryExprs.clear();

  }


  ~DVIRecoveryRec() { clear(); }

};

} // namespace


/// Returns the total number of DW_OP_llvm_arg operands in the expression.

/// This helps in determining if a DIArglist is necessary or can be omitted from

/// the dbg.value.


static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {

  auto expr_ops = ToDwarfOpIter(Expr);

  unsigned Count = 0;

  for (auto Op : expr_ops)

    if (Op.getOp() == dwarf::DW_OP_LLVM_arg)

      Count++;

  return Count;

}


/// Overwrites DVI with the location and Ops as the DIExpression. This will

/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,

/// because a DIArglist is not created for the first argument of the dbg.value.

template <typename T>


static void updateDVIWithLocation(T &DbgVal, Value *Location,

                                  SmallVectorImpl<uint64_t> &Ops) {

  assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not "

                                    "contain any DW_OP_llvm_arg operands.");

  DbgVal.setRawLocation(ValueAsMetadata::get(Location));

  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));

  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));

}


/// Overwrite DVI with locations placed into a DIArglist.

template <typename T>


static void updateDVIWithLocations(T &DbgVal,

                                   SmallVectorImpl<Value *> &Locations,

                                   SmallVectorImpl<uint64_t> &Ops) {

  assert(numLLVMArgOps(Ops) != 0 &&

         "Expected expression that references DIArglist locations using "

         "DW_OP_llvm_arg operands.");

  SmallVector<ValueAsMetadata *, 3> MetadataLocs;

  for (Value *V : Locations)

    MetadataLocs.push_back(ValueAsMetadata::get(V));

  auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);

  DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));

  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));

}


/// Write the new expression and new location ops for the dbg.value. If possible

/// reduce the szie of the dbg.value by omitting DIArglist. This

/// can be omitted if:

/// 1. There is only a single location, refenced by a single DW_OP_llvm_arg.

/// 2. The DW_OP_LLVM_arg is the first operand in the expression.


static void UpdateDbgValue(DVIRecoveryRec &DVIRec,

                           SmallVectorImpl<Value *> &NewLocationOps,

                           SmallVectorImpl<uint64_t> &NewExpr) {

  DbgVariableRecord *DbgVal = DVIRec.DbgRef;

  unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);

  if (NumLLVMArgs == 0) {

    // Location assumed to be on the stack.

    updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);

  } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {

    // There is only a single DW_OP_llvm_arg at the start of the expression,

    // so it can be omitted along with DIArglist.

    assert(NewExpr[1] == 0 &&

           "Lone LLVM_arg in a DIExpression should refer to location-op 0.");

    llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));

    updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);

  } else {

    // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.

    updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);

  }


  // If the DIExpression was previously empty then add the stack terminator.

  // Non-empty expressions have only had elements inserted into them and so

  // the terminator should already be present e.g. stack_value or fragment.

  DIExpression *SalvageExpr = DbgVal->getExpression();

  if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {

    SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});

    DbgVal->setExpression(SalvageExpr);

  }

}


/// Cached location ops may be erased during LSR, in which case a poison is

/// required when restoring from the cache. The type of that location is no

/// longer available, so just use int8. The poison will be replaced by one or

/// more locations later when a SCEVDbgValueBuilder selects alternative

/// locations to use for the salvage.


static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {

  return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C));

}


/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.


static void restorePreTransformState(DVIRecoveryRec &DVIRec) {

  DbgVariableRecord *DbgVal = DVIRec.DbgRef;

  LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"

                    << "scev-salvage: post-LSR: " << *DbgVal << '\n');

  assert(DVIRec.Expr && "Expected an expression");

  DbgVal->setExpression(DVIRec.Expr);


  // Even a single location-op may be inside a DIArgList and referenced with

  // DW_OP_LLVM_arg, which is valid only with a DIArgList.

  if (!DVIRec.HadLocationArgList) {

    assert(DVIRec.LocationOps.size() == 1 &&

           "Unexpected number of location ops.");

    // LSR's unsuccessful salvage attempt may have added DIArgList, which in

    // this case was not present before, so force the location back to a

    // single uncontained Value.

    Value *CachedValue =

        getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());

    DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));

  } else {

    SmallVector<ValueAsMetadata *, 3> MetadataLocs;

    for (WeakVH VH : DVIRec.LocationOps) {

      Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());

      MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));

    }

    auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);

    DbgVal->setRawLocation(

        llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef));

  }

  LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');

}


static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,

                       llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,

                       const SCEV *SCEVInductionVar,

                       SCEVDbgValueBuilder IterCountExpr) {


  if (!DVIRec.DbgRef->isKillLocation())

    return false;


  // LSR may have caused several changes to the dbg.value in the failed salvage

  // attempt. So restore the DIExpression, the location ops and also the

  // location ops format, which is always DIArglist for multiple ops, but only

  // sometimes for a single op.

  restorePreTransformState(DVIRec);


  // LocationOpIndexMap[i] will store the post-LSR location index of

  // the non-optimised out location at pre-LSR index i.

  SmallVector<int64_t, 2> LocationOpIndexMap;

  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);

  SmallVector<Value *, 2> NewLocationOps;

  NewLocationOps.push_back(LSRInductionVar);


  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {

    WeakVH VH = DVIRec.LocationOps[i];

    // Place the locations not optimised out in the list first, avoiding

    // inserts later. The map is used to update the DIExpression's

    // DW_OP_LLVM_arg arguments as the expression is updated.

    if (VH && !isa<UndefValue>(VH)) {

      NewLocationOps.push_back(VH);

      LocationOpIndexMap[i] = NewLocationOps.size() - 1;

      LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i

                        << " now at index " << LocationOpIndexMap[i] << "\n");

      continue;

    }


    // It's possible that a value referred to in the SCEV may have been

    // optimised out by LSR.

    if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||

        SE.containsUndefs(DVIRec.SCEVs[i])) {

      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i

                        << " refers to a location that is now undef or erased. "

                           "Salvage abandoned.\n");

      return false;

    }


    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i

                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");


    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();

    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();


    // Create an offset-based salvage expression if possible, as it requires

    // less DWARF ops than an iteration count-based expression.

    if (std::optional<APInt> Offset =

            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {

      if (Offset->getSignificantBits() <= 64)

        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);

      else

        return false;

    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,

                                                 SE))

      return false;

  }


  // Merge the DbgValueBuilder generated expressions and the original

  // DIExpression, place the result into an new vector.

  SmallVector<uint64_t, 3> NewExpr;

  if (DVIRec.Expr->getNumElements() == 0) {

    assert(DVIRec.RecoveryExprs.size() == 1 &&

           "Expected only a single recovery expression for an empty "

           "DIExpression.");

    assert(DVIRec.RecoveryExprs[0] &&

           "Expected a SCEVDbgSalvageBuilder for location 0");

    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();

    B->appendToVectors(NewExpr, NewLocationOps);

  }

  for (const auto &Op : DVIRec.Expr->expr_ops()) {

    // Most Ops needn't be updated.

    if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {

      Op.appendToVector(NewExpr);

      continue;

    }


    uint64_t LocationArgIndex = Op.getArg(0);

    SCEVDbgValueBuilder *DbgBuilder =

        DVIRec.RecoveryExprs[LocationArgIndex].get();

    // The location doesn't have s SCEVDbgValueBuilder, so LSR did not

    // optimise it away. So just translate the argument to the updated

    // location index.

    if (!DbgBuilder) {

      NewExpr.push_back(dwarf::DW_OP_LLVM_arg);

      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&

             "Expected a positive index for the location-op position.");

      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);

      continue;

    }

    // The location has a recovery expression.

    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);

  }


  UpdateDbgValue(DVIRec, NewLocationOps, NewExpr);

  LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");

  return true;

}


/// Obtain an expression for the iteration count, then attempt to salvage the

/// dbg.value intrinsics.


static void DbgRewriteSalvageableDVIs(

    llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,

    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {

  if (DVIToUpdate.empty())

    return;


  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);

  assert(SCEVInductionVar &&

         "Anticipated a SCEV for the post-LSR induction variable");


  if (const SCEVAddRecExpr *IVAddRec =

          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {

    if (!IVAddRec->isAffine())

      return;


    // Prevent translation using excessive resources.

    if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)

      return;


    // The iteration count is required to recover location values.

    SCEVDbgValueBuilder IterCountExpr;

    IterCountExpr.pushLocation(LSRInductionVar);

    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))

      return;


    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar

                      << '\n');


    for (auto &DVIRec : DVIToUpdate) {

      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,

                 IterCountExpr);

    }

  }

}


/// Identify and cache salvageable DVI locations and expressions along with the

/// corresponding SCEV(s). Also ensure that the DVI is not deleted between

/// cacheing and salvaging.


static void DbgGatherSalvagableDVI(

    Loop *L, ScalarEvolution &SE,

    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {

  for (const auto &B : L->getBlocks()) {

    for (auto &I : *B) {

      for (DbgVariableRecord &DbgVal : filterDbgVars(I.getDbgRecordRange())) {

        if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())

          continue;


        // Ensure that if any location op is undef that the dbg.vlue is not

        // cached.

        if (DbgVal.isKillLocation())

          continue;


        // Check that the location op SCEVs are suitable for translation to

        // DIExpression.

        const auto &HasTranslatableLocationOps =

            [&](const DbgVariableRecord &DbgValToTranslate) -> bool {

          for (const auto LocOp : DbgValToTranslate.location_ops()) {

            if (!LocOp)

              return false;


            if (!SE.isSCEVable(LocOp->getType()))

              return false;


            const SCEV *S = SE.getSCEV(LocOp);

            if (SE.containsUndefs(S))

              return false;

          }

          return true;

        };


        if (!HasTranslatableLocationOps(DbgVal))

          continue;


        std::unique_ptr<DVIRecoveryRec> NewRec =

            std::make_unique<DVIRecoveryRec>(&DbgVal);

        // Each location Op may need a SCEVDbgValueBuilder in order to recover

        // it. Pre-allocating a vector will enable quick lookups of the builder

        // later during the salvage.

        NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());

        for (const auto LocOp : DbgVal.location_ops()) {

          NewRec->SCEVs.push_back(SE.getSCEV(LocOp));

          NewRec->LocationOps.push_back(LocOp);

          NewRec->HadLocationArgList = DbgVal.hasArgList();

        }

        SalvageableDVISCEVs.push_back(std::move(NewRec));

      }

    }

  }

}


/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback

/// any PHi from the loop header is usable, but may have less chance of

/// surviving subsequent transforms.


static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,

                                           const LSRInstance &LSR) {


  auto IsSuitableIV = [&](PHINode *P) {

    if (!SE.isSCEVable(P->getType()))

      return false;

    if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))

      return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));

    return false;

  };


  // For now, just pick the first IV that was generated and inserted by

  // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away

  // by subsequent transforms.

  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {

    if (!IV)

      continue;


    // There should only be PHI node IVs.

    PHINode *P = cast<PHINode>(&*IV);


    if (IsSuitableIV(P))

      return P;

  }


  for (PHINode &P : L.getHeader()->phis()) {

    if (IsSuitableIV(&P))

      return &P;

  }

  return nullptr;

}


static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,

                               DominatorTree &DT, LoopInfo &LI,

                               const TargetTransformInfo &TTI,

                               AssumptionCache &AC, TargetLibraryInfo &TLI,

                               MemorySSA *MSSA) {


  // Debug preservation - before we start removing anything identify which DVI

  // meet the salvageable criteria and store their DIExpression and SCEVs.

  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;

  DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords);


  bool Changed = false;

  std::unique_ptr<MemorySSAUpdater> MSSAU;

  if (MSSA)

    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);


  // Run the main LSR transformation.

  const LSRInstance &Reducer =

      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());

  Changed |= Reducer.getChanged();


  // Remove any extra phis created by processing inner loops.

  Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());

  if (EnablePhiElim && L->isLoopSimplifyForm()) {

    SmallVector<WeakTrackingVH, 16> DeadInsts;

    const DataLayout &DL = L->getHeader()->getDataLayout();

    SCEVExpander Rewriter(SE, DL, "lsr", false);

#if LLVM_ENABLE_ABI_BREAKING_CHECKS

    Rewriter.setDebugType(DEBUG_TYPE);

#endif

    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);

    Rewriter.clear();

    if (numFolded) {

      Changed = true;

      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,

                                                           MSSAU.get());

      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());

    }

  }

  // LSR may at times remove all uses of an induction variable from a loop.

  // The only remaining use is the PHI in the exit block.

  // When this is the case, if the exit value of the IV can be calculated using

  // SCEV, we can replace the exit block PHI with the final value of the IV and

  // skip the updates in each loop iteration.

  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {

    SmallVector<WeakTrackingVH, 16> DeadInsts;

    const DataLayout &DL = L->getHeader()->getDataLayout();

    SCEVExpander Rewriter(SE, DL, "lsr", true);

    int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,

                                         UnusedIndVarInLoop, DeadInsts);

    Rewriter.clear();

    if (Rewrites) {

      Changed = true;

      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,

                                                           MSSAU.get());

      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());

    }

  }


  if (SalvageableDVIRecords.empty())

    return Changed;


  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with

  // expressions composed using the derived iteration count.

  // TODO: Allow for multiple IV references for nested AddRecSCEVs

  for (const auto &L : LI) {

    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))

      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);

    else {

      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "

                           "could not be identified.\n");

    }

  }


  for (auto &Rec : SalvageableDVIRecords)

    Rec->clear();

  SalvageableDVIRecords.clear();

  return Changed;

}


bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {

  if (skipLoop(L))

    return false;


  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();

  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();

  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(

      *L->getHeader()->getParent());

  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(

      *L->getHeader()->getParent());

  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(

      *L->getHeader()->getParent());

  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();

  MemorySSA *MSSA = nullptr;

  if (MSSAAnalysis)

    MSSA = &MSSAAnalysis->getMSSA();

  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);

}


PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,

                                              LoopStandardAnalysisResults &AR,

                                              LPMUpdater &) {

  if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,

                          AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA))

    return PreservedAnalyses::all();


  auto PA = getLoopPassPreservedAnalyses();

  if (AR.MSSA)

    PA.preserve<MemorySSAAnalysis>();

  return PA;

}


char LoopStrengthReduce::ID = 0;


INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",

                      "Loop Strength Reduction", false, false)

INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)

INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)

INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)

INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)

INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)

INITIALIZE_PASS_DEPENDENCY(LoopSimplify)

INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",

                    "Loop Strength Reduction", false, false)


Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }

Success
#define Success
Definition AArch64Disassembler.cpp:42

for
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
Definition AArch64ExpandPseudoInsts.cpp:114

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

false
Function Alias Analysis false
Definition AliasAnalysis.cpp:734

print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition ArchiveWriter.cpp:205

AssumptionCache.h

getParent
static const Function * getParent(const Value *V)
Definition BasicAliasAnalysis.cpp:885

BasicBlockUtils.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

CommandLine.h

clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition CommandLine.h:687

Compiler.h

LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

IntrinsicCostStrategy::InstructionCost
@ InstructionCost
Definition CostModel.cpp:52

isCanonical
static bool isCanonical(const MDString *S)
Definition DebugInfoMetadata.cpp:555

DebugInfoMetadata.h

DenseMap.h
This file defines the DenseMap class.

DenseSet.h
This file defines the DenseSet and SmallDenseSet classes.

DerivedTypes.h

DomTreeUpdater.h

Dominators.h

Dwarf.h
This file contains constants used for implementing Dwarf debug support.

MemorySSA
early cse Early CSE w MemorySSA
Definition EarlyCSE.cpp:1962

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

GlobalValue.h

Loops
Hexagon Hardware Loops
Definition HexagonHardwareLoops.cpp:367

IRBuilder.h

BasicBlock.h

Constant.h

Instruction.h

IntrinsicInst.h

Module.h
Module.h This file contains the declarations for the Module class.

Operator.h

Type.h

Use.h
This defines the Use class.

User.h

Value.h

Users
iv Induction Variable Users
Definition IVUsers.cpp:48

IVUsers.h

InitializePasses.h

InstrTypes.h

Instructions.h

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3368

isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539

LoopAnalysisManager.h
This header provides classes for managing per-loop analyses.

LoopInfo.h

LoopPass.h

SalvageDVI
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
Definition LoopStrengthReduce.cpp:6773

DropScaledForVScale
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))

getWideOperand
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
Definition LoopStrengthReduce.cpp:2969

isAddSExtable
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
Definition LoopStrengthReduce.cpp:810

mayUsePostIncMode
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
Definition LoopStrengthReduce.cpp:3878

restorePreTransformState
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
Definition LoopStrengthReduce.cpp:6742

ExtractImmediate
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
Definition LoopStrengthReduce.cpp:935

containsAddRecDependentOnLoop
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
Definition LoopStrengthReduce.cpp:615

findIVOperand
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
Definition LoopStrengthReduce.cpp:2950

PreferredAddresingMode
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))

isLegalUse
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
Definition LoopStrengthReduce.cpp:1902

DbgGatherSalvagableDVI
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
Definition LoopStrengthReduce.cpp:6917

isMulSExtable
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
Definition LoopStrengthReduce.cpp:818

MaxSCEVSalvageExpressionSize
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
Definition LoopStrengthReduce.cpp:144

isExistingPhi
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
Definition LoopStrengthReduce.cpp:1090

getScalingFactorCost
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
Definition LoopStrengthReduce.cpp:1948

InsnsCost
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))

StressIVChain
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))

isAddressUse
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
Definition LoopStrengthReduce.cpp:992

ExtractSymbol
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
Definition LoopStrengthReduce.cpp:966

updateDVIWithLocation
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
Definition LoopStrengthReduce.cpp:6672

isLegalAddImmediate
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
Definition LoopStrengthReduce.cpp:1923

AllowDropSolutionIfLessProfitable
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))

EnableVScaleImmediates
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))

getExprBase
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
Definition LoopStrengthReduce.cpp:2985

isAlwaysFoldable
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
Definition LoopStrengthReduce.cpp:1993

GetInductionVariable
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
Definition LoopStrengthReduce.cpp:6972

IsSimplerBaseSCEVForTarget
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
Definition LoopStrengthReduce.cpp:5284

MaxIVUsers
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
Definition LoopStrengthReduce.cpp:138

isHighCostExpansion
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
Definition LoopStrengthReduce.cpp:1110

getValueOrPoison
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
Definition LoopStrengthReduce.cpp:6737

getAccessType
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
Definition LoopStrengthReduce.cpp:1037

numLLVMArgOps
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
Definition LoopStrengthReduce.cpp:6659

DbgRewriteSalvageableDVIs
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
Definition LoopStrengthReduce.cpp:6879

EnablePhiElim
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))

UpdateDbgValue
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
Definition LoopStrengthReduce.cpp:6702

isAddRecSExtable
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
Definition LoopStrengthReduce.cpp:802

canHoistIVInc
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
Definition LoopStrengthReduce.cpp:6004

DoInitialMatch
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
Definition LoopStrengthReduce.cpp:540

isAMCompletelyFolded
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
Definition LoopStrengthReduce.cpp:1931

LSRExpNarrow
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))

FilterSameScaledReg
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))

updateDVIWithLocations
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
Definition LoopStrengthReduce.cpp:6683

ComplexityLimit
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))

ReduceLoopStrength
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
Definition LoopStrengthReduce.cpp:7004

isProfitableChain
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
Definition LoopStrengthReduce.cpp:3052

CollectSubexprs
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
Definition LoopStrengthReduce.cpp:3823

getExactSDiv
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
Definition LoopStrengthReduce.cpp:830

canFoldIVIncExpr
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
Definition LoopStrengthReduce.cpp:3344

getAnyExtendConsideringPostIncUses
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
Definition LoopStrengthReduce.cpp:4376

getSetupCost
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
Definition LoopStrengthReduce.cpp:1375

SetupCostDepthLimit
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))

LoopStrengthReduce.h

LoopUtils.h

F
#define F(x, y, z)
Definition MD5.cpp:55

I
#define I(x, y, z)
Definition MD5.cpp:58

G
#define G(x, y, z)
Definition MD5.cpp:56

Reg
Register Reg
Definition MachineSink.cpp:2117

MathExtras.h

MemorySSAUpdater.h

MemorySSA.h
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

T
#define T
Definition Mips16ISelLowering.cpp:353

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

P
#define P(N)

Fixup
PowerPC TLS Dynamic Call Fixup
Definition PPCTLSDynamicCall.cpp:336

if
if(PassOpts->AAPipeline)
Definition PassBuilderBindings.cpp:64

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

Pass.h

PointerIntPair.h
This file defines the PointerIntPair class.

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition RISCVRedundantCopyElimination.cpp:71

Uses
Remove Loads Into Fake Uses
Definition RemoveLoadsIntoFakeUses.cpp:81

isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition RustDemangle.cpp:181

RA
SI optimize exec mask operations pre RA
Definition SIOptimizeExecMaskingPreRA.cpp:81

Address
@ Address
Definition SPIRVEmitNonSemanticDI.cpp:58

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

ScalarEvolutionExpander.h

ScalarEvolutionExpressions.h

ScalarEvolutionNormalization.h

ScalarEvolutionPatternMatch.h

ScalarEvolution.h

Scalar.h

SetVector.h
This file implements a set that has insertion order iteration characteristics.

SmallBitVector.h
This file implements the SmallBitVector class.

SmallPtrSet.h
This file defines the SmallPtrSet class.

SmallSet.h
This file defines the SmallSet class.

SmallVector.h
This file defines the SmallVector class.

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

UnknownAddressSpace
static const unsigned UnknownAddressSpace
Definition StraightLineStrengthReduce.cpp:94

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

X
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39

TargetLibraryInfo.h

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

Local.h

Utils.h

ValueHandle.h

ValueTracking.h

Rewriter
Virtual Register Rewriter
Definition VirtRegMap.cpp:269

RHS
Value * RHS
Definition X86PartialReduction.cpp:74

LHS
Value * LHS
Definition X86PartialReduction.cpp:73

Mul
BinaryOperator * Mul
Definition X86PartialReduction.cpp:68

IV
static const uint32_t IV[8]
Definition blake3_impl.h:83

NewExpr
Definition ItaniumDemangle.h:2119

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540

llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329

llvm::APInt::sdiv
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644

llvm::APInt::getSignificantBits
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531

llvm::APInt::srem
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736

llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition PassManager.h:412

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition PassAnalysisSupport.h:48

llvm::AnalysisUsage::addRequiredID
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
Definition Pass.cpp:284

llvm::AnalysisUsage::addPreservedID
AnalysisUsage & addPreservedID(const void *ID)
Definition PassAnalysisSupport.h:89

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition PassAnalysisSupport.h:76

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition PassAnalysisSupport.h:99

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition AssumptionCache.h:43

llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition Instructions.h:506

llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition Instructions.h:709

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170

llvm::BasicBlock::moveBefore
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition BasicBlock.h:386

llvm::BasicBlock::isLandingPad
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition BasicBlock.cpp:661

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233

llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition InstrTypes.h:374

llvm::BinaryOperator::Create
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition Instructions.cpp:2707

llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition Instructions.h:3130

llvm::BranchInst::getCondition
Value * getCondition() const
Definition Instructions.h:3133

llvm::CastInst::getCastOpcode
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
Definition Instructions.cpp:3216

llvm::CastInst::Create
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition Instructions.cpp:3043

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:699

llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition InstrTypes.h:700

llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791

llvm::ConstantInt::isValueValidForType
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
Definition Constants.cpp:1602

llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131

llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169

llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163

llvm::Constant::getAllOnesValue
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition Constants.cpp:420

llvm::DIArgList::get
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
Definition DebugInfoMetadata.cpp:2578

llvm::DIExpression
DWARF expression.
Definition DebugInfoMetadata.h:3299

llvm::DIExpression::expr_ops
iterator_range< expr_op_iterator > expr_ops() const
Definition DebugInfoMetadata.h:3447

llvm::DIExpression::append
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
Definition DebugInfoMetadata.cpp:2211

llvm::DIExpression::getNumElements
unsigned getNumElements() const
Definition DebugInfoMetadata.h:3326

llvm::DIExpression::appendOffset
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
Definition DebugInfoMetadata.cpp:1984

llvm::DIExpression::isComplex
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
Definition DebugInfoMetadata.cpp:1803

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63

llvm::DbgRecord::getContext
LLVM_ABI LLVMContext & getContext()
Definition DebugProgramInstruction.cpp:530

llvm::DbgVariableRecord
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Definition DebugProgramInstruction.h:277

llvm::DbgVariableRecord::isKillLocation
LLVM_ABI bool isKillLocation() const
Definition DebugProgramInstruction.cpp:369

llvm::DbgVariableRecord::setRawLocation
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
Definition DebugProgramInstruction.h:475

llvm::DbgVariableRecord::setExpression
void setExpression(DIExpression *NewExpr)
Definition DebugProgramInstruction.h:458

llvm::DbgVariableRecord::getExpression
DIExpression * getExpression() const
Definition DebugProgramInstruction.h:459

llvm::DenseMapBase::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229

llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214

llvm::DenseMapBase::clear
void clear()
Definition DenseMap.h:119

llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition GenericDomTree.h:89

llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition GenericDomTree.h:401

llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition GenericDomTree.h:443

llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165

llvm::DominatorTree::findNearestCommonDominator
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition Dominators.cpp:357

llvm::DominatorTree::dominates
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition Dominators.cpp:135

llvm::GlobalValue
Definition GlobalValue.h:49

llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition GlobalValue.h:296

llvm::IVStrideUse
IVStrideUse - Keep track of one use of a strided induction variable.
Definition IVUsers.h:35

llvm::IVStrideUse::transformToPostInc
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Definition IVUsers.cpp:365

llvm::IVStrideUse::getOperandValToReplace
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
Definition IVUsers.h:54

llvm::IVStrideUse::setUser
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Definition IVUsers.h:48

llvm::IVUsersAnalysis
Analysis pass that exposes the IVUsers for a loop.
Definition IVUsers.h:184

llvm::IVUsersWrapperPass
Definition IVUsers.h:163

llvm::IVUsers
Definition IVUsers.h:91

llvm::IVUsers::const_iterator
ilist< IVStrideUse >::const_iterator const_iterator
Definition IVUsers.h:142

llvm::IVUsers::end
iterator end()
Definition IVUsers.h:144

llvm::IVUsers::begin
iterator begin()
Definition IVUsers.h:143

llvm::IVUsers::empty
bool empty() const
Definition IVUsers.h:147

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::InstructionCost::print
LLVM_ABI void print(raw_ostream &OS) const
Definition InstructionCost.cpp:19

llvm::InstructionCost::getValue
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition InstructionCost.h:88

llvm::InstructionCost::isValid
bool isValid() const
Definition InstructionCost.h:80

llvm::Instruction
Definition Instruction.h:69

llvm::Instruction::isLifetimeStartOrEnd
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
Definition Instruction.cpp:1232

llvm::Instruction::getNumSuccessors
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
Definition Instruction.cpp:1281

llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition Instruction.h:513

llvm::Instruction::moveBefore
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition Instruction.cpp:184

llvm::Instruction::isEHPad
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition Instruction.h:879

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:104

llvm::Instruction::getAccessType
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
Definition Instruction.cpp:1128

llvm::Instruction::getOpcodeName
const char * getOpcodeName() const
Definition Instruction.h:314

llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition Instruction.h:312

llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition Instruction.h:510

llvm::Instruction::getDataLayout
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition Instruction.cpp:86

llvm::IntegerType::get
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition IntrinsicInst.h:49

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition LoopPassManager.h:226

llvm::LoadInst
An instruction for reading from memory.
Definition Instructions.h:180

llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition GenericLoopInfoImpl.h:33

llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition GenericLoopInfo.h:90

llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition GenericLoopInfo.h:82

llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596

llvm::LoopInfo
Definition LoopInfo.h:408

llvm::LoopStrengthReducePass::run
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition LoopStrengthReduce.cpp:7105

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565

llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936

llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702

llvm::PHINode
Definition Instructions.h:2638

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition Instructions.h:2773

llvm::PHINode::blocks
iterator_range< const_block_iterator > blocks() const
Definition Instructions.h:2699

llvm::PHINode::incoming_values
op_range incoming_values()
Definition Instructions.h:2703

llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition Instructions.h:2716

llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition Instructions.h:2733

llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition Instructions.h:2713

llvm::PHINode::getIncomingValueNumForOperand
static unsigned getIncomingValueNumForOperand(unsigned i)
Definition Instructions.h:2727

llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition Instructions.h:2807

llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition Instructions.h:2709

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition Instructions.h:2673

llvm::PassRegistry::getPassRegistry
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition PassRegistry.cpp:24

llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition Constants.cpp:1885

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::SCEVAddExpr
This node represents an addition of some number of SCEVs.
Definition ScalarEvolutionExpressions.h:267

llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition ScalarEvolutionExpressions.h:348

llvm::SCEVAddRecExpr::getType
Type * getType() const
Definition ScalarEvolutionExpressions.h:358

llvm::SCEVAddRecExpr::getStart
const SCEV * getStart() const
Definition ScalarEvolutionExpressions.h:359

llvm::SCEVAddRecExpr::getStepRecurrence
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Definition ScalarEvolutionExpressions.h:366

llvm::SCEVAddRecExpr::isAffine
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
Definition ScalarEvolutionExpressions.h:376

llvm::SCEVAddRecExpr::getLoop
const Loop * getLoop() const
Definition ScalarEvolutionExpressions.h:360

llvm::SCEVConstant
This class represents a constant integer value.
Definition ScalarEvolutionExpressions.h:61

llvm::SCEVConstant::getType
Type * getType() const
Definition ScalarEvolutionExpressions.h:73

llvm::SCEVConstant::getValue
ConstantInt * getValue() const
Definition ScalarEvolutionExpressions.h:70

llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition ScalarEvolutionExpressions.h:71

llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition ScalarEvolutionExpander.h:64

llvm::SCEVMulExpr
This node represents multiplication of some number of SCEVs.
Definition ScalarEvolutionExpressions.h:291

llvm::SCEVNAryExpr::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Definition ScalarEvolutionExpressions.h:227

llvm::SCEVNAryExpr::hasNoSignedWrap
bool hasNoSignedWrap() const
Definition ScalarEvolutionExpressions.h:231

llvm::SCEVNAryExpr::operands
ArrayRef< const SCEV * > operands() const
Definition ScalarEvolutionExpressions.h:219

llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Definition ScalarEvolutionExpressions.h:580

llvm::SCEV
This class represents an analyzed expression in the program.
Definition ScalarEvolution.h:72

llvm::SCEV::operands
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
Definition ScalarEvolution.cpp:417

llvm::SCEV::getExpressionSize
unsigned short getExpressionSize() const
Definition ScalarEvolution.h:170

llvm::SCEV::isZero
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
Definition ScalarEvolution.cpp:445

llvm::SCEV::getSCEVType
SCEVTypes getSCEVType() const
Definition ScalarEvolution.h:141

llvm::SCEV::getType
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Definition ScalarEvolution.cpp:383

llvm::SCEV::FlagAnyWrap
@ FlagAnyWrap
Definition ScalarEvolution.h:128

llvm::ScalarEvolutionWrapperPass
Definition ScalarEvolution.h:2392

llvm::ScalarEvolution
The main scalar evolution driver.
Definition ScalarEvolution.h:448

llvm::ScalarEvolution::getBackedgeTakenCount
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
Definition ScalarEvolution.cpp:8390

llvm::ScalarEvolution::getZero
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
Definition ScalarEvolution.h:663

llvm::ScalarEvolution::getTypeSizeInBits
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
Definition ScalarEvolution.cpp:4490

llvm::ScalarEvolution::getConstant
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
Definition ScalarEvolution.cpp:470

llvm::ScalarEvolution::getSCEV
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition ScalarEvolution.cpp:4589

llvm::ScalarEvolution::getNoopOrSignExtend
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
Definition ScalarEvolution.cpp:4784

llvm::ScalarEvolution::isLoopInvariant
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition ScalarEvolution.cpp:14185

llvm::ScalarEvolution::getAddRecExpr
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
Definition ScalarEvolution.cpp:3684

llvm::ScalarEvolution::isSCEVable
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Definition ScalarEvolution.cpp:4483

llvm::ScalarEvolution::getEffectiveSCEVType
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
Definition ScalarEvolution.cpp:4500

llvm::ScalarEvolution::getMinusSCEV
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition ScalarEvolution.cpp:4697

llvm::ScalarEvolution::getAnyExtendExpr
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
Definition ScalarEvolution.cpp:2169

llvm::ScalarEvolution::containsUndefs
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
Definition ScalarEvolution.cpp:13662

llvm::ScalarEvolution::getSignExtendExpr
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
Definition ScalarEvolution.cpp:1890

llvm::ScalarEvolution::getVScale
LLVM_ABI const SCEV * getVScale(Type *Ty)
Definition ScalarEvolution.cpp:491

llvm::ScalarEvolution::hasComputableLoopEvolution
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
Definition ScalarEvolution.cpp:14189

llvm::ScalarEvolution::getPointerBase
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
Definition ScalarEvolution.cpp:4865

llvm::ScalarEvolution::getMulExpr
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Definition ScalarEvolution.cpp:3109

llvm::ScalarEvolution::getUnknown
LLVM_ABI const SCEV * getUnknown(Value *V)
Definition ScalarEvolution.cpp:4453

llvm::ScalarEvolution::computeConstantDifference
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
Definition ScalarEvolution.cpp:12148

llvm::ScalarEvolution::properlyDominates
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
Definition ScalarEvolution.cpp:14272

llvm::ScalarEvolution::getAddExpr
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition ScalarEvolution.cpp:2515

llvm::ScalarEvolution::containsErasedValue
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
Definition ScalarEvolution.cpp:13671

llvm::ScalarEvolution::getContext
LLVMContext & getContext() const
Definition ScalarEvolution.h:491

llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104

llvm::SetVector::end
iterator end()
Get an iterator to the end of the SetVector.
Definition SetVector.h:119

llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition SetVector.h:109

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168

llvm::SmallBitVector::find_first
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
Definition SmallBitVector.h:230

llvm::SmallBitVector::set
SmallBitVector & set()
Definition SmallBitVector.h:366

llvm::SmallBitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition SmallBitVector.h:183

llvm::SmallBitVector::find_next
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
Definition SmallBitVector.h:277

llvm::SmallBitVector::size
size_type size() const
Returns the number of bits in this bitvector.
Definition SmallBitVector.h:195

llvm::SmallBitVector::resize
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
Definition SmallBitVector.h:332

llvm::SmallBitVector::count
size_type count() const
Returns the number of bits which are set.
Definition SmallBitVector.h:200

llvm::SmallBitVector::reset
SmallBitVector & reset()
Definition SmallBitVector.h:402

llvm::SmallPtrSetImplBase::clear
void clear()
Definition SmallPtrSet.h:102

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition SmallPtrSet.h:380

llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition SmallPtrSet.h:470

llvm::SmallPtrSetImpl::insert_range
void insert_range(Range &&R)
Definition SmallPtrSet.h:490

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:401

llvm::SmallSet::clear
void clear()
Definition SmallSet.h:208

llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:574

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition SmallVector.h:674

llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition SmallVector.h:705

llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition SmallVector.h:938

llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition SmallVector.h:664

llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition SmallVector.h:738

llvm::SmallVectorImpl::const_iterator
typename SuperClass::const_iterator const_iterator
Definition SmallVector.h:579

llvm::SmallVectorImpl::clear
void clear()
Definition SmallVector.h:611

llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition SmallVector.h:578

llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition SmallVector.h:639

llvm::SmallVectorTemplateBase::pop_back
void pop_back()
Definition SmallVector.h:426

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:414

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition SmallVector.h:270

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:79

llvm::SmallVectorTemplateCommon::front
reference front()
Definition SmallVector.h:300

llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition SmallVector.h:268

llvm::SmallVectorTemplateCommon::back
reference back()
Definition SmallVector.h:309

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:82

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1197

llvm::StackOffset::get
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:42

llvm::StoreInst
An instruction for storing to memory.
Definition Instructions.h:296

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:285

llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition TargetTransformInfo.h:2049

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:220

llvm::TargetTransformInfo::shouldDropLSRSolutionIfLessProfitable
LLVM_ABI bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
Definition TargetTransformInfo.cpp:444

llvm::TargetTransformInfo::isLSRCostLess
LLVM_ABI bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
Definition TargetTransformInfo.cpp:435

llvm::TargetTransformInfo::isIndexedStoreLegal
LLVM_ABI bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition TargetTransformInfo.cpp:1354

llvm::TargetTransformInfo::getRegisterClassForType
LLVM_ABI unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition TargetTransformInfo.cpp:771

llvm::TargetTransformInfo::isLegalAddressingMode
LLVM_ABI bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition TargetTransformInfo.cpp:425

llvm::TargetTransformInfo::isIndexedLoadLegal
LLVM_ABI bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition TargetTransformInfo.cpp:1349

llvm::TargetTransformInfo::isTypeLegal
LLVM_ABI bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition TargetTransformInfo.cpp:586

llvm::TargetTransformInfo::isLegalAddImmediate
LLVM_ABI bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition TargetTransformInfo.cpp:413

llvm::TargetTransformInfo::canSaveCmp
LLVM_ABI bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Definition TargetTransformInfo.cpp:456

llvm::TargetTransformInfo::getNumberOfRegisters
LLVM_ABI unsigned getNumberOfRegisters(unsigned ClassID) const
Definition TargetTransformInfo.cpp:762

llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition TargetTransformInfo.h:1774

llvm::TargetTransformInfo::canMacroFuseCmp
LLVM_ABI bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition TargetTransformInfo.cpp:452

llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Which addressing mode Loop Strength Reduction will try to generate.
Definition TargetTransformInfo.h:801

llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Prefer post-indexed addressing mode.
Definition TargetTransformInfo.h:804

llvm::TargetTransformInfo::AMK_All
@ AMK_All
Consider all addressing modes.
Definition TargetTransformInfo.h:805

llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Prefer pre-indexed addressing mode.
Definition TargetTransformInfo.h:803

llvm::TargetTransformInfo::AMK_None
@ AMK_None
Don't prefer any addressing mode.
Definition TargetTransformInfo.h:802

llvm::TargetTransformInfo::isTruncateFree
LLVM_ABI bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition TargetTransformInfo.cpp:576

llvm::TruncInst
This class represents a truncation of integer types.
Definition Instructions.h:4555

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45

llvm::Type::isScalableTy
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62

llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267

llvm::Type::getPointerAddressSpace
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition DerivedTypes.h:771

llvm::Type::getInt8Ty
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295

llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128

llvm::Type::getFPMantissaWidth
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:236

llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139

llvm::User
Definition User.h:44

llvm::User::op_iterator
Use * op_iterator
Definition User.h:279

llvm::User::operands
op_range operands()
Definition User.h:292

llvm::User::replaceUsesOfWith
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21

llvm::User::op_begin
op_iterator op_begin()
Definition User.h:284

llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition User.h:237

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:232

llvm::User::op_end
op_iterator op_end()
Definition User.h:286

llvm::ValueAsMetadata::get
static LLVM_ABI ValueAsMetadata * get(Value *V)
Definition Metadata.cpp:502

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439

llvm::Value::replaceAllUsesWith
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546

llvm::Value::users
iterator_range< user_iterator > users()
Definition Value.h:426

llvm::Value::printAsOperand
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition AsmWriter.cpp:5307

llvm::Value::getContext
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101

llvm::Value::uses
iterator_range< use_iterator > uses()
Definition Value.h:380

llvm::WeakVH
A nullable Value handle that is nullable.
Definition ValueHandle.h:145

llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition CommandLine.h:400

llvm::cl::opt
Definition CommandLine.h:1429

llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194

llvm::detail::DenseSetImpl::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:174

llvm::details::FixedOrScalableQuantity
Definition TypeSize.h:86

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:130

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

uint64_t

Changed
Changed
Definition ObjCARCOpts.cpp:2370

iterator_range.h
This provides a very simple, boring adaptor for a begin and end iterator into a range type.

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:239

llvm::AArch64::Fixups
Fixups
Definition AArch64FixupKinds.h:17

llvm::AMDGPU::Imm
@ Imm
Definition AMDGPURegBankLegalizeRules.h:129

llvm::ARM_AM::add
@ add
Definition ARMAddressingModes.h:39

llvm::ARM::ProfileKind::M
@ M
Definition ARMTargetParser.h:171

llvm::COFF::Entry
@ Entry
Definition COFF.h:862

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81

llvm::ISD::Constant
@ Constant
Definition ISDOpcodes.h:86

llvm::M68k::MemAddrModeKind::U
@ U
Definition M68kBaseInfo.h:61

llvm::M68k::MemAddrModeKind::V
@ V
Definition M68kBaseInfo.h:63

llvm::M68k::MemAddrModeKind::u
@ u
Definition M68kBaseInfo.h:60

llvm::M68k::MemAddrModeKind::f
@ f
Definition M68kBaseInfo.h:56

llvm::M68k::MemAddrModeKind::K
@ K
Definition M68kBaseInfo.h:68

llvm::M68k::MemAddrModeKind::L
@ L
Definition M68kBaseInfo.h:70

llvm::MCID::RegSequence
@ RegSequence
Definition MCInstrDesc.h:183

llvm::RISCVFenceField::R
@ R
Definition RISCVBaseInfo.h:401

llvm::RISCVFenceField::O
@ O
Definition RISCVBaseInfo.h:400

llvm::SCEVPatternMatch
Definition ScalarEvolutionPatternMatch.h:19

llvm::SCEVPatternMatch::m_SCEVVScale
class_match< const SCEVVScale > m_SCEVVScale()
Definition ScalarEvolutionPatternMatch.h:67

llvm::SCEVPatternMatch::m_scev_APInt
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
Definition ScalarEvolutionPatternMatch.h:148

llvm::SCEVPatternMatch::m_SCEVConstant
class_match< const SCEVConstant > m_SCEVConstant()
Definition ScalarEvolutionPatternMatch.h:64

llvm::SCEVPatternMatch::m_scev_AffineAddRec
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
Definition ScalarEvolutionPatternMatch.h:255

llvm::SCEVPatternMatch::m_scev_Mul
SCEVBinaryExpr_match< SCEVMulExpr, Op0_t, Op1_t > m_scev_Mul(const Op0_t &Op0, const Op1_t &Op1)
Definition ScalarEvolutionPatternMatch.h:214

llvm::SCEVPatternMatch::match
bool match(const SCEV *S, const Pattern &P)
Definition ScalarEvolutionPatternMatch.h:21

llvm::SCEVPatternMatch::m_Loop
class_match< const Loop > m_Loop()
Definition ScalarEvolutionPatternMatch.h:224

llvm::SCEVPatternMatch::m_scev_SpecificInt
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
Definition ScalarEvolutionPatternMatch.h:117

llvm::SCEVPatternMatch::m_SCEV
class_match< const SCEV > m_SCEV()
Definition ScalarEvolutionPatternMatch.h:63

llvm::SI
Definition SIInstrInfo.h:1720

llvm::SPII::Store
@ Store
Definition SparcInstrInfo.h:33

llvm::SystemZICMP::Any
@ Any
Definition SystemZISelLowering.h:414

llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
Definition X86BaseInfo.h:109

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition CommandLine.h:712

llvm::cl::BOU_FALSE
@ BOU_FALSE
Definition CommandLine.h:638

llvm::cl::BOU_UNSET
@ BOU_UNSET
Definition CommandLine.h:638

llvm::cl::BOU_TRUE
@ BOU_TRUE
Definition CommandLine.h:638

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm::codeview::FrameCookieKind::Copy
@ Copy
Definition CodeView.h:495

llvm::dwarf::DW_OP_LLVM_arg
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
Definition Dwarf.h:149

llvm::dwarf::DW_OP_LLVM_convert
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Definition Dwarf.h:145

llvm::lltok::Kind
Kind
Definition LLToken.h:18

llvm::logicalview::LVCompareKind::Types
@ Types
Definition LVOptions.h:139

llvm::logicalview::LVAttributeKind::Inserted
@ Inserted
Definition LVOptions.h:109

llvm::memprof::Meta::Start
@ Start
Definition MemProf.h:69

llvm::msgpack::Type::Map
@ Map
Definition MsgPackReader.h:63

llvm::numbers::e
constexpr double e
Definition MathExtras.h:47

llvm::objcarc::Sequence
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41

llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition OptimizationRemarkEmitter.h:139

llvm::pdb::DbgHeaderType::Max
@ Max
Definition RawConstants.h:98

llvm::rdf::Phi
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390

llvm::rdf::Use
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm::sandboxir::end
iterator end() const
Definition BasicBlock.h:89

llvm::sandboxir::Instruction
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73

llvm::sandboxir::begin
LLVM_ABI iterator begin() const

llvm::sframe::BaseReg
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77

llvm::telemetry::KindType
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
Definition Telemetry.h:85

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318

llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition SparseBitVector.h:874

llvm::Offset
@ Offset
Definition DWP.cpp:477

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705

llvm::PseudoProbeType::Block
@ Block
Definition PseudoProbe.h:30

llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
Definition GCNRegPressure.cpp:237

llvm::Cost
InstructionCost Cost
Definition FunctionSpecialization.h:103

llvm::Depth
@ Depth
Definition SIMachineScheduler.h:36

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649

llvm::salvageDebugInfo
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1725

llvm::operator!=
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2113

llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116

llvm::LoopSimplifyID
LLVM_ABI char & LoopSimplifyID
Definition LoopSimplify.cpp:784

llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition AddressRanges.h:151

llvm::countr_zero
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186

llvm::DomTreeNode
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:95

llvm::LoopAnalysisManager
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
Definition LoopAnalysisManager.h:79

llvm::matchSimpleRecurrence
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
Definition ValueTracking.cpp:9158

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712

llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342

llvm::DeleteDeadPHIs
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
Definition BasicBlockUtils.cpp:163

llvm::initializeLoopStrengthReducePass
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408

llvm::denormalizeForPostIncUse
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
Definition ScalarEvolutionNormalization.cpp:120

llvm::get
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
Definition PointerIntPair.h:268

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1719

llvm::Count
FunctionAddr VTableAddr Count
Definition InstrProf.h:139

llvm::ConstantFoldCastOperand
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
Definition ConstantFolding.cpp:1482

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1123

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548

llvm::SplitLandingPadPredecessors
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
Definition BasicBlockUtils.cpp:1375

llvm::Key
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
Definition PassManager.h:668

llvm::normalizeForPostIncUse
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
Definition ScalarEvolutionNormalization.cpp:97

llvm::errs
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition raw_ostream.cpp:908

llvm::iterator_range
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >

llvm::IRMemLocation::Other
@ Other
Any other memory.
Definition ModRef.h:68

llvm::IRMemLocation::First
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:215

llvm::IRBuilder
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

llvm::RecurKind::Add
@ Add
Sum of integers.
Definition IVDescriptors.h:37

llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:22

llvm::createLoopStrengthReducePass
LLVM_ABI Pass * createLoopStrengthReducePass()
Definition LoopStrengthReduce.cpp:7131

llvm::SplitCriticalEdge
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
Definition BreakCriticalEdges.cpp:101

llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Definition Local.cpp:548

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:223

llvm::PseudoProbeReservedId::Last
@ Last
Definition PseudoProbe.h:28

llvm::formLCSSAForInstructions
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition LCSSA.cpp:308

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565

llvm::getLoopPassPreservedAnalyses
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition LoopAnalysisManager.cpp:141

llvm::PostIncLoopSet
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
Definition ScalarEvolutionNormalization.h:49

llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738

llvm::rewriteLoopExitValues
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
Definition LoopUtils.cpp:1658

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877

llvm::scAddRecExpr
@ scAddRecExpr
Definition ScalarEvolutionExpressions.h:49

llvm::scAddExpr
@ scAddExpr
Definition ScalarEvolutionExpressions.h:46

llvm::scVScale
@ scVScale
Definition ScalarEvolutionExpressions.h:42

llvm::scUnknown
@ scUnknown
Definition ScalarEvolutionExpressions.h:56

llvm::scConstant
@ scConstant
Definition ScalarEvolutionExpressions.h:41

llvm::scSignExtend
@ scSignExtend
Definition ScalarEvolutionExpressions.h:45

llvm::scTruncate
@ scTruncate
Definition ScalarEvolutionExpressions.h:43

llvm::scZeroExtend
@ scZeroExtend
Definition ScalarEvolutionExpressions.h:44

llvm::scMulExpr
@ scMulExpr
Definition ScalarEvolutionExpressions.h:47

llvm::UnusedIndVarInLoop
@ UnusedIndVarInLoop
Definition LoopUtils.h:520

llvm::InlinerFunctionImportStatsOpts::Basic
@ Basic
Definition ImportedFunctionsInliningStatistics.h:107

llvm::filterDbgVars
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
Definition DebugProgramInstruction.h:562

llvm::SCEVExprContains
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Definition ScalarEvolutionExpressions.h:723

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853

raw_ostream.h

N
#define N

llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition TargetTransformInfo.h:99

llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition LoopAnalysisManager.h:54

llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition LoopAnalysisManager.h:59

llvm::LoopStandardAnalysisResults::MSSA
MemorySSA * MSSA
Definition LoopAnalysisManager.h:64

llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition LoopAnalysisManager.h:61

llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition LoopAnalysisManager.h:56

llvm::LoopStandardAnalysisResults::TLI
TargetLibraryInfo & TLI
Definition LoopAnalysisManager.h:60

llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition LoopAnalysisManager.h:58

llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition LoopAnalysisManager.h:57

llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition TargetTransformInfo.h:74

llvm::MemIntrinsicInfo::PtrVal
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
Definition TargetTransformInfo.h:79

llvm::cl::desc
Definition CommandLine.h:410