#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode"),

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

    cl::desc("Stress test LSR IV chains"));
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
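// Added note (editorial commentary, not original text): MemAccessTy pairs the
// accessed type with its address space so legality queries such as
// TTI.isLegalAddressingMode() can be asked per memory access; getUnknown()
// uses a void placeholder type when only the address itself is known.
// RegSortData's UsedByIndices bit vector records which LSRUses reference a
// given candidate register.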
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};
  }

  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
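// Usage sketch (added for illustration; the values are hypothetical):
// Immediate models either a plain byte offset or a multiple of vscale, and the
// helpers above refuse to mix non-zero offsets of different kinds:
//
//   Immediate Fixed = Immediate::getFixed(16);    // 16 bytes
//   Immediate Scal  = Immediate::getScalable(8);  // 8 * vscale bytes
//   assert(!Fixed.isCompatibleImmediate(Scal) &&
//          "non-zero fixed and scalable offsets must not be combined");
//   Immediate Sum = Fixed.addUnsigned(Immediate::getFixed(4)); // 20 bytes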
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())

    if (!LHS.isScalable() && RHS.isScalable())

    return LHS.getKnownMinValue() < RHS.getKnownMinValue();

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;

    return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  OS << "[NumUses=" << UsedByIndices.count() << ']';
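// Added note (commentary): these comparators define a deterministic ordering
// for maps keyed by Immediate and by (use index, Immediate) pairs -- fixed and
// scalable offsets are kept in separate runs and then ordered by their known
// minimum value -- so iteration over offset maps is stable from run to run.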
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
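// Illustrative note (added commentary, not original source): RegUseTracker is
// a map from candidate register (a SCEV) to the bit set of LSRUse indices that
// reference it. If uses #0 and #3 both need the register {%base,+,4}<%loop>,
// its UsedByIndices has bits 0 and 3 set; isRegUsedByUsesOtherThan() then lets
// formula filtering prefer registers that are shared across several uses.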
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);
  RegSortData &RSD = Pair.first->second;

  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;

  RSD.UsedByIndices.reset(LUIdx);
}

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())

  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;

  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
  GlobalValue *BaseGV = nullptr;

  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  bool countsDownToZero() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
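// Editor's sketch (illustration only; names and values are hypothetical): a
// Formula is one candidate way to materialize a use, in roughly the shape of a
// target addressing mode:
//
//   BaseGV + BaseOffset + sum(BaseRegs) + Scale * ScaledReg
//
// For an access like p[4*i + 3] with 8-byte elements, one candidate is
//   BaseRegs = { %p }, ScaledReg = {0,+,1}<%loop>, Scale = 32,
//   BaseOffset = Immediate::getFixed(24), BaseGV = nullptr.
// UnfoldedOffset carries any leftover constant that could not be folded into
// the addressing-mode immediate.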
552 for (
const SCEV *S :
Add->operands())
558 const SCEV *Start, *Step;
573 if (
Mul->getOperand(0)->isAllOnesValue()) {
582 for (
const SCEV *S : MyGood)
584 for (
const SCEV *S : MyBad)
596void Formula::initialMatch(
const SCEV *S, Loop *L, ScalarEvolution &SE) {
603 BaseRegs.push_back(Sum);
609 BaseRegs.push_back(Sum);
624bool Formula::isCanonical(
const Loop &L)
const {
625 assert((Scale == 0 || ScaledReg) &&
626 "ScaledReg must be non-null if Scale is non-zero");
629 return BaseRegs.size() <= 1;
634 if (Scale == 1 && BaseRegs.empty())
643 return none_of(BaseRegs, [&L](
const SCEV *S) {
654void Formula::canonicalize(
const Loop &L) {
658 if (BaseRegs.empty()) {
660 assert(ScaledReg &&
"Expected 1*reg => reg");
661 assert(Scale == 1 &&
"Expected 1*reg => reg");
662 BaseRegs.push_back(ScaledReg);
670 ScaledReg = BaseRegs.pop_back_val();
678 auto I =
find_if(BaseRegs, [&L](
const SCEV *S) {
681 if (
I != BaseRegs.end())
691bool Formula::unscale() {
695 BaseRegs.push_back(ScaledReg);
700bool Formula::hasZeroEnd()
const {
701 if (UnfoldedOffset || BaseOffset)
703 if (BaseRegs.size() != 1 || ScaledReg)
708bool Formula::countsDownToZero()
const {
711 assert(BaseRegs.size() == 1 &&
"hasZeroEnd should mean one BaseReg");
712 const APInt *StepInt;
720size_t Formula::getNumRegs()
const {
721 return !!ScaledReg + BaseRegs.size();
726Type *Formula::getType()
const {
727 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
728 ScaledReg ? ScaledReg->
getType() :
734void Formula::deleteBaseReg(
const SCEV *&S) {
735 if (&S != &BaseRegs.back())
741bool Formula::referencesReg(
const SCEV *S)
const {
747bool Formula::hasRegsUsedByUsesOtherThan(
size_t LUIdx,
748 const RegUseTracker &RegUses)
const {
750 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
752 for (
const SCEV *BaseReg : BaseRegs)
753 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {

  if (BaseOffset.isNonZero()) {

  for (const SCEV *BaseReg : BaseRegs) {

    OS << "reg(" << *BaseReg << ')';

  if (HasBaseReg && BaseRegs.empty()) {

    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {

    OS << "**error: !HasBaseReg**";

    OS << Scale << "*reg(";

  if (UnfoldedOffset.isNonZero()) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
832 bool IgnoreSignificantBits =
false) {
843 if (
RA.isAllOnes()) {
844 if (
LHS->getType()->isPointerTy())
857 const APInt &LA =
C->getAPInt();
866 if ((IgnoreSignificantBits ||
isAddRecSExtable(AR, SE)) && AR->isAffine()) {
868 IgnoreSignificantBits);
869 if (!Step)
return nullptr;
871 IgnoreSignificantBits);
872 if (!Start)
return nullptr;
885 for (
const SCEV *S :
Add->operands()) {
887 if (!
Op)
return nullptr;
915 for (
const SCEV *S :
Mul->operands()) {
918 IgnoreSignificantBits)) {
938 if (
C->getSignificantBits() <= 64) {
940 return Immediate::getFixed(
C->getSExtValue());
945 if (Result.isNonZero())
951 if (Result.isNonZero())
959 return Immediate::getScalable(
C->getSExtValue());
961 return Immediate::getZero();
996 if (
SI->getPointerOperand() == OperandVal)
1001 switch (
II->getIntrinsicID()) {
1002 case Intrinsic::memset:
1003 case Intrinsic::prefetch:
1004 case Intrinsic::masked_load:
1005 if (
II->getArgOperand(0) == OperandVal)
1008 case Intrinsic::masked_store:
1009 if (
II->getArgOperand(1) == OperandVal)
1012 case Intrinsic::memmove:
1013 case Intrinsic::memcpy:
1014 if (
II->getArgOperand(0) == OperandVal ||
1015 II->getArgOperand(1) == OperandVal)
1020 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo)) {
1021 if (IntrInfo.
PtrVal == OperandVal)
1027 if (RMW->getPointerOperand() == OperandVal)
1030 if (CmpX->getPointerOperand() == OperandVal)
1039 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->
getContext());
1043 AccessTy.MemTy = Ty;
1047 AccessTy.AddrSpace =
SI->getPointerAddressSpace();
1049 AccessTy.AddrSpace = LI->getPointerAddressSpace();
1051 AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1053 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1055 switch (
II->getIntrinsicID()) {
1056 case Intrinsic::prefetch:
1057 case Intrinsic::memset:
1058 AccessTy.AddrSpace =
II->getArgOperand(0)->getType()->getPointerAddressSpace();
1059 AccessTy.MemTy = OperandVal->
getType();
1061 case Intrinsic::memmove:
1062 case Intrinsic::memcpy:
1064 AccessTy.MemTy = OperandVal->
getType();
1066 case Intrinsic::masked_load:
1067 AccessTy.AddrSpace =
1068 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1070 case Intrinsic::masked_store:
1071 AccessTy.AddrSpace =
1072 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1076 if (
TTI.getTgtMemIntrinsic(
II, IntrInfo) && IntrInfo.
PtrVal) {
1132 if (!Processed.
insert(S).second)
1136 for (
const SCEV *S :
Add->operands()) {
1143 const SCEV *Op0, *Op1;
1152 Value *UVal = U->getValue();
1156 if (UI && UI->
getOpcode() == Instruction::Mul &&
                                 const LSRUse &LU, const Formula &F);

                                 const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;
  ScalarEvolution *SE = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  TargetTransformInfo::LSRCost C;

  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,

        L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;

  void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
                   const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
                   bool HardwareLoopProfitable,
                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

  void print(raw_ostream &OS) const;

  void RateRegister(const Formula &F, const SCEV *Reg,
                    SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
                    bool HardwareLoopProfitable);
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                           SmallPtrSetImpl<const SCEV *> &Regs,
                           const LSRUse &LU, bool HardwareLoopProfitable,
                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
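// Added note (commentary, not original): TargetTransformInfo::LSRCost bundles
// the metrics the solver compares via the target hook -- instruction count,
// register pressure, addrec cost, IV multiplies, base adds, immediate, setup
// and scale cost. The validity check above relies on Lose() saturating every
// field to ~0u: a cost is either fully saturated (a deliberate loser) or the
// bitwise OR of its fields is not all-ones.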
1266 Value *OperandValToReplace =
nullptr;
1276 Immediate
Offset = Immediate::getZero();
1278 LSRFixup() =
default;
1280 bool isUseFullyOutsideLoop(
const Loop *L)
const;
1282 void print(raw_ostream &OS)
const;
1292 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;
1305 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1308 MemAccessTy AccessTy;
1314 Immediate MinOffset = Immediate::getFixedMax();
1315 Immediate MaxOffset = Immediate::getFixedMin();
1319 bool AllFixupsOutsideLoop =
true;
1326 bool RigidFormula =
false;
1332 Type *WidestFixupType =
nullptr;
1340 SmallPtrSet<const SCEV *, 4> Regs;
1342 LSRUse(KindType K, MemAccessTy AT) :
Kind(
K), AccessTy(AT) {}
1344 LSRFixup &getNewFixup() {
1345 Fixups.push_back(LSRFixup());
1349 void pushFixup(LSRFixup &f) {
1351 if (Immediate::isKnownGT(
f.Offset, MaxOffset))
1352 MaxOffset =
f.Offset;
1353 if (Immediate::isKnownLT(
f.Offset, MinOffset))
1354 MinOffset =
f.Offset;
1357 bool HasFormulaWithSameRegs(
const Formula &
F)
const;
1358 float getNotSelectedProbability(
const SCEV *
Reg)
const;
1359 bool InsertFormula(
const Formula &
F,
const Loop &L);
1360 void DeleteFormula(Formula &
F);
1361 void RecomputeRegs(
size_t LUIdx, RegUseTracker &Reguses);
1363 void print(raw_ostream &OS)
const;
1370 LSRUse::KindType Kind, MemAccessTy AccessTy,
1371 GlobalValue *BaseGV, Immediate BaseOffset,
1372 bool HasBaseReg, int64_t Scale,
1373 Instruction *
Fixup =
nullptr);
1386 [&](
unsigned i,
const SCEV *
Reg) {
1387 return i + getSetupCost(Reg, Depth - 1);
1396void Cost::RateRegister(
const Formula &
F,
const SCEV *
Reg,
1397 SmallPtrSetImpl<const SCEV *> &Regs,
const LSRUse &LU,
1398 bool HardwareLoopProfitable) {
1403 if (AR->getLoop() != L) {
1410 if (!AR->getLoop()->contains(L)) {
1420 unsigned LoopCost = 1;
1424 const SCEVConstant *Step;
1429 Step->
getAPInt() ==
F.BaseOffset.getFixedValue()) ||
1436 if (LU.Kind == LSRUse::ICmpZero &&
F.countsDownToZero() &&
1437 HardwareLoopProfitable)
1439 C.AddRecCost += LoopCost;
1444 if (!Regs.
count(AR->getOperand(1))) {
1445 RateRegister(
F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);
1457 C.SetupCost = std::min<unsigned>(
C.SetupCost, 1 << 16);
1466void Cost::RatePrimaryRegister(
const Formula &
F,
const SCEV *
Reg,
1467 SmallPtrSetImpl<const SCEV *> &Regs,
1468 const LSRUse &LU,
bool HardwareLoopProfitable,
1469 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1470 if (LoserRegs && LoserRegs->
count(
Reg)) {
1475 RateRegister(
F,
Reg, Regs, LU, HardwareLoopProfitable);
1476 if (LoserRegs && isLoser())
1481void Cost::RateFormula(
const Formula &
F, SmallPtrSetImpl<const SCEV *> &Regs,
1482 const DenseSet<const SCEV *> &VisitedRegs,
1483 const LSRUse &LU,
bool HardwareLoopProfitable,
1484 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1487 assert(
F.isCanonical(*L) &&
"Cost is accurate only for canonical formula");
1489 unsigned PrevAddRecCost =
C.AddRecCost;
1490 unsigned PrevNumRegs =
C.NumRegs;
1491 unsigned PrevNumBaseAdds =
C.NumBaseAdds;
1492 if (
const SCEV *ScaledReg =
F.ScaledReg) {
1493 if (VisitedRegs.
count(ScaledReg)) {
1497 RatePrimaryRegister(
F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1502 for (
const SCEV *BaseReg :
F.BaseRegs) {
1503 if (VisitedRegs.
count(BaseReg)) {
1507 RatePrimaryRegister(
F, BaseReg, Regs, LU, HardwareLoopProfitable,
1514 size_t NumBaseParts =
F.getNumRegs();
1515 if (NumBaseParts > 1)
1520 C.NumBaseAdds += (
F.UnfoldedOffset.isNonZero());
1526 for (
const LSRFixup &
Fixup : LU.Fixups) {
1527 if (
Fixup.Offset.isCompatibleImmediate(
F.BaseOffset)) {
1528 Immediate
Offset =
Fixup.Offset.addUnsigned(
F.BaseOffset);
1532 else if (
Offset.isNonZero())
1534 APInt(64,
Offset.getKnownMinValue(),
true).getSignificantBits();
1538 if (LU.Kind == LSRUse::Address &&
Offset.isNonZero() &&
1559 if (
C.NumRegs > TTIRegNum) {
1562 if (PrevNumRegs > TTIRegNum)
1563 C.Insns += (
C.NumRegs - PrevNumRegs);
1565 C.Insns += (
C.NumRegs - TTIRegNum);
1578 if (LU.Kind == LSRUse::ICmpZero && !
F.hasZeroEnd() &&
1582 C.Insns += (
C.AddRecCost - PrevAddRecCost);
1585 if (LU.Kind != LSRUse::ICmpZero)
1586 C.Insns +=
C.NumBaseAdds - PrevNumBaseAdds;
  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
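// Added note (commentary): Lose() saturates every cost component so that any
// later comparison against a real cost discards this candidate; isLoser() and
// isValid() recognize the saturated pattern instead of tracking a separate
// "invalid" flag.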
bool Cost::isLess(const Cost &Other) const {

      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";

    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
1637bool LSRFixup::isUseFullyOutsideLoop(
const Loop *L)
const {
1640 for (
unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1641 if (PN->getIncomingValue(i) == OperandValToReplace &&
1642 L->contains(PN->getIncomingBlock(i)))
1647 return !
L->contains(UserInst);
1650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1651void LSRFixup::print(raw_ostream &OS)
const {
1656 Store->getOperand(0)->printAsOperand(OS,
false);
1662 OS <<
", OperandValToReplace=";
1665 for (
const Loop *PIL : PostIncLoops) {
1666 OS <<
", PostIncLoop=";
1667 PIL->getHeader()->printAsOperand(OS,
false);
1671 OS <<
", Offset=" <<
Offset;
1681bool LSRUse::HasFormulaWithSameRegs(
const Formula &
F)
const {
1683 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1690float LSRUse::getNotSelectedProbability(
const SCEV *
Reg)
const {
1692 for (
const Formula &
F : Formulae)
1693 if (
F.referencesReg(
Reg))
1695 return ((
float)(Formulae.size() - FNum)) / Formulae.size();
1700bool LSRUse::InsertFormula(
const Formula &
F,
const Loop &L) {
1701 assert(
F.isCanonical(L) &&
"Invalid canonical representation");
1703 if (!Formulae.empty() && RigidFormula)
1707 if (
F.ScaledReg)
Key.push_back(
F.ScaledReg);
1715 assert((!
F.ScaledReg || !
F.ScaledReg->isZero()) &&
1716 "Zero allocated in a scaled register!");
1718 for (
const SCEV *BaseReg :
F.BaseRegs)
1719 assert(!
BaseReg->isZero() &&
"Zero allocated in a base register!");
1723 Formulae.push_back(
F);
1734void LSRUse::DeleteFormula(Formula &
F) {
1735 if (&
F != &Formulae.back())
1737 Formulae.pop_back();
1741void LSRUse::RecomputeRegs(
size_t LUIdx, RegUseTracker &RegUses) {
1743 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1745 for (
const Formula &
F : Formulae) {
1746 if (
F.ScaledReg) Regs.
insert(
F.ScaledReg);
1751 for (
const SCEV *S : OldRegs)
1753 RegUses.dropRegister(S, LUIdx);
1756#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1757void LSRUse::print(raw_ostream &OS)
const {
1758 OS <<
"LSR Use: Kind=";
1760 case Basic: OS <<
"Basic";
break;
1761 case Special: OS <<
"Special";
break;
1762 case ICmpZero: OS <<
"ICmpZero";
break;
1764 OS <<
"Address of ";
1768 OS << *AccessTy.MemTy;
1771 OS <<
" in addrspace(" << AccessTy.AddrSpace <<
')';
1774 OS <<
", Offsets={";
1775 bool NeedComma =
false;
1776 for (
const LSRFixup &
Fixup : Fixups) {
1777 if (NeedComma) OS <<
',';
1783 if (AllFixupsOutsideLoop)
1784 OS <<
", all-fixups-outside-loop";
1786 if (WidestFixupType)
1787 OS <<
", widest fixup type: " << *WidestFixupType;
1796 LSRUse::KindType Kind, MemAccessTy AccessTy,
1798 bool HasBaseReg, int64_t Scale,
1801 case LSRUse::Address: {
1802 int64_t FixedOffset =
1803 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1804 int64_t ScalableOffset =
1805 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1806 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
1807 HasBaseReg, Scale, AccessTy.AddrSpace,
1808 Fixup, ScalableOffset);
1810 case LSRUse::ICmpZero:
1817 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1822 if (Scale != 0 && Scale != -1)
1827 if (BaseOffset.isNonZero()) {
1830 if (BaseOffset.isScalable())
1840 BaseOffset = BaseOffset.getFixed(-(
uint64_t)BaseOffset.getFixedValue());
1841 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
1849 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1851 case LSRUse::Special:
1853 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1860 Immediate MinOffset, Immediate MaxOffset,
1861 LSRUse::KindType Kind, MemAccessTy AccessTy,
1863 bool HasBaseReg, int64_t Scale) {
1864 if (BaseOffset.isNonZero() &&
1865 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1866 BaseOffset.isScalable() != MaxOffset.isScalable()))
1869 int64_t
Base = BaseOffset.getKnownMinValue();
1870 int64_t Min = MinOffset.getKnownMinValue();
1871 int64_t Max = MaxOffset.getKnownMinValue();
1874 MinOffset = Immediate::get((
uint64_t)
Base + Min, MinOffset.isScalable());
1877 MaxOffset = Immediate::get((
uint64_t)
Base + Max, MaxOffset.isScalable());
1880 HasBaseReg, Scale) &&
1886 Immediate MinOffset, Immediate MaxOffset,
1887 LSRUse::KindType Kind, MemAccessTy AccessTy,
1888 const Formula &
F,
const Loop &L) {
1896 assert((
F.isCanonical(L) ||
F.Scale != 0));
1898 F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
F.Scale);
1903 Immediate MaxOffset, LSRUse::KindType Kind,
1905 Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
1908 BaseOffset, HasBaseReg, Scale) ||
1913 BaseGV, BaseOffset,
true, 0));
1917 Immediate MaxOffset, LSRUse::KindType Kind,
1918 MemAccessTy AccessTy,
const Formula &
F) {
1919 return isLegalUse(
TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV,
1920 F.BaseOffset,
F.HasBaseReg,
F.Scale);
1926 return TTI.isLegalAddScalableImmediate(
Offset.getKnownMinValue());
1928 return TTI.isLegalAddImmediate(
Offset.getFixedValue());
1932 const LSRUse &LU,
const Formula &
F) {
1934 if (LU.Kind == LSRUse::Address &&
TTI.LSRWithInstrQueries()) {
1935 for (
const LSRFixup &
Fixup : LU.Fixups)
1937 (
F.BaseOffset +
Fixup.Offset),
F.HasBaseReg,
1938 F.Scale,
Fixup.UserInst))
1944 LU.AccessTy,
F.BaseGV,
F.BaseOffset,
F.HasBaseReg,
1949 const LSRUse &LU,
const Formula &
F,
1958 return F.Scale != 1;
1961 case LSRUse::Address: {
1963 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1964 if (
F.BaseOffset.isScalable()) {
1965 ScalableMin = (
F.BaseOffset + LU.MinOffset).getKnownMinValue();
1966 ScalableMax = (
F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1968 FixedMin = (
F.BaseOffset + LU.MinOffset).getFixedValue();
1969 FixedMax = (
F.BaseOffset + LU.MaxOffset).getFixedValue();
1973 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1976 F.HasBaseReg,
F.Scale, LU.AccessTy.AddrSpace);
1979 "Legal addressing mode has an illegal cost!");
1980 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1982 case LSRUse::ICmpZero:
1984 case LSRUse::Special:
1994 LSRUse::KindType Kind, MemAccessTy AccessTy,
1998 if (BaseOffset.isZero() && !BaseGV)
2003 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2007 if (!HasBaseReg && Scale == 1) {
2017 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2027 Immediate MaxOffset, LSRUse::KindType Kind,
2028 MemAccessTy AccessTy,
const SCEV *S,
2031 if (S->
isZero())
return true;
2039 if (!S->
isZero())
return false;
2042 if (BaseOffset.isZero() && !BaseGV)
2045 if (BaseOffset.isScalable())
2050 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2053 BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  const_iterator begin() const {
    return std::next(Incs.begin());

  const_iterator end() const {

  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,

  SmallPtrSet<Instruction*, 4> FarUsers;
  SmallPtrSet<Instruction*, 4> NearUsers;
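// Illustrative sketch (added; the IR below is hypothetical): an IVChain keeps
// a head increment plus follow-on increments that can each be computed from
// the previous value instead of from the canonical induction variable, e.g.
//
//   %p1 = phi [ %p0, %preheader ], [ %p3, %latch ]
//   %p2 = getelementptr i8, ptr %p1, i64 16   ; IVInc, IncExpr = 16
//   %p3 = getelementptr i8, ptr %p2, i64 16   ; IVInc, IncExpr = 16
//
// ChainUsers' FarUsers/NearUsers track the other IV users whose cost could
// change if the chain is kept as chained pointer bumps.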
  ScalarEvolution &SE;

  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;

  MemorySSAUpdater *MSSAU;

  bool HardwareLoopProfitable = false;

  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  SmallSetVector<Type *, 4> Types;

  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  SmallPtrSet<Use*, MaxChains> IVIncSet;

  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2183 void OptimizeShadowIV();
2184 bool FindIVUserForCond(ICmpInst *
Cond, IVStrideUse *&CondUse);
2185 ICmpInst *OptimizeMax(ICmpInst *
Cond, IVStrideUse* &CondUse);
2186 void OptimizeLoopTermCond();
2188 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2189 SmallVectorImpl<ChainUsers> &ChainUsersVec);
2190 void FinalizeChain(IVChain &Chain);
2191 void CollectChains();
2192 void GenerateIVChain(
const IVChain &Chain,
2193 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2195 void CollectInterestingTypesAndFactors();
2196 void CollectFixupsAndInitialFormulae();
2199 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2202 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg,
2203 LSRUse::KindType Kind, MemAccessTy AccessTy);
2205 std::pair<size_t, Immediate> getUse(
const SCEV *&Expr, LSRUse::KindType Kind,
2206 MemAccessTy AccessTy);
2208 void DeleteUse(LSRUse &LU,
size_t LUIdx);
2210 LSRUse *FindUseWithSimilarFormula(
const Formula &
F,
const LSRUse &OrigLU);
2212 void InsertInitialFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2213 void InsertSupplementalFormula(
const SCEV *S, LSRUse &LU,
size_t LUIdx);
2214 void CountRegisters(
const Formula &
F,
size_t LUIdx);
2215 bool InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F);
2217 void CollectLoopInvariantFixupsAndFormulae();
2219 void GenerateReassociations(LSRUse &LU,
unsigned LUIdx, Formula
Base,
2220 unsigned Depth = 0);
2222 void GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
2224 size_t Idx,
bool IsScaledReg =
false);
2225 void GenerateCombinations(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2226 void GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2227 const Formula &
Base,
size_t Idx,
2228 bool IsScaledReg =
false);
2229 void GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2230 void GenerateConstantOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
2231 const Formula &
Base,
2232 const SmallVectorImpl<Immediate> &Worklist,
2233 size_t Idx,
bool IsScaledReg =
false);
2234 void GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2235 void GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2236 void GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2237 void GenerateTruncates(LSRUse &LU,
unsigned LUIdx, Formula
Base);
2238 void GenerateCrossUseConstantOffsets();
2239 void GenerateAllReuseFormulae();
2241 void FilterOutUndesirableDedicatedRegisters();
2243 size_t EstimateSearchSpaceComplexity()
const;
2244 void NarrowSearchSpaceByDetectingSupersets();
2245 void NarrowSearchSpaceByCollapsingUnrolledCode();
2246 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2247 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2248 void NarrowSearchSpaceByFilterPostInc();
2249 void NarrowSearchSpaceByDeletingCostlyFormulas();
2250 void NarrowSearchSpaceByPickingWinnerRegs();
2251 void NarrowSearchSpaceUsingHeuristics();
2253 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2255 SmallVectorImpl<const Formula *> &Workspace,
2256 const Cost &CurCost,
2257 const SmallPtrSet<const SCEV *, 16> &CurRegs,
2258 DenseSet<const SCEV *> &VisitedRegs)
const;
2259 void Solve(SmallVectorImpl<const Formula *> &Solution)
const;
2263 const SmallVectorImpl<Instruction *> &Inputs)
const;
2266 const LSRUse &LU)
const;
2268 Value *Expand(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2270 SmallVectorImpl<WeakTrackingVH> &DeadInsts)
const;
2271 void RewriteForPHI(PHINode *PN,
const LSRUse &LU,
const LSRFixup &LF,
2273 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2274 void Rewrite(
const LSRUse &LU,
const LSRFixup &LF,
const Formula &
F,
2275 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2276 void ImplementSolution(
const SmallVectorImpl<const Formula *> &Solution);
2279 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2280 LoopInfo &LI,
const TargetTransformInfo &
TTI, AssumptionCache &AC,
2281 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2283 bool getChanged()
const {
return Changed; }
2284 const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs()
const {
2285 return ScalarEvolutionIVs;
2288 void print_factors_and_types(raw_ostream &OS)
const;
2289 void print_fixups(raw_ostream &OS)
const;
2290 void print_uses(raw_ostream &OS)
const;
2291 void print(raw_ostream &OS)
const;
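// Added overview (commentary, not original text): LSRInstance drives the pass
// in phases -- collect interesting types, factors and fixups; generate reuse
// formulae per use (reassociation, combinations, symbolic and constant
// offsets, ICmpZero scales, scales, truncates, cross-use constant offsets);
// prune with the NarrowSearchSpace* heuristics; Solve() picks one formula per
// use; Expand/Rewrite then edit the IR, and dead IVs are cleaned up afterwards.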
2299void LSRInstance::OptimizeShadowIV() {
2309 Type *DestTy =
nullptr;
2310 bool IsSigned =
false;
2326 DestTy = UCast->getDestTy();
2330 DestTy = SCast->getDestTy();
2332 if (!DestTy)
continue;
2352 if (Mantissa == -1)
continue;
2356 unsigned Entry, Latch;
2366 if (!Init)
continue;
2367 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2371 BinaryOperator *Incr =
2373 if (!Incr)
continue;
2374 if (Incr->
getOpcode() != Instruction::Add
2375 && Incr->
getOpcode() != Instruction::Sub)
2379 ConstantInt *
C =
nullptr;
2391 if (!
C->getValue().isStrictlyPositive())
2399 Constant *CFP = ConstantFP::get(DestTy,
C->getZExtValue());
2401 Incr->
getOpcode() == Instruction::Add ? Instruction::FAdd
2402 : Instruction::FSub,
2419bool LSRInstance::FindIVUserForCond(ICmpInst *
Cond, IVStrideUse *&CondUse) {
2420 for (IVStrideUse &U : IU)
2421 if (
U.getUser() ==
Cond) {
2479ICmpInst *LSRInstance::OptimizeMax(ICmpInst *
Cond, IVStrideUse* &CondUse) {
2494 const SCEV *IterationCount = SE.
getAddExpr(One, BackedgeTakenCount);
2495 if (IterationCount != SE.
getSCEV(Sel))
return Cond;
2501 const SCEVNAryExpr *
Max =
nullptr;
2503 Pred = ICmpInst::ICMP_SLE;
2506 Pred = ICmpInst::ICMP_SLT;
2509 Pred = ICmpInst::ICMP_ULT;
2518 if (
Max->getNumOperands() != 2)
2521 const SCEV *MaxLHS =
Max->getOperand(0);
2522 const SCEV *MaxRHS =
Max->getOperand(1);
2527 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->
isZero() : (MaxLHS != One)))
2538 "Loop condition operand is an addrec in a different loop!");
2542 Value *NewRHS =
nullptr;
2543 if (ICmpInst::isTrueWhenEqual(Pred)) {
2547 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2548 NewRHS = BO->getOperand(0);
2551 if (BO1->isOne() && SE.
getSCEV(BO->getOperand(0)) == MaxRHS)
2552 NewRHS = BO->getOperand(0);
2560 NewRHS = SU->getValue();
2572 ICmpInst *NewCond =
new ICmpInst(
Cond->getIterator(), Pred,
2573 Cond->getOperand(0), NewRHS,
"scmp");
2577 Cond->replaceAllUsesWith(NewCond);
2580 Cond->eraseFromParent();
2582 if (
Cmp->use_empty()) {
2584 Cmp->eraseFromParent();
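// Added note (commentary; a simplified reading of the code above): OptimizeMax
// undoes the guard pattern where the trip count was computed as smax(n, 1)
// purely to protect a pre-tested loop -- when the exit compare tests against
// the selected max, it can test against n directly, so the max (and the select
// feeding it) becomes dead; NewCond replaces the old compare, which is erased
// once it has no remaining uses.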
2591LSRInstance::OptimizeLoopTermCond() {
2592 SmallPtrSet<Instruction *, 4> PostIncs;
2607 SmallVector<BasicBlock*, 8> ExitingBlocks;
2608 L->getExitingBlocks(ExitingBlocks);
2616 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2630 IVStrideUse *CondUse =
nullptr;
2632 if (!FindIVUserForCond(
Cond, CondUse))
2646 if (!DT.
dominates(ExitingBlock, LatchBlock))
2651 if (LatchBlock != ExitingBlock)
2652 for (
const IVStrideUse &UI : IU)
2655 if (&UI != CondUse &&
2659 const SCEV *
A = IU.getStride(*CondUse, L);
2660 const SCEV *
B = IU.getStride(UI, L);
2661 if (!
A || !
B)
continue;
2670 if (
const SCEVConstant *
D =
2672 const ConstantInt *
C =
D->getValue();
2674 if (
C->isOne() ||
C->isMinusOne())
2675 goto decline_post_inc;
2677 if (
C->getValue().getSignificantBits() >= 64 ||
2678 C->getValue().isMinSignedValue())
2679 goto decline_post_inc;
2682 MemAccessTy AccessTy =
2684 int64_t Scale =
C->getSExtValue();
2688 AccessTy.AddrSpace))
2689 goto decline_post_inc;
2694 AccessTy.AddrSpace))
2695 goto decline_post_inc;
2700 LLVM_DEBUG(
dbgs() <<
" Change loop exiting icmp to use postinc iv: "
2706 if (
Cond->getNextNode() != TermBr) {
2707 if (
Cond->hasOneUse()) {
2711 ICmpInst *OldCond =
Cond;
2713 Cond->setName(
L->getHeader()->getName() +
".termcond");
2735 IVIncInsertPos =
L->getLoopLatch()->getTerminator();
2736 for (Instruction *Inst : PostIncs)
2742bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2743 bool HasBaseReg, LSRUse::KindType Kind,
2744 MemAccessTy AccessTy) {
2745 Immediate NewMinOffset = LU.MinOffset;
2746 Immediate NewMaxOffset = LU.MaxOffset;
2747 MemAccessTy NewAccessTy = AccessTy;
2752 if (LU.Kind != Kind)
2758 if (Kind == LSRUse::Address) {
2759 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2760 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->
getContext(),
2761 AccessTy.AddrSpace);
2766 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2768 LU.MaxOffset - NewOffset, HasBaseReg))
2770 NewMinOffset = NewOffset;
2771 }
else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2773 NewOffset - LU.MinOffset, HasBaseReg))
2775 NewMaxOffset = NewOffset;
2781 if (NewAccessTy.MemTy && NewAccessTy.MemTy->
isVoidTy() &&
2782 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2786 LU.MinOffset = NewMinOffset;
2787 LU.MaxOffset = NewMaxOffset;
2788 LU.AccessTy = NewAccessTy;
2795std::pair<size_t, Immediate> LSRInstance::getUse(
const SCEV *&Expr,
2796 LSRUse::KindType Kind,
2797 MemAccessTy AccessTy) {
2798 const SCEV *
Copy = Expr;
2805 Offset = Immediate::getFixed(0);
2808 std::pair<UseMapTy::iterator, bool>
P =
2809 UseMap.
try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
2812 size_t LUIdx =
P.first->second;
2813 LSRUse &LU =
Uses[LUIdx];
2814 if (reconcileNewOffset(LU,
Offset,
true, Kind, AccessTy))
2816 return std::make_pair(LUIdx,
Offset);
2820 size_t LUIdx =
Uses.size();
2821 P.first->second = LUIdx;
2822 Uses.push_back(LSRUse(Kind, AccessTy));
2823 LSRUse &LU =
Uses[LUIdx];
2827 return std::make_pair(LUIdx,
Offset);
2831void LSRInstance::DeleteUse(LSRUse &LU,
size_t LUIdx) {
2832 if (&LU != &
Uses.back())
2837 RegUses.swapAndDropUse(LUIdx,
Uses.size());
2843LSRInstance::FindUseWithSimilarFormula(
const Formula &OrigF,
2844 const LSRUse &OrigLU) {
2846 for (LSRUse &LU :
Uses) {
2852 if (&LU != &OrigLU &&
2853 LU.Kind != LSRUse::ICmpZero &&
2854 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2855 LU.WidestFixupType == OrigLU.WidestFixupType &&
2856 LU.HasFormulaWithSameRegs(OrigF)) {
2858 for (
const Formula &
F : LU.Formulae) {
2861 if (
F.BaseRegs == OrigF.BaseRegs &&
2862 F.ScaledReg == OrigF.ScaledReg &&
2863 F.BaseGV == OrigF.BaseGV &&
2864 F.Scale == OrigF.Scale &&
2865 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2866 if (
F.BaseOffset.isZero())
2881void LSRInstance::CollectInterestingTypesAndFactors() {
2882 SmallSetVector<const SCEV *, 4> Strides;
2886 for (
const IVStrideUse &U : IU) {
2887 const SCEV *Expr = IU.getExpr(U);
2905 }
while (!Worklist.
empty());
2909 for (SmallSetVector<const SCEV *, 4>::const_iterator
2911 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2912 std::next(
I); NewStrideIter !=
E; ++NewStrideIter) {
2913 const SCEV *OldStride = *
I;
2914 const SCEV *NewStride = *NewStrideIter;
2924 if (
const SCEVConstant *Factor =
2927 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2928 Factors.insert(Factor->getAPInt().getSExtValue());
2929 }
else if (
const SCEVConstant *Factor =
2933 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2934 Factors.insert(Factor->getAPInt().getSExtValue());
2940 if (Types.size() == 1)
2952 for(; OI != OE; ++OI) {
2971 return Trunc->getOperand(0);
3004 if (SubExpr->getSCEVType() ==
scAddExpr)
3007 if (SubExpr->getSCEVType() !=
scMulExpr)
3023bool IVChain::isProfitableIncrement(
const SCEV *OperExpr,
3024 const SCEV *IncExpr,
3025 ScalarEvolution &SE) {
3038 SmallPtrSet<const SCEV*, 8> Processed;
3059 if (!Chain.hasIncs())
3062 if (!
Users.empty()) {
3063 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" users:\n";
3065 :
Users) {
dbgs() <<
" " << *Inst <<
"\n"; });
3068 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3077 && SE.
getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3080 const SCEV *LastIncExpr =
nullptr;
3081 unsigned NumConstIncrements = 0;
3082 unsigned NumVarIncrements = 0;
3083 unsigned NumReusedIncrements = 0;
3085 if (
TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
3088 for (
const IVInc &Inc : Chain) {
3089 if (
TTI.isProfitableLSRChainElement(Inc.UserInst))
3091 if (Inc.IncExpr->isZero())
3097 ++NumConstIncrements;
3101 if (Inc.IncExpr == LastIncExpr)
3102 ++NumReusedIncrements;
3106 LastIncExpr = Inc.IncExpr;
3111 if (NumConstIncrements > 1)
3118 cost += NumVarIncrements;
3122 cost -= NumReusedIncrements;
3124 LLVM_DEBUG(
dbgs() <<
"Chain: " << *Chain.Incs[0].UserInst <<
" Cost: " << cost
3131void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
3132 SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3136 const SCEV *
const OperExpr = SE.
getSCEV(NextIV);
3137 const SCEV *
const OperExprBase =
getExprBase(OperExpr);
3141 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3142 const SCEV *LastIncExpr =
nullptr;
3143 for (; ChainIdx < NChains; ++ChainIdx) {
3144 IVChain &Chain = IVChainVec[ChainIdx];
3162 const SCEV *PrevExpr = SE.
getSCEV(PrevIV);
3163 const SCEV *IncExpr = SE.
getMinusSCEV(OperExpr, PrevExpr);
3167 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3168 LastIncExpr = IncExpr;
3174 if (ChainIdx == NChains) {
3181 LastIncExpr = OperExpr;
3188 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3190 ChainUsersVec.
resize(NChains);
3191 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Head: (" << *UserInst
3192 <<
") IV=" << *LastIncExpr <<
"\n");
3194 LLVM_DEBUG(
dbgs() <<
"IV Chain#" << ChainIdx <<
" Inc: (" << *UserInst
3195 <<
") IV+" << *LastIncExpr <<
"\n");
3197 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3199 IVChain &Chain = IVChainVec[ChainIdx];
3201 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
3203 if (!LastIncExpr->
isZero()) {
3204 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
3213 for (User *U : IVOper->
users()) {
3219 IVChain::const_iterator IncIter = Chain.Incs.begin();
3220 IVChain::const_iterator IncEnd = Chain.Incs.end();
3221 for( ; IncIter != IncEnd; ++IncIter) {
3222 if (IncIter->UserInst == OtherUse)
3225 if (IncIter != IncEnd)
3230 && IU.isIVUserOrOperand(OtherUse)) {
3233 NearUsers.
insert(OtherUse);
3238 ChainUsersVec[ChainIdx].FarUsers.
erase(UserInst);
3263void LSRInstance::CollectChains() {
3267 SmallVector<BasicBlock *,8> LatchPath;
3270 Rung->
getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3276 for (BasicBlock *BB :
reverse(LatchPath)) {
3277 for (Instruction &
I : *BB) {
3289 for (
unsigned ChainIdx = 0, NChains = IVChainVec.size();
3290 ChainIdx < NChains; ++ChainIdx) {
3291 ChainUsersVec[ChainIdx].NearUsers.
erase(&
I);
3294 SmallPtrSet<Instruction*, 4> UniqueOperands;
3297 while (IVOpIter != IVOpEnd) {
3299 if (UniqueOperands.
insert(IVOpInst).second)
3300 ChainInstruction(&
I, IVOpInst, ChainUsersVec);
3301 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3306 for (PHINode &PN :
L->getHeader()->phis()) {
3313 ChainInstruction(&PN, IncV, ChainUsersVec);
3316 unsigned ChainIdx = 0;
3317 for (
unsigned UsersIdx = 0, NChains = IVChainVec.size();
3318 UsersIdx < NChains; ++UsersIdx) {
3320 ChainUsersVec[UsersIdx].FarUsers, SE,
TTI))
3323 if (ChainIdx != UsersIdx)
3324 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3325 FinalizeChain(IVChainVec[ChainIdx]);
3328 IVChainVec.resize(ChainIdx);
3331void LSRInstance::FinalizeChain(IVChain &Chain) {
3332 assert(!Chain.Incs.empty() &&
"empty IV chains are not allowed");
3333 LLVM_DEBUG(
dbgs() <<
"Final Chain: " << *Chain.Incs[0].UserInst <<
"\n");
3335 for (
const IVInc &Inc : Chain) {
3337 auto UseI =
find(Inc.UserInst->operands(), Inc.IVOperand);
3338 assert(UseI != Inc.UserInst->op_end() &&
"cannot find IV operand");
3339 IVIncSet.insert(UseI);
3347 Immediate IncOffset = Immediate::getZero();
3356 C->getSignificantBits() > 64)
3358 IncOffset = Immediate::getScalable(
C->getSExtValue());
3374void LSRInstance::GenerateIVChain(
const IVChain &Chain,
3375 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3378 const IVInc &Head = Chain.Incs[0];
3383 Value *IVSrc =
nullptr;
3384 while (IVOpIter != IVOpEnd) {
3395 if (SE.
getSCEV(*IVOpIter) == Head.IncExpr
3396 || SE.
getSCEV(IVSrc) == Head.IncExpr) {
3399 IVOpIter =
findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3401 if (IVOpIter == IVOpEnd) {
3403 LLVM_DEBUG(
dbgs() <<
"Concealed chain head: " << *Head.UserInst <<
"\n");
3406 assert(IVSrc &&
"Failed to find IV chain source");
3411 const SCEV *LeftOverExpr =
nullptr;
3412 const SCEV *Accum = SE.
getZero(IntTy);
3416 for (
const IVInc &Inc : Chain) {
3419 InsertPt =
L->getLoopLatch()->getTerminator();
3423 Value *IVOper = IVSrc;
3424 if (!Inc.IncExpr->isZero()) {
3429 LeftOverExpr = LeftOverExpr ?
3430 SE.
getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3434 bool FoundBase =
false;
3435 for (
auto [MapScev, MapIVOper] :
reverse(Bases)) {
3436 const SCEV *Remainder = SE.
getMinusSCEV(Accum, MapScev);
3438 if (!Remainder->
isZero()) {
3440 Value *IncV =
Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3441 const SCEV *IVOperExpr =
3443 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3452 if (!FoundBase && LeftOverExpr && !LeftOverExpr->
isZero()) {
3455 Value *IncV =
Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3458 IVOper =
Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3462 assert(IVTy == IVOper->
getType() &&
"inconsistent IV increment type");
3465 LeftOverExpr =
nullptr;
3469 if (IVTy != OperTy) {
3471 "cannot extend a chained IV");
3473 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy,
"lsr.chain");
3475 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3482 for (PHINode &Phi :
L->getHeader()->phis()) {
3486 Phi.getIncomingValueForBlock(
L->getLoopLatch()));
3489 Value *IVOper = IVSrc;
3491 if (IVTy != PostIncTy) {
3493 IRBuilder<> Builder(
L->getLoopLatch()->getTerminator());
3494 Builder.SetCurrentDebugLocation(PostIncV->
getDebugLoc());
3495 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy,
"lsr.chain");
3497 Phi.replaceUsesOfWith(PostIncV, IVOper);
3503void LSRInstance::CollectFixupsAndInitialFormulae() {
3504 BranchInst *ExitBranch =
nullptr;
3505 bool SaveCmp =
TTI.
canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3508 SmallPtrSet<const SCEV *, 16> Regs;
3509 DenseSet<const SCEV *> VisitedRegs;
3510 DenseSet<size_t> VisitedLSRUse;
3512 for (
const IVStrideUse &U : IU) {
3517 assert(UseI != UserInst->
op_end() &&
"cannot find IV operand");
3518 if (IVIncSet.count(UseI)) {
3519 LLVM_DEBUG(
dbgs() <<
"Use is in profitable chain: " << **UseI <<
'\n');
3523 LSRUse::KindType
Kind = LSRUse::Basic;
3524 MemAccessTy AccessTy;
3526 Kind = LSRUse::Address;
3530 const SCEV *S = IU.getExpr(U);
3546 if (CI->isEquality()) {
3549 Value *
NV = CI->getOperand(1);
3550 if (NV ==
U.getOperandValToReplace()) {
3551 CI->setOperand(1, CI->getOperand(0));
3552 CI->setOperand(0, NV);
3553 NV = CI->getOperand(1);
3560 (!
NV->getType()->isPointerTy() ||
3567 Kind = LSRUse::ICmpZero;
3569 }
else if (
L->isLoopInvariant(NV) &&
3572 !
NV->getType()->isPointerTy()) {
3583 Kind = LSRUse::ICmpZero;
3590 for (
size_t i = 0, e = Factors.size(); i != e; ++i)
3591 if (Factors[i] != -1)
3592 Factors.insert(-(uint64_t)Factors[i]);
3598 std::pair<size_t, Immediate>
P = getUse(S, Kind, AccessTy);
3599 size_t LUIdx =
P.first;
3601 LSRUse &LU =
Uses[LUIdx];
3604 LSRFixup &LF = LU.getNewFixup();
3605 LF.UserInst = UserInst;
3606 LF.OperandValToReplace =
U.getOperandValToReplace();
3607 LF.PostIncLoops = TmpPostIncLoops;
3609 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3612 if (!VisitedLSRUse.
count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3614 F.initialMatch(S, L, SE);
3615 BaselineCost.RateFormula(
F, Regs, VisitedRegs, LU,
3616 HardwareLoopProfitable);
3617 VisitedLSRUse.
insert(LUIdx);
3620 if (!LU.WidestFixupType ||
3623 LU.WidestFixupType = LF.OperandValToReplace->
getType();
3626 if (LU.Formulae.empty()) {
3627 InsertInitialFormula(S, LU, LUIdx);
3628 CountRegisters(LU.Formulae.back(), LUIdx);
3637void LSRInstance::InsertInitialFormula(
const SCEV *S, LSRUse &LU,
3641 LU.RigidFormula =
true;
3644 F.initialMatch(S, L, SE);
3645 bool Inserted = InsertFormula(LU, LUIdx,
F);
3646 assert(Inserted &&
"Initial formula already exists!"); (void)Inserted;
3652LSRInstance::InsertSupplementalFormula(
const SCEV *S,
3653 LSRUse &LU,
size_t LUIdx) {
3655 F.BaseRegs.push_back(S);
3656 F.HasBaseReg =
true;
3657 bool Inserted = InsertFormula(LU, LUIdx,
F);
3658 assert(Inserted &&
"Supplemental formula already exists!"); (void)Inserted;
3662void LSRInstance::CountRegisters(
const Formula &
F,
size_t LUIdx) {
3664 RegUses.countRegister(
F.ScaledReg, LUIdx);
3665 for (
const SCEV *BaseReg :
F.BaseRegs)
3666 RegUses.countRegister(BaseReg, LUIdx);
3671bool LSRInstance::InsertFormula(LSRUse &LU,
unsigned LUIdx,
const Formula &
F) {
3674 "Formula is illegal");
3676 if (!LU.InsertFormula(
F, *L))
3679 CountRegisters(
F, LUIdx);
3689LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3691 SmallPtrSet<const SCEV *, 32> Visited;
3698 while (!Worklist.
empty()) {
3702 if (!Visited.
insert(S).second)
3713 const Value *
V = US->getValue();
3716 if (
L->contains(Inst))
continue;
3720 for (
const Use &U :
V->uses()) {
3730 if (UserInst->
getParent()->getParent() !=
L->getHeader()->getParent())
3752 bool HasIncompatibleEHPTerminatedBlock =
false;
3754 for (
unsigned int I = 0;
I < PhiNode->getNumIncomingValues();
I++) {
3755 if (PhiNode->getIncomingValue(
I) == ExpectedValue) {
3756 if (PhiNode->getIncomingBlock(
I)->getTerminator()->isEHPad()) {
3757 HasIncompatibleEHPTerminatedBlock =
true;
3762 if (HasIncompatibleEHPTerminatedBlock) {
3785 unsigned OtherIdx = !
U.getOperandNo();
3786 Value *OtherOp = ICI->getOperand(OtherIdx);
3796 std::pair<size_t, Immediate>
P =
3797 getUse(S, LSRUse::Basic, MemAccessTy());
3798 size_t LUIdx =
P.first;
3800 LSRUse &LU =
Uses[LUIdx];
3801 LSRFixup &LF = LU.getNewFixup();
3802 LF.UserInst =
const_cast<Instruction *
>(UserInst);
3803 LF.OperandValToReplace =
U;
3805 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3806 if (!LU.WidestFixupType ||
3809 LU.WidestFixupType = LF.OperandValToReplace->
getType();
3810 InsertSupplementalFormula(US, LU, LUIdx);
3811 CountRegisters(LU.Formulae.back(),
Uses.size() - 1);
3827 unsigned Depth = 0) {
3834 for (
const SCEV *S :
Add->operands()) {
3841 const SCEV *Start, *Step;
3846 if (Start->isZero())
3855 Remainder =
nullptr;
3857 if (Remainder != Start) {
3879 LSRUse &LU,
const SCEV *S,
const Loop *L,
3881 if (LU.Kind != LSRUse::Address ||
3882 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3888 if (
TTI.isIndexedLoadLegal(
TTI.MIM_PostInc, S->
getType()) ||
3897void LSRInstance::GenerateReassociationsImpl(LSRUse &LU,
unsigned LUIdx,
3898 const Formula &
Base,
3899 unsigned Depth,
size_t Idx,
3901 const SCEV *
BaseReg = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
3909 const SCEV *Remainder =
CollectSubexprs(BaseReg,
nullptr, AddOps, L, SE);
3913 if (AddOps.
size() == 1)
3927 LU.AccessTy, *J,
Base.getNumRegs() > 1))
3932 InnerAddOps.append(std::next(J), std::as_const(AddOps).
end());
3936 if (InnerAddOps.size() == 1 &&
3938 LU.AccessTy, InnerAddOps[0],
Base.getNumRegs() > 1))
3941 const SCEV *InnerSum = SE.
getAddExpr(InnerAddOps);
3946 if (
F.UnfoldedOffset.isNonZero() &&
F.UnfoldedOffset.isScalable())
3955 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
3958 F.ScaledReg =
nullptr;
3961 F.BaseRegs.erase(
F.BaseRegs.begin() + Idx);
3962 }
else if (IsScaledReg)
3963 F.ScaledReg = InnerSum;
3965 F.BaseRegs[Idx] = InnerSum;
3973 Immediate::getFixed((uint64_t)
F.UnfoldedOffset.getFixedValue() +
3976 F.BaseRegs.push_back(*J);
3981 if (InsertFormula(LU, LUIdx,
F))
3988 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
3994void LSRInstance::GenerateReassociations(LSRUse &LU,
unsigned LUIdx,
3996 assert(
Base.isCanonical(*L) &&
"Input must be in the canonical form");
4001 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4002 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth, i);
4004 if (
Base.Scale == 1)
4005 GenerateReassociationsImpl(LU, LUIdx,
Base,
Depth,
4011void LSRInstance::GenerateCombinations(LSRUse &LU,
unsigned LUIdx,
4014 if (
Base.BaseRegs.size() + (
Base.Scale == 1) +
4015 (
Base.UnfoldedOffset.isNonZero()) <=
4023 Formula NewBase =
Base;
4024 NewBase.BaseRegs.clear();
4025 Type *CombinedIntegerType =
nullptr;
4026 for (
const SCEV *BaseReg :
Base.BaseRegs) {
4029 if (!CombinedIntegerType)
4031 Ops.push_back(BaseReg);
4034 NewBase.BaseRegs.push_back(BaseReg);
4038 if (
Ops.size() == 0)
4043 auto GenerateFormula = [&](
const SCEV *Sum) {
4044 Formula
F = NewBase;
4052 F.BaseRegs.push_back(Sum);
4054 (void)InsertFormula(LU, LUIdx,
F);
4058 if (
Ops.size() > 1) {
4065 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4066 assert(CombinedIntegerType &&
"Missing a type for the unfolded offset");
4068 NewBase.UnfoldedOffset.getFixedValue(),
true));
4069 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4075void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU,
unsigned LUIdx,
4076 const Formula &
Base,
size_t Idx,
4078 const SCEV *
G = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
4080 if (
G->isZero() || !GV)
4084 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4089 F.BaseRegs[Idx] =
G;
4090 (void)InsertFormula(LU, LUIdx,
F);
4094void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU,
unsigned LUIdx,
4097 if (
Base.BaseGV)
return;
4099 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4100 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, i);
4101 if (
Base.Scale == 1)
4102 GenerateSymbolicOffsetsImpl(LU, LUIdx,
Base, -1,
4107void LSRInstance::GenerateConstantOffsetsImpl(
4108 LSRUse &LU,
unsigned LUIdx,
const Formula &
Base,
4109 const SmallVectorImpl<Immediate> &Worklist,
size_t Idx,
bool IsScaledReg) {
4111 auto GenerateOffset = [&](
const SCEV *
G, Immediate
Offset) {
4113 if (!
Base.BaseOffset.isCompatibleImmediate(
Offset))
4115 F.BaseOffset =
Base.BaseOffset.subUnsigned(
Offset);
4117 if (
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F)) {
4119 const SCEV *NewOffset =
Offset.getSCEV(SE,
G->getType());
4125 F.ScaledReg =
nullptr;
4127 F.deleteBaseReg(
F.BaseRegs[Idx]);
4129 }
else if (IsScaledReg)
4132 F.BaseRegs[Idx] = NewG;
4134 (void)InsertFormula(LU, LUIdx,
F);
4138 const SCEV *
G = IsScaledReg ?
Base.ScaledReg :
Base.BaseRegs[Idx];
4149 const APInt *StepInt;
4154 for (Immediate
Offset : Worklist) {
4156 Offset = Immediate::getFixed(
Offset.getFixedValue() - Step);
4162 for (Immediate
Offset : Worklist)
4166 if (
G->isZero() ||
Imm.isZero() ||
4167 !
Base.BaseOffset.isCompatibleImmediate(Imm))
4170 F.BaseOffset =
F.BaseOffset.addUnsigned(Imm);
4171 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F))
4176 F.BaseRegs[Idx] =
G;
4181 (void)InsertFormula(LU, LUIdx,
F);
4185void LSRInstance::GenerateConstantOffsets(LSRUse &LU,
unsigned LUIdx,
4191 if (LU.MaxOffset != LU.MinOffset)
4194 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i)
4195 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, i);
4196 if (
Base.Scale == 1)
4197 GenerateConstantOffsetsImpl(LU, LUIdx,
Base, Worklist, -1,
4203void LSRInstance::GenerateICmpZeroScales(LSRUse &LU,
unsigned LUIdx,
4205 if (LU.Kind != LSRUse::ICmpZero)
return;
4213 if (LU.MinOffset != LU.MaxOffset)
return;
4216 if (
Base.ScaledReg &&
Base.ScaledReg->getType()->isPointerTy())
4218 for (
const SCEV *BaseReg :
Base.BaseRegs)
4219 if (
BaseReg->getType()->isPointerTy())
4221 assert(!
Base.BaseGV &&
"ICmpZero use is not legal!");
4224 for (int64_t Factor : Factors) {
4229 if (
Base.BaseOffset.isMin() && Factor == -1)
4232 if (
Base.BaseOffset.isNonZero() &&
Base.BaseOffset.isScalable())
4234 Immediate NewBaseOffset =
Base.BaseOffset.mulUnsigned(Factor);
4235 assert(Factor != 0 &&
"Zero factor not expected!");
4236 if (NewBaseOffset.getFixedValue() / Factor !=
4237 Base.BaseOffset.getFixedValue())
4245 Immediate
Offset = LU.MinOffset;
4246 if (
Offset.isMin() && Factor == -1)
4249 if (
Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4257 F.BaseOffset = NewBaseOffset;
4264 F.BaseOffset =
F.BaseOffset.addUnsigned(
Offset).subUnsigned(LU.MinOffset);
4266 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4269 for (
size_t i = 0, e =
F.BaseRegs.size(); i != e; ++i) {
4283 if (
F.UnfoldedOffset.isNonZero()) {
4284 if (
F.UnfoldedOffset.isMin() && Factor == -1)
4286 F.UnfoldedOffset =
F.UnfoldedOffset.mulUnsigned(Factor);
4287 if (
F.UnfoldedOffset.getFixedValue() / Factor !=
4288 Base.UnfoldedOffset.getFixedValue())
4292 IntTy,
F.UnfoldedOffset.getFixedValue()))
4297 (void)InsertFormula(LU, LUIdx,
F);
4304void LSRInstance::GenerateScales(LSRUse &LU,
unsigned LUIdx, Formula
Base) {
4311 if (
Base.Scale != 0 && !
Base.unscale())
  assert(Base.Scale == 0 && "unscale did not do its job!");
4317 for (int64_t Factor : Factors) {
4318 Base.Scale = Factor;
4319 Base.HasBaseReg =
Base.BaseRegs.size() > 1;
4321 if (!
isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4325 if (LU.Kind == LSRUse::Basic &&
4326 isLegalUse(
TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4327 LU.AccessTy,
Base) &&
4328 LU.AllFixupsOutsideLoop)
4329 LU.Kind = LSRUse::Special;
4335 if (LU.Kind == LSRUse::ICmpZero && !
Base.HasBaseReg &&
4336 Base.BaseOffset.isZero() && !
Base.BaseGV)
4339 for (
size_t i = 0, e =
Base.BaseRegs.size(); i != e; ++i) {
4341 if (AR && (AR->
getLoop() == L || LU.AllFixupsOutsideLoop)) {
4342 const SCEV *FactorS = SE.
getConstant(IntTy, Factor);
4347 if (
const SCEV *Quotient =
getExactSDiv(AR, FactorS, SE,
true))
4348 if (!Quotient->isZero()) {
4351 F.ScaledReg = Quotient;
4352 F.deleteBaseReg(
F.BaseRegs[i]);
4356 if (
F.Scale == 1 && (
F.BaseRegs.empty() ||
4357 (AR->
getLoop() != L && LU.AllFixupsOutsideLoop)))
4361 if (
F.Scale == 1 && LU.AllFixupsOutsideLoop)
4363 (void)InsertFormula(LU, LUIdx,
F);
4379 const SCEV *Result =
nullptr;
4380 for (
auto &L :
Loops) {
4384 if (!New || (Result && New != Result))
4389 assert(Result &&
"failed to create expression");
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // ...
  if (Base.BaseGV) return;
  // ...
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
    // ...
             [](const SCEV *S) { return S->getType()->isPointerTy(); }))
    // ...
  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);
  // ...
  for (Type *SrcTy : Types) {
    // ...
    const SCEV *NewScaledReg =
        // ...
      if (!NewScaledReg || NewScaledReg->isZero())
        // ...
      F.ScaledReg = NewScaledReg;
    // ...
    bool HasZeroBaseReg = false;
    for (const SCEV *&BaseReg : F.BaseRegs) {
      const SCEV *NewBaseReg =
          // ...
      if (!NewBaseReg || NewBaseReg->isZero()) {
        HasZeroBaseReg = true;
        // ...
    if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
      // ...
    (void)InsertFormula(LU, LUIdx, F);
  const SCEV *OrigReg;

  // ...
  WorkItem(size_t LI, Immediate I, const SCEV *R)
      : LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;
  // ...

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::print(raw_ostream &OS) const {
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
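// GenerateCrossUseConstantOffsets looks for registers used by different
// LSRUses that are a constant offset apart; when it finds them it queues
// WorkItems so that one register plus an immediate adjustment can serve
// several uses.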
void LSRInstance::GenerateCrossUseConstantOffsets() {
  // ...
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
  // ...
  DenseMap<const SCEV *, ImmMapTy> Map;
  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
  // ...
  for (const SCEV *Use : RegUses) {
    // ...
    auto Pair = Map.try_emplace(Reg);
    // ...
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
  // ...
  SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
      // ...
  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;
    // ...
    if (Imms.size() == 1)
      // ...
    for (const auto &Entry
         // ...
                 << ' ' << Entry.first;
    // ...
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
         // ...
      const SCEV *OrigReg = J->second;
      // ...
      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
      // ...
          UsedByIndicesMap[Reg].count() == 1) {
        // ...
        Immediate First = Imms.begin()->first;
        Immediate Last = std::prev(Imms.end())->first;
        if (!First.isCompatibleImmediate(Last)) {
          // ...
        bool Scalable = First.isScalable() || Last.isScalable();
        int64_t FI = First.getKnownMinValue();
        int64_t LI = Last.getKnownMinValue();
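        // Compute the midpoint of FI and LI without overflow: FI + LI equals
        // 2*(FI & LI) + (FI ^ LI), so (FI & LI) + ((FI ^ LI) >> 1) is the
        // average rounded toward negative infinity. The second statement adds
        // 1 back when the result is negative and FI + LI is odd, so the final
        // value rounds toward zero.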
        int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
        // ...
        Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
        ImmMapTy::const_iterator OtherImms[] = {
            Imms.begin(), std::prev(Imms.end()),
            Imms.lower_bound(Immediate::get(Avg, Scalable))};
        for (const auto &M : OtherImms) {
          if (M == J || M == JE) continue;
          if (!JImm.isCompatibleImmediate(M->first))
            // ...
          Immediate Imm = JImm.subUnsigned(M->first);
          for (unsigned LUIdx : UsedByIndices.set_bits())
            // ...
            if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
              WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
  // ...
  UsedByIndicesMap.clear();
  UniqueItems.clear();
  // ...
  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;
    // ...
    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
    // ...
    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];
      // ...
      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
          // ...
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        // ...
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
          // ...
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
          // ...
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
        // ...
        if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
          // ...
          if (C->getValue()->isNegative() !=
                  (NewF.BaseOffset.isLessThanZero()) &&
              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))
            // ...
        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
      // ...
        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          // ...
          if (BaseReg != OrigReg)
            // ...
          if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
              !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
              !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
            // ...
          NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
          // ...
                          LU.Kind, LU.AccessTy, NewF)) {
            // ...
            Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
            // ...
            NewF.UnfoldedOffset = NewUnfoldedOffset;
          // ...
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
          // ...
          for (const SCEV *NewReg : NewF.BaseRegs)
            // ...
            if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
              // ...
              if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                      // ...
                      .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                  (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                      // ...
                          NewF.BaseOffset.getFixedValue()))
                // ...
          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
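// GenerateAllReuseFormulae runs the per-use formula generators in several
// passes. Note that each inner loop caches f = LU.Formulae.size() up front,
// so formulae added by a generator are not re-fed into that same generator
// within the pass.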
void LSRInstance::GenerateAllReuseFormulae() {
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
  // ...
  GenerateCrossUseConstantOffsets();
  // ...
             "After generating reuse formulae:\n";
             print_uses(dbgs()));
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
  // ...
  bool ChangedFormulae = false;
  // ...
  using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>, size_t>;
  // ...
  BestFormulaeTy BestFormulae;
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // ...
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      // ...
      CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
        // ...
      if (CostF.isLoser()) {
        // ...
      for (const SCEV *Reg : F.BaseRegs) {
        if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
          // ...
          RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
        Key.push_back(F.ScaledReg);
      // ...
      std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));
      // ...
        Formula &Best = LU.Formulae[P.first->second];
        // ...
        Cost CostBest(L, SE, TTI, AMK);
        // ...
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
                             HardwareLoopProfitable);
        if (CostF.isLess(CostBest))
          // ...
                              " in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
        // ...
        ChangedFormulae = true;
        // ...
        LU.DeleteFormula(F);
        // ...
    LU.RecomputeRegs(LUIdx, RegUses);
  // ...
  BestFormulae.clear();
  // ...
             "After filtering out undesirable candidates:\n";
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  // ...
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  // ...
  LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                       "which use a superset of registers used by other "
    // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // ...
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
        // ...
           I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
        // ...
              Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                  (uint64_t)C->getValue()->getSExtValue());
          NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                              (I - F.BaseRegs.begin()));
          if (LU.HasFormulaWithSameRegs(NewF)) {
            // ...
            LU.DeleteFormula(F);
            // ...
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              // ...
              LU.DeleteFormula(F);
              // ...
    LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  // ...
             dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by assuming that uses "
                       "separated by a constant offset will use the same "
                       "registers.\n");
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
        // ...
      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      // ...
      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                              LU.Kind, LU.AccessTy))
        // ...
      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
      // ...
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        // ...
      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          // ...
          LUThatHas->DeleteFormula(F);
          // ...
      LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
      // ...
      DeleteUse(LU, LUIdx);
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
  // ...
  LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                       "undesirable dedicated registers.\n");
  // ...
  FilterOutUndesirableDedicatedRegisters();
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
  // ...
             dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by choosing the best Formula "
                       "from the Formulae with the same Scale and ScaledReg.\n");
  // ...
  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
  // ...
  BestFormulaeTy BestFormulae;
  // ...
  bool ChangedFormulae = false;
  // ...
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
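    // Note: IsBetterThan first compares a rough register-pressure estimate
    // (for each base register, NumUses - UsedByIndices.count() + 1 counts the
    // uses that do not already share that register, so a smaller sum means
    // more sharing) and only falls back to full cost rating on a tie.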
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      // ...
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      // ...
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      // ...
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;
      // ...
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
      // ...
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
      return CostFA.isLess(CostFB);
    // ...
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         // ...
      Formula &F = LU.Formulae[FIdx];
      // ...
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      // ...
        Formula &Best = LU.Formulae[P.first->second];
        if (IsBetterThan(F, Best))
          // ...
                              " in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
        // ...
        ChangedFormulae = true;
        // ...
        LU.DeleteFormula(F);
        // ...
    LU.RecomputeRegs(LUIdx, RegUses);
    // ...
    BestFormulae.clear();
  // ...
             "After filtering out undesirable candidates:\n";
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
  // ...
             "Narrowing the search space by choosing the lowest "
             "register Formula for PostInc Uses.\n");
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // ...
    if (LU.Kind != LSRUse::Address)
      // ...
    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);
    // ...
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         // ...
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        // ...
        LU.DeleteFormula(F);
        // ...
    LU.RecomputeRegs(LUIdx, RegUses);
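// Note: NarrowSearchSpaceByDeletingCostlyFormulas keeps a single formula per
// use. RegNumMap caches, for each register, the product of the probabilities
// that the register is *not* selected by each use referencing it; dividing by
// the current use's own probability scores a formula by how unlikely its
// registers are to be shared with other uses, and the lowest-scoring formula
// is kept.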
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  // ...
  SmallPtrSet<const SCEV *, 4> UniqRegs;
  // ...
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    // ...
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
        // ...
      float P = LU.getNotSelectedProbability(Reg);
      // ...
    RegNumMap.insert(std::make_pair(Reg, PNotSel));
  // ...
      dbgs() << "Narrowing the search space by deleting costly formulas\n");
  // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // ...
    if (LU.Formulae.size() < 2)
      // ...
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    // ...
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      // ...
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
          // ...
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        // ...
            RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      // ...
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          // ...
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          // ...
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
      // ...
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
        // ...
                      " with min reg num " << FMinRegNum << '\n');
    // ...
    std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      // ...
      LU.Formulae.pop_back();
    // ...
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
                                       // ...
                                       MemAccessTy AccessType) {
  // ...
  return TTI.isLegalAddressingMode(
             AccessType.MemTy, nullptr,
             Diff->getSExtValue(),
             true, 0, AccessType.AddrSpace) &&
         !TTI.isLegalAddressingMode(
             AccessType.MemTy, nullptr,
             -Diff->getSExtValue(),
             true, 0, AccessType.AddrSpace);
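// NarrowSearchSpaceByPickingWinnerRegs picks the register referenced by the
// most remaining uses (breaking ties toward one whose address use looks
// simpler for the target) and deletes every formula that does not reference
// it, repeating while the search space stays too large.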
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // ...
  SmallPtrSet<const SCEV *, 4> Taken;
  // ...
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      // ...
        BestNum = RegUses.getUsedByIndices(Reg).count();
        // ...
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          // ...
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
              // ...
                                            Uses[LUIdx].AccessTy)) {
            // ...
    assert(Best && "Failed to find best LSRUse candidate");
    // ...
    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    // ...
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best)) continue;
      // ...
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          // ...
          LU.DeleteFormula(F);
          // ...
          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
          // ...
      LU.RecomputeRegs(LUIdx, RegUses);
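// Note: most of the narrowing steps invoked below first check
// EstimateSearchSpaceComplexity() against the lsr-complexity-limit threshold
// (those guards are elided in this listing) and return early when the search
// space is already small enough to solve exactly.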
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  // ...
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  // ...
    NarrowSearchSpaceByDeletingCostlyFormulas();
  // ...
    NarrowSearchSpaceByPickingWinnerRegs();
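// SolveRecurse does a depth-first search, choosing one formula per use in
// order (Workspace.size() indexes the current use). Formulae that do not
// cover all of the registers already required by earlier choices are
// generally skipped, and any partial solution whose cost is not strictly
// less than the best complete solution found so far is pruned.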
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               // ...
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  // ...
  const LSRUse &LU = Uses[Workspace.size()];
  // ...
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      // ...
  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    // ...
      int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
      for (const SCEV *Reg : ReqRegs) {
        if ((F.ScaledReg && F.ScaledReg == Reg) ||
            // ...
          if (NumReqRegsToFind == 0)
            // ...
      if (NumReqRegsToFind != 0) {
        // ...
    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
    if (NewCost.isLess(SolutionCost)) {
      // ...
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
        // ...
                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs()
                      << "- " << *S << "\n";
        // ...
        SolutionCost = NewCost;
        Solution = Workspace;
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  // ...
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  // ...
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    // ...
             "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               // ...
               Solution[i]->print(dbgs());
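  // The result is then compared against BaselineCost, the cost of keeping the
  // loop's existing induction variables; if the baseline is cheaper and
  // lsr-drop-solution allows it, the LSR solution is discarded.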
  const bool EnableDropUnprofitableSolution = [&] {
    // ...
  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
      // ...
          dbgs() << "Baseline is more profitable than chosen solution, "
                    "add option 'lsr-drop-solution' to drop LSR solution.\n");
      // ...
                        "solution, dropping LSR solution.\n";);
                    // ...
                    const SmallVectorImpl<Instruction *> &Inputs)
    // ...
  bool AllDominate = true;
  // ...
  for (Instruction *Inst : Inputs) {
    if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
      AllDominate = false;
      // ...
    if (Tentative->getParent() == Inst->getParent() &&
        (!BetterPos || !DT.dominates(Inst, BetterPos)))
      // ...
  const Loop *IPLoop = LI.getLoopFor(IP->getParent());
  unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
  // ...
    if (!Rung) return IP;
    Rung = Rung->getIDom();
    if (!Rung) return IP;
    IDom = Rung->getBlock();
    // ...
    const Loop *IDomLoop = LI.getLoopFor(IDom);
    unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
    if (IDomDepth <= IPLoopDepth &&
        (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
  SmallVector<Instruction *, 4> Inputs;
  // ...
  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =
        // ...
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    // ...
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;
    // ...
    if (!ExitingBlocks.empty()) {
      // ...
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
        // ...
         "Insertion point must be a normal instruction");
  // ...
  while (IP->isEHPad()) ++IP;
  // ...
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
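// Expand materializes the chosen formula for one fixup: it adjusts the
// insertion point, expands the base registers, scaled register, and immediate
// offsets through the SCEV expander, and returns the value that will replace
// the fixup's operand.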
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           // ...
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  if (LU.RigidFormula)
    return LF.OperandValToReplace;
  // ...
  IP = AdjustInsertPositionForExpand(IP, LF, LU);
  // ...
  Rewriter.setPostInc(LF.PostIncLoops);
  // ...
  Type *Ty = F.getType();
  // ...
  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");
    // ...
  Value *ICmpScaledV = nullptr;
  // ...
  const SCEV *ScaledS = F.ScaledReg;
  // ...
    if (LU.Kind == LSRUse::ICmpZero) {
      // ...
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
      // ...
      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
        // ...
      Ops.push_back(ScaledS);
  // ...
  assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
         "Expanding mismatched offsets\n");
  // ...
  Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
  if (Offset.isNonZero()) {
    if (LU.Kind == LSRUse::ICmpZero) {
      // ...
          ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());
      // ...
        ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
      // ...
      Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
  // ...
  Immediate UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset.isNonZero()) {
    // ...
    Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
  // ...
  const SCEV *FullS = Ops.empty() ?
      // ...
  if (LU.Kind == LSRUse::ICmpZero) {
    // ...
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
        // ...
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      // ...
                                     -(uint64_t)Offset.getFixedValue());
      if (C->getType() != OpTy) {
        // ...
        assert(C && "Cast of ConstantInt should have folded");
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
                                const LSRFixup &LF, const Formula &F,
                                SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  DenseMap<BasicBlock *, Value *> Inserted;
  // ...
  bool needUpdateFixups = false;
  // ...
        Loop *PNLoop = LI.getLoopFor(Parent);
        if (!PNLoop || Parent != PNLoop->getHeader()) {
          // ...
                             CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
                                 .setMergeIdenticalEdges()
                                 .setKeepOneInputPHIs());
          // ...
            DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
            // ...
            if (L->contains(BB) && !L->contains(PN))
              // ...
          needUpdateFixups = true;
          // ...
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
          // ...
                              LF.OperandValToReplace->getType(), "tmp",
          // ...
          if (L->contains(I) && !L->contains(BB))
            InsertedNonLCSSAInsts.insert(I);
        // ...
        Pair.first->second = FullV;
  // ...
  if (needUpdateFixups) {
    for (LSRUse &LU : Uses)
      for (LSRFixup &Fixup : LU.Fixups)
        // ...
        if (Fixup.UserInst == PN) {
          // ...
          bool foundInOriginalPHI = false;
          // ...
            if (val == Fixup.OperandValToReplace) {
              foundInOriginalPHI = true;
              // ...
          if (foundInOriginalPHI)
            // ...
              if (val == Fixup.OperandValToReplace)
                Fixup.UserInst = NewPN;
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
                          // ...
                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // ...
    RewriteForPHI(PN, LU, LF, F, DeadInsts);
    // ...
    if (FullV->getType() != OpTy) {
      // ...
  if (LU.Kind == LSRUse::ICmpZero)
    // ...
  if (LU.Kind != LSRUse::Address)
    // ...
  if (IVIncInsertPos->getParent() == LHeader)
    // ...
  if (!Fixup.OperandValToReplace ||
      // ...
        Instruction *UI = cast<Instruction>(U);
        return UI->getParent() != LHeader;
      // ...
  Type *Ty = I->getType();
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // ...
  for (const IVChain &Chain : IVChainVec) {
    // ...
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      // ...
                               ? L->getHeader()->getTerminator()
                               // ...
      Rewriter.setIVIncInsertPos(L, InsertPos);
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
      // ...
  auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
  // ...
  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);
    // ...
  for (const WeakVH &IV : Rewriter.getInsertedIVs())
    // ...
  for (PHINode &PN : L->getHeader()->phis()) {
    BinaryOperator *BO = nullptr;
    // ...
    case Instruction::Sub:
      // ...
    case Instruction::Add:
      // ...
              [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI, AssumptionCache &AC,
                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
      // ...
                : TTI.getPreferredAddressingMode(L, &SE)),
      // ...
      BaselineCost(L, SE, TTI, AMK) {
  // ...
  if (!L->isLoopSimplifyForm())
    // ...
  unsigned NumUsers = 0;
  // ...
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
        // ...
      auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();
      // ...
             L->getHeader()->printAsOperand(dbgs(), false);
  // ...
  HardwareLoopProfitable =
      TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
  // ...
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  // ...
  Rewriter.disableCanonicalMode();
  Rewriter.enableLSRMode();
  // ...
  OptimizeLoopTermCond();
  // ...
  if (IU.empty()) return;
  // ...
  if (!L->isInnermost()) {
    // ...
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();
  // ...
             print_uses(dbgs()));
  // ...
             BaselineCost.print(dbgs()); dbgs() << "\n");
  // ...
  GenerateAllReuseFormulae();
  // ...
  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();
  // ...
  if (Solution.empty())
    // ...
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      // ...
                        F) && "Illegal formula generated!");
  // ...
  ImplementSolution(Solution);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty()) return;
  // ...
  OS << "LSR has identified the following interesting factors and types: ";
  // ...
  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    // ...
    OS << '*' << Factor;
  // ...
  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    // ...
    OS << '(' << *Ty << ')';
  // ...
void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      // ...
void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    // ...
    for (const Formula &F : LU.Formulae) {
      // ...
void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
class LoopStrengthReduce : public LoopPass {
  // ...
  LoopStrengthReduce();
  // ...
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  // ...
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  // ...
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
  llvm::DIExpression::expr_op_iterator Begin =
      llvm::DIExpression::expr_op_iterator(Expr.begin());
  llvm::DIExpression::expr_op_iterator End =
      llvm::DIExpression::expr_op_iterator(Expr.end());
  return {Begin, End};
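// SCEVDbgValueBuilder translates SCEVs into a DIExpression DWARF operator
// stream plus a list of location operands, so that dbg.value records that
// referred to an induction variable rewritten by LSR can be recomputed from
// the surviving IV.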
struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  void clone(const SCEVDbgValueBuilder &Base) {
    LocationOps = Base.LocationOps;
    // ...
    LocationOps.clear();
    // ...
  SmallVector<Value *, 2> LocationOps;
  // ...
  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
  // ...
    unsigned ArgIndex = 0;
    if (It != LocationOps.end()) {
      ArgIndex = std::distance(LocationOps.begin(), It);
      // ...
      ArgIndex = LocationOps.size();
      // ...
  void pushValue(const SCEVUnknown *U) {
    // ...
  bool pushConst(const SCEVConstant *C) {
    if (C->getAPInt().getSignificantBits() > 64)
      // ...
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(C->getAPInt().getSExtValue());
    // ...
    return ToDwarfOpIter(Expr);
  // ...
  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
                          // ...
           "Expected arithmetic SCEV type");
    // ...
    unsigned EmitOperator = 0;
    for (const auto &Op : CommExpr->operands()) {
      // ...
      if (EmitOperator >= 1)
        pushOperator(DwarfOp);
      // ...
  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
    const llvm::SCEV *Inner = C->getOperand(0);
    const llvm::Type *Type = C->getType();
    uint64_t ToWidth = Type->getIntegerBitWidth();
    bool Success = pushSCEV(Inner);
    // ...
                          IsSigned ? llvm::dwarf::DW_ATE_signed
                                   : llvm::dwarf::DW_ATE_unsigned};
    for (const auto &Op : CastOps)
      // ...
  bool pushSCEV(const llvm::SCEV *S) {
    // ...
      Success &= pushConst(StartInt);
      // ...
      pushLocation(U->getValue());
      // ...
      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
      // ...
      Success &= pushSCEV(UDiv->getLHS());
      Success &= pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);
      // ...
             "Unexpected cast type in SCEV.");
      // ...
      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
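  // Note: isIdentityFunction below reports whether applying Op with the given
  // constant operand would be a no-op (adding or subtracting 0, multiplying
  // or dividing by 1), which lets the expression builders skip emitting
  // redundant DWARF operators.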
  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
    // ...
      if (C->getAPInt().getSignificantBits() > 64)
        // ...
      int64_t I = C->getAPInt().getSExtValue();
      // ...
      case llvm::dwarf::DW_OP_plus:
      case llvm::dwarf::DW_OP_minus:
        // ...
      case llvm::dwarf::DW_OP_mul:
      case llvm::dwarf::DW_OP_div:
        // ...
  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
    // ...
    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
      if (!pushSCEV(Stride))
        // ...
      pushOperator(llvm::dwarf::DW_OP_mul);
      // ...
    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
      if (!pushSCEV(Start))
        // ...
      pushOperator(llvm::dwarf::DW_OP_plus);
      // ...
  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
    // ...
        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
               << std::to_string(Offset) << "\n");
    // ...
  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
                           ScalarEvolution &SE) {
    // ...
    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
      // ...
    if (!Rec->isAffine())
      // ...
    clone(IterationCount);
    if (!SCEVToValueExpr(*Rec, SE))
      // ...
  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
                           ScalarEvolution &SE) {
    // ...
    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
      if (!pushSCEV(Start))
        // ...
      pushOperator(llvm::dwarf::DW_OP_minus);
      // ...
    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
      if (!pushSCEV(Stride))
        // ...
      pushOperator(llvm::dwarf::DW_OP_div);
6595 SmallVectorImpl<Value *> &DestLocations) {
6597 "Expected the locations vector to contain the IV");
6602 "Expected the location ops to contain the IV.");
6606 for (
const auto &
Op : LocationOps) {
6607 auto It =
find(DestLocations,
Op);
6608 if (It != DestLocations.
end()) {
6610 DestIndexMap.
push_back(std::distance(DestLocations.
begin(), It));
6618 for (
const auto &
Op : expr_ops()) {
6620 Op.appendToVector(DestExpr);
6627 uint64_t NewIndex = DestIndexMap[
Op.getArg(0)];
6635struct DVIRecoveryRec {
6636 DVIRecoveryRec(DbgVariableRecord *DVR)
6637 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(
false) {}
6639 DbgVariableRecord *DbgRef;
6641 bool HadLocationArgList;
6647 for (
auto &RE : RecoveryExprs)
6649 RecoveryExprs.clear();
6652 ~DVIRecoveryRec() { clear(); }
  auto expr_ops = ToDwarfOpIter(Expr);
  // ...
  for (auto Op : expr_ops)
    // ...
template <typename T>
// ...
         "contain any DW_OP_llvm_arg operands.");
  // ...
template <typename T>
// ...
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");
  // ...
  for (Value *V : Locations)
    // ...
  if (NumLLVMArgs == 0) {
    // ...
           "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
    // ...
  LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                    << "scev-salvage: post-LSR: " << *DbgVal << '\n');
  assert(DVIRec.Expr && "Expected an expression");
  // ...
  if (!DVIRec.HadLocationArgList) {
    assert(DVIRec.LocationOps.size() == 1 &&
           "Unexpected number of location ops.");
    // ...
    Value *CachedValue =
        // ...
    for (WeakVH VH : DVIRec.LocationOps) {
      // ...
  LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
                       // ...
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {
  // ...
  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  // ...
  NewLocationOps.push_back(LSRInductionVar);
  // ...
  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    // ...
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
      // ...
                        << " now at index " << LocationOpIndexMap[i] << "\n");
      // ...
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");
      // ...
    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
    // ...
    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
    // ...
    if (std::optional<APInt> Offset =
        // ...
      if (Offset->getSignificantBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
      // ...
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
      // ...
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
           // ...
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);
    // ...
    for (const auto &Op : DVIRec.Expr->expr_ops()) {
      // ...
      SCEVDbgValueBuilder *DbgBuilder =
          DVIRec.RecoveryExprs[LocationArgIndex].get();
      // ...
        assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
               "Expected a positive index for the location-op position.");
        NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
        // ...
      DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
  // ...
  LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");
    // ...
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
  if (DVIToUpdate.empty())
    // ...
  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");
  // ...
    if (!IVAddRec->isAffine())
      // ...
    SCEVDbgValueBuilder IterCountExpr;
    IterCountExpr.pushLocation(LSRInductionVar);
    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
      // ...
    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
      // ...
    for (auto &DVIRec : DVIToUpdate) {
      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
                 // ...
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
      // ...
        if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
          // ...
        if (DbgVal.isKillLocation())
          // ...
        const auto &HasTranslatableLocationOps =
            // ...
              for (const auto LocOp : DbgValToTranslate.location_ops()) {
                // ...
        if (!HasTranslatableLocationOps(DbgVal))
          // ...
        std::unique_ptr<DVIRecoveryRec> NewRec =
            std::make_unique<DVIRecoveryRec>(&DbgVal);
        // ...
        NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
        for (const auto LocOp : DbgVal.location_ops()) {
          NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
          NewRec->LocationOps.push_back(LocOp);
          NewRec->HadLocationArgList = DbgVal.hasArgList();
        // ...
        SalvageableDVISCEVs.push_back(std::move(NewRec));
                      // ...
                      const LSRInstance &LSR) {
  // ...
  auto IsSuitableIV = [&](PHINode *P) {
    // ...
  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    // ...
    if (IsSuitableIV(P))
      // ...
  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
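// ReduceLoopStrength is the driver shared by the legacy and new pass
// managers: it runs LSRInstance over the loop, optionally replaces congruent
// IVs and deletes dead PHIs as a cleanup, and finally rewrites the debug
// values that were gathered before the transformation.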
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  // ...
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  // ...
  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();
  // ...
    const DataLayout &DL = L->getHeader()->getDataLayout();
    // ...
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
    // ...
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    // ...
  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    // ...
    const DataLayout &DL = L->getHeader()->getDataLayout();
    // ...
  if (SalvageableDVIRecords.empty())
    // ...
  for (const auto &L : LI) {
    // ...
    LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                         "could not be identified.\n");
    // ...
  for (auto &Rec : SalvageableDVIRecords)
    // ...
  SalvageableDVIRecords.clear();
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &) {
  // ...
  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  // ...
    MSSA = &MSSAAnalysis->getMSSA();
char LoopStrengthReduce::ID = 0;
// ...
                "Loop Strength Reduction", false, false)
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
early cse Early CSE w MemorySSA
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const unsigned UnknownAddressSpace
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
bool isUnconditional() const
Value * getCondition() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
iterator_range< expr_op_iterator > expr_ops() const
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
unsigned getNumElements() const
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI bool isKillLocation() const
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
PointerType * getType() const
Global values are always pointers.
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
An analysis that produces MemorySSA for a function.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
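A small, hedged sketch of the PHI construction and update calls listed above; the block and value names are purely illustrative:
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
// Build a two-input PHI at the top of Header:
//   iv = phi [Start, Preheader], [Next, Latch]
static PHINode *buildSimpleIVPhi(Type *Ty, BasicBlock *Header,
                                 BasicBlock *Preheader, BasicBlock *Latch,
                                 Value *Start, Value *Next) {
  PHINode *PN =
      PHINode::Create(Ty, /*NumReservedValues=*/2, "iv", Header->begin());
  PN->addIncoming(Start, Preheader);
  PN->addIncoming(Next, Latch);
  return PN;
}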
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all analyses.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
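A hedged sketch of pulling an affine recurrence apart with the accessors above; the helper name is hypothetical:
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
// Decompose {Start,+,Step}<L> if S is an affine add-rec; otherwise fail.
static bool decomposeAffineAddRec(ScalarEvolution &SE, const SCEV *S,
                                  const SCEV *&Start, const SCEV *&Step,
                                  const Loop *&L) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (!AR || !AR->isAffine())
    return false;
  Start = AR->getStart();
  Step = AR->getStepRecurrence(SE);
  L = AR->getLoop();
  return true;
}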
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
This node represents multiplication of some number of SCEVs.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
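As an illustration of how a few of the ScalarEvolution entry points above fit together, a minimal sketch; the helper and its arguments are hypothetical, and the values are assumed to be non-pointer and of the same type:
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
using namespace llvm;
// Return A - B as a SCEV if both values are analyzable and invariant in L.
static const SCEV *invariantDifference(ScalarEvolution &SE, Value *A, Value *B,
                                       const Loop *L) {
  if (A->getType() != B->getType() || A->getType()->isPointerTy())
    return nullptr;
  if (!SE.isSCEVable(A->getType()))
    return nullptr;
  const SCEV *SA = SE.getSCEV(A);
  const SCEV *SB = SE.getSCEV(B);
  if (!SE.isLoopInvariant(SA, L) || !SE.isLoopInvariant(SB, L))
    return nullptr;
  return SE.getMinusSCEV(SA, SB);
}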
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
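A short sketch of the two equivalent ways to walk the set bits enumerated above, assuming a SmallBitVector; the function name is illustrative:
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Print every set bit index, first via the iterator range, then via
// find_first/find_next.
static void printSetBits(const SmallBitVector &BV) {
  for (unsigned Idx : BV.set_bits())
    errs() << Idx << ' ';
  errs() << '\n';
  for (int Idx = BV.find_first(); Idx != -1; Idx = BV.find_next(Idx))
    errs() << Idx << ' ';
  errs() << '\n';
}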
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isVoidTy() const
Return true if this is 'void'.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
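A hedged sketch combining the Value-use queries above with a full replacement; the helper name and the printed message are illustrative only:
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace llvm;
// Count instruction users of Old, then redirect every use of Old to New.
static void replaceEverywhere(Value *Old, Value *New) {
  assert(Old->getType() == New->getType() && "replacement must keep the type");
  unsigned NumInstUsers = 0;
  for (User *U : Old->users())
    if (isa<Instruction>(U))
      ++NumInstUsers;
  errs() << "redirecting " << NumInstUsers << " instruction users\n";
  Old->replaceAllUsesWith(New);
}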
A nullable Value handle.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
SCEVBinaryExpr_match< SCEVMulExpr, Op0_t, Op1_t > m_scev_Mul(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
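A minimal sketch of the SCEV pattern matchers listed above (from ScalarEvolutionPatternMatch.h); the helper name is hypothetical and only the matchers shown here are assumed:
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
using namespace llvm;
using namespace llvm::SCEVPatternMatch;
// True if S is an affine add-rec {<any start>,+,C}<L> with a constant step C,
// for any loop L; the step is bound to StepC on success.
static bool hasConstantStep(const SCEV *S, const APInt *&StepC) {
  return match(S, m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StepC)));
}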
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a BasicBlock.
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
unsigned KindType
For isa, dyn_cast, etc operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
DomTreeNodeBase< BasicBlock > DomTreeNode
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
FunctionAddr VTableAddr Count
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
SmallPtrSet< const Loop *, 2 > PostIncLoopSet
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
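Finally, a small sketch of the range-based STL wrappers listed in this index applied to an ordinary container; the function and the values checked are illustrative:
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
// Range wrappers avoid spelling out begin()/end() for each query.
static bool checkValues(const SmallVector<int, 8> &Values, int Limit) {
  if (is_contained(Values, Limit))
    return false;
  if (any_of(Values, [](int V) { return V < 0; }))
    return false;
  return all_of(Values, [Limit](int V) { return V < Limit; });
}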
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.