using namespace SCEVPatternMatch;

#define DEBUG_TYPE "loop-reduce"

    cl::desc("Enable LSR phi elimination"));

    cl::desc("Add instruction count to a LSR cost model"));

    cl::desc("Narrow LSR complex solution using"
             " expectation of registers number"));

    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

    cl::desc("A flag that overrides the target's preferred addressing mode."),
               "Don't prefer any addressing mode"),
               "Prefer pre-indexed addressing mode"),
               "Prefer post-indexed addressing mode")));

    cl::init(std::numeric_limits<uint16_t>::max()),
    cl::desc("LSR search space complexity limit"));

    cl::desc("The limit on recursion depth for LSRs setup cost"));

    cl::desc("Attempt to drop solution if it is less profitable"));

    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

    cl::desc("Stress test LSR IV chains"));
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

                               unsigned AS = UnknownAddressSpace) {
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = Quantity + RHS.Quantity;
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = Quantity - RHS.Quantity;
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = Quantity * RHS;
    return {Value, Scalable};
  }
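// Immediate models an offset that is either a fixed byte count or a multiple
// of vscale, so plain offsets and scalable-vector addressing ("reg + imm *
// vscale") share one representation. A rough sketch of the intended algebra,
// assuming the accessors above behave as their names suggest:
//
//   Immediate A = Immediate::getFixed(16);          // 16 bytes
//   Immediate B = Immediate::getScalable(8);        // 8 * vscale bytes
//   A.isCompatibleImmediate(B);                     // false: mixed kinds
//   B.isCompatibleImmediate(Immediate::getZero());  // true: zero mixes freely
//   A.addUnsigned(Immediate::getFixed(4));          // fixed 20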
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
};

struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
    if (LSize != RSize)
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
  }
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "[NumUses=" << UsedByIndices.count() << ']';

  RegUsesTy RegUsesMap;

  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
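// RegUseTracker, defined next, maps each candidate register (a SCEV) to the
// set of LSRUse indices that reference it: countRegister/dropRegister maintain
// a per-register bitvector of use indices, and isRegUsedByUsesOtherThan asks
// whether a register is shared across uses, which is what makes reusing it
// cheap when rating formulae.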
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);
  RegSortData &RSD = Pair.first->second;
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;

  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;

void RegUseTracker::clear() {
  Immediate BaseOffset = Immediate::getZero();

  bool HasBaseReg = false;

  const SCEV *ScaledReg = nullptr;

  Immediate UnfoldedOffset = Immediate::getZero();

  void canonicalize(const Loop &L);

  bool hasZeroEnd() const;

  bool countsDownToZero() const;

  size_t getNumRegs() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

    for (const SCEV *S : Add->operands())

  const SCEV *Start, *Step;

  const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
  for (const SCEV *S : MyGood)
  for (const SCEV *S : MyBad)

      BaseRegs.push_back(Sum);

      BaseRegs.push_back(Sum);

  return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
bool Formula::isCanonical(const Loop &L) const {
  assert((Scale == 0 || ScaledReg) &&
         "ScaledReg must be non-null if Scale is non-zero");

    return BaseRegs.size() <= 1;

  if (Scale == 1 && BaseRegs.empty())

void Formula::canonicalize(const Loop &L) {
  if (BaseRegs.empty()) {
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);

  ScaledReg = BaseRegs.pop_back_val();

  if (I != BaseRegs.end())

bool Formula::unscale() {
  BaseRegs.push_back(ScaledReg);

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
  if (BaseRegs.size() != 1 || ScaledReg)

bool Formula::countsDownToZero() const {
  assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");
  const APInt *StepInt;

size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();

Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :

void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())

bool Formula::referencesReg(const SCEV *S) const {

bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    BaseGV->printAsOperand(OS, false);
  if (BaseOffset.isNonZero()) {
  for (const SCEV *BaseReg : BaseRegs) {
  if (HasBaseReg && BaseRegs.empty()) {
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    OS << "**error: !HasBaseReg**";
    OS << Scale << "*reg(";
  if (UnfoldedOffset.isNonZero()) {
    OS << "imm(" << UnfoldedOffset << ')';
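// A Formula, as printed above, describes one way to materialize a use as
//   BaseGV + BaseOffset + sum(BaseRegs) + Scale * ScaledReg (+ UnfoldedOffset)
// For example (illustrative only), an access to p[i] with 8-byte elements
// could be covered by a formula with BaseRegs = {p}, ScaledReg = i, Scale = 8,
// and BaseOffset = 0, which maps directly onto a scaled addressing mode.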
                               bool IgnoreSignificantBits = false) {
    if (RA.isAllOnes()) {
      const APInt &LA = C->getAPInt();
  if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
                                     IgnoreSignificantBits);
    if (!Step) return nullptr;
                                      IgnoreSignificantBits);
    if (!Start) return nullptr;
    for (const SCEV *S : Add->operands()) {
      if (!Op) return nullptr;
          dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
                                           IgnoreSignificantBits)) {

    if (C->getSignificantBits() <= 64) {
      return Immediate::getFixed(C->getSExtValue());
    if (Result.isNonZero())
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    if (Result.isNonZero())
        return Immediate::getScalable(C->getSExtValue());
  return Immediate::getZero();

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getPointerOperand() == OperandVal)
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      if (II->getArgOperand(0) == OperandVal)
    case Intrinsic::masked_store:
      if (II->getArgOperand(1) == OperandVal)
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      if (II->getArgOperand(0) == OperandVal ||
          II->getArgOperand(1) == OperandVal)
      if (IntrInfo.PtrVal == OperandVal)
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
    if (CmpX->getPointerOperand() == OperandVal)

  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
    AccessTy.MemTy = Ty;

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      AccessTy.MemTy = OperandVal->getType();
    case Intrinsic::masked_load:
      AccessTy.AddrSpace =
          II->getArgOperand(0)->getType()->getPointerAddressSpace();
    case Intrinsic::masked_store:
      AccessTy.AddrSpace =
          II->getArgOperand(1)->getType()->getPointerAddressSpace();
  if (!Processed.insert(S).second)

    for (const SCEV *S : Add->operands()) {

  const SCEV *Op0, *Op1;
    if (isa<SCEVConstant>(Op0))
    if (const auto *U = dyn_cast<SCEVUnknown>(Op1)) {
      Value *UVal = U->getValue();
      if (UI && UI->getOpcode() == Instruction::Mul &&
                                 const LSRUse &LU, const Formula &F);
                                 const LSRUse &LU, const Formula &F,

  const Loop *L = nullptr;

      : L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
        || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
             & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

    return C.NumRegs == ~0u;
                   bool HardwareLoopProfitable,

  void RateRegister(const Formula &F, const SCEV *Reg,
                    bool HardwareLoopProfitable);
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                           const LSRUse &LU, bool HardwareLoopProfitable,

  Value *OperandValToReplace = nullptr;

  Immediate Offset = Immediate::getZero();

  LSRFixup() = default;

  bool isUseFullyOutsideLoop(const Loop *L) const;

  MemAccessTy AccessTy;

  Immediate MinOffset = Immediate::getFixedMax();
  Immediate MaxOffset = Immediate::getFixedMin();

  bool AllFixupsOutsideLoop = true;

  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  LSRFixup &getNewFixup() {
    Fixups.push_back(LSRFixup());

  void pushFixup(LSRFixup &f) {
    if (Immediate::isKnownGT(f.Offset, MaxOffset))
      MaxOffset = f.Offset;
    if (Immediate::isKnownLT(f.Offset, MinOffset))
      MinOffset = f.Offset;

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,

  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
  if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
                           [&](unsigned i, const SCEV *Reg) {
                             return i + getSetupCost(Reg, Depth - 1);
  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
                        bool HardwareLoopProfitable) {
    if (AR->getLoop() != L) {
      if (!AR->getLoop()->contains(L)) {

    unsigned LoopCost = 1;
            Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
      if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
          HardwareLoopProfitable)
    C.AddRecCost += LoopCost;

    if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
      if (!Regs.count(AR->getOperand(1))) {
        RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);

  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);

  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&

void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                               const LSRUse &LU, bool HardwareLoopProfitable,
  if (LoserRegs && LoserRegs->count(Reg)) {
  if (Regs.insert(Reg).second) {
    RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);
    if (LoserRegs && isLoser())
                       const LSRUse &LU, bool HardwareLoopProfitable,
  assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");

  unsigned PrevAddRecCost = C.AddRecCost;
  unsigned PrevNumRegs = C.NumRegs;
  unsigned PrevNumBaseAdds = C.NumBaseAdds;
  if (const SCEV *ScaledReg = F.ScaledReg) {
    if (VisitedRegs.count(ScaledReg)) {
    RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,
  for (const SCEV *BaseReg : F.BaseRegs) {
    if (VisitedRegs.count(BaseReg)) {
    RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,

  size_t NumBaseParts = F.getNumRegs();
  if (NumBaseParts > 1)

  C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

  for (const LSRFixup &Fixup : LU.Fixups) {
    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
    else if (Offset.isNonZero())

    if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

    if (C.NumRegs > TTIRegNum) {
      if (PrevNumRegs > TTIRegNum)
        C.Insns += (C.NumRegs - PrevNumRegs);
        C.Insns += (C.NumRegs - TTIRegNum);

    if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&

    C.Insns += (C.AddRecCost - PrevAddRecCost);

    if (LU.Kind != LSRUse::ICmpZero)
      C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
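// RateFormula above folds several component counts into one Cost value:
// Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost, SetupCost and
// ScaleCost. Lose() saturates every field, so a losing formula compares worse
// than any real one; isLess and the print routine below read the same fields.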
bool Cost::isLess(const Cost &Other) const {
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;
  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul"
       << (C.NumIVMuls == 1 ? "" : "s");
  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add"
       << (C.NumBaseAdds == 1 ? "" : "s");
  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";
    OS << ", plus " << C.ImmCost << " imm cost";
  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (PN->getIncomingValue(i) == OperandValToReplace &&
          L->contains(PN->getIncomingBlock(i)))

  return !L->contains(UserInst);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
    UserInst->printAsOperand(OS, false);

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PIL : PostIncLoops) {
    OS << ", PostIncLoop=";
    PIL->getHeader()->printAsOperand(OS, false);
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  return Uniquifier.count(Key);

float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
  for (const Formula &F : Formulae)
    if (F.referencesReg(Reg))
  return ((float)(Formulae.size() - FNum)) / Formulae.size();

bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
  assert(F.isCanonical(L) && "Invalid canonical representation");

  if (!Formulae.empty() && RigidFormula)

  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  if (!Uniquifier.insert(Key).second)

  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
  for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");

  Formulae.push_back(F);

void LSRUse::DeleteFormula(Formula &F) {
  if (&F != &Formulae.back())
  Formulae.pop_back();

void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  for (const Formula &F : Formulae) {
    if (F.ScaledReg) Regs.insert(F.ScaledReg);

  for (const SCEV *S : OldRegs)
      RegUses.dropRegister(S, LUIdx);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "LSR Use: Kind=";
  case Basic:    OS << "Basic"; break;
  case Special:  OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << *AccessTy.MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';

  OS << ", Offsets={";
  bool NeedComma = false;
  for (const LSRFixup &Fixup : Fixups) {
    if (NeedComma) OS << ',';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale,
  case LSRUse::Address: {
    int64_t FixedOffset =
        BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
    int64_t ScalableOffset =
        BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup, ScalableOffset);
  case LSRUse::ICmpZero:
    if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())

    if (Scale != 0 && Scale != -1)

    if (BaseOffset.isNonZero()) {
      if (BaseOffset.isScalable())

      BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());

    return !BaseGV && Scale == 0 && BaseOffset.isZero();

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();

                                 Immediate MinOffset, Immediate MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 bool HasBaseReg, int64_t Scale) {
  if (BaseOffset.isNonZero() &&
      (BaseOffset.isScalable() != MinOffset.isScalable() ||
       BaseOffset.isScalable() != MaxOffset.isScalable()))

  int64_t Base = BaseOffset.getKnownMinValue();
  int64_t Min = MinOffset.getKnownMinValue();
  int64_t Max = MaxOffset.getKnownMinValue();
  MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
  MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());
                              HasBaseReg, Scale) &&

                       Immediate MinOffset, Immediate MaxOffset,
                       LSRUse::KindType Kind, MemAccessTy AccessTy,
                       const Formula &F, const Loop &L) {
  assert((F.isCanonical(L) || F.Scale != 0));
                              F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

                                 Immediate MaxOffset, LSRUse::KindType Kind,
                                 Immediate BaseOffset, bool HasBaseReg,
                                 int64_t Scale) {
                              BaseOffset, HasBaseReg, Scale) ||
                               BaseGV, BaseOffset, true, 0));

                       Immediate MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, const Formula &F) {
  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
                    F.BaseOffset, F.HasBaseReg, F.Scale);
                                             const LSRUse &LU,
                                             const Formula &F) {
  for (const LSRFixup &Fixup : LU.Fixups)
                                (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
                                F.Scale, Fixup.UserInst))
                                 LU.AccessTy, F.BaseGV, F.BaseOffset,
                                 F.HasBaseReg,

                                     const LSRUse &LU, const Formula &F,
    return F.Scale != 1;

  case LSRUse::Address: {
    int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
    if (F.BaseOffset.isScalable()) {
      ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
      ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
      FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
      FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
        F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
           "Legal addressing mode has an illegal cost!");
    return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
  case LSRUse::ICmpZero:
  case LSRUse::Special:

                             LSRUse::KindType Kind, MemAccessTy AccessTy,
  if (BaseOffset.isZero() && !BaseGV)

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

  if (!HasBaseReg && Scale == 1) {

  if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

                             Immediate MaxOffset, LSRUse::KindType Kind,
                             MemAccessTy AccessTy, const SCEV *S,
  if (S->isZero()) return true;
  if (!S->isZero()) return false;

  if (BaseOffset.isZero() && !BaseGV)

  if (BaseOffset.isScalable())

  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
                              BaseOffset, HasBaseReg, Scale);
  const SCEV *IncExpr;

      : UserInst(U), IVOperand(O), IncExpr(E) {}

  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

    return std::next(Incs.begin());

  bool hasIncs() const { return Incs.size() >= 2; }

  bool isProfitableIncrement(const SCEV *OperExpr,
                             const SCEV *IncExpr,

  bool Changed = false;
  bool HardwareLoopProfitable = false;
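// An IVChain, declared above, records a head increment plus a list of IVInc
// steps (UserInst, IVOperand, IncExpr) that can be rewritten to reuse a single
// live induction variable instead of materializing each user's address
// separately; isProfitableIncrement decides whether extending a chain with a
// given increment is worthwhile.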
  RegUseTracker RegUses;

  static const unsigned MaxChains = 8;

  void OptimizeShadowIV();
  void OptimizeLoopTermCond();

  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);
  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

                    const Cost &CurCost,

                           const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

  bool getChanged() const { return Changed; }
    return ScalarEvolutionIVs;
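// The members above mirror LSRInstance's overall pipeline: optimize the loop
// terminating condition and shadow IVs, collect IV chains, fixups and initial
// formulae, generate reuse formulae (reassociation, combination, symbolic and
// constant offsets, ICmpZero scales, scales, truncates, cross-use offsets),
// then filter and narrow the search space before choosing a solution and
// rewriting the uses.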
void LSRInstance::OptimizeShadowIV() {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

    Type *DestTy = nullptr;
    bool IsSigned = false;

    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      DestTy = UCast->getDestTy();
    } else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      DestTy = SCast->getDestTy();
    if (!DestTy) continue;

    if (Mantissa == -1) continue;

    unsigned Entry, Latch;

    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
                                        (double)Init->getSExtValue() :
                                        (double)Init->getZExtValue());

    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)

    if (!C->getValue().isStrictlyPositive())

    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,

    if (U.getUser() == Cond) {
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))

  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
  if (IterationCount != SE.getSCEV(Sel)) return Cond;

  if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
    Pred = ICmpInst::ICMP_SLE;
  } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_SLT;
  } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
    Pred = ICmpInst::ICMP_ULT;

  if (Max->getNumOperands() != 2)

  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);

      (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

  assert(cast<SCEVAddRecExpr>(IV)->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  Value *NewRHS = nullptr;
  if (ICmpInst::isTrueWhenEqual(Pred)) {
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
      if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
        if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
          NewRHS = BO->getOperand(0);
  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
    NewRHS = SU->getValue();

                     Cond->getOperand(0), NewRHS, "scmp");

  Cond->replaceAllUsesWith(NewCond);

  Cond->eraseFromParent();
  if (Cmp->use_empty()) {
    Cmp->eraseFromParent();
LSRInstance::OptimizeLoopTermCond() {
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());

    if (!FindIVUserForCond(Cond, CondUse))

    if (!DT.dominates(ExitingBlock, LatchBlock))

    if (LatchBlock != ExitingBlock)
        if (&UI != CondUse &&

          const SCEV *A = IU.getStride(*CondUse, L);
          const SCEV *B = IU.getStride(UI, L);
          if (!A || !B) continue;

            if (C->isOne() || C->isMinusOne())
              goto decline_post_inc;
            if (C->getValue().getSignificantBits() >= 64 ||
                C->getValue().isMinSignedValue())
              goto decline_post_inc;
            MemAccessTy AccessTy =
            int64_t Scale = C->getSExtValue();
                                          AccessTy.AddrSpace))
              goto decline_post_inc;
                                          AccessTy.AddrSpace))
              goto decline_post_inc;

    LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "

    if (Cond->getNextNode() != TermBr) {
      if (Cond->hasOneUse()) {
        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");

    IVIncInsertPos = L->getLoopLatch()->getTerminator();
bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  Immediate NewMinOffset = LU.MinOffset;
  Immediate NewMaxOffset = LU.MaxOffset;
  MemAccessTy NewAccessTy = AccessTy;

  if (LU.Kind != Kind)

  if (Kind == LSRUse::Address) {
    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                            AccessTy.AddrSpace);

  if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
                          LU.MaxOffset - NewOffset, HasBaseReg))
    NewMinOffset = NewOffset;
  } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
                          NewOffset - LU.MinOffset, HasBaseReg))
    NewMaxOffset = NewOffset;

  if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
      (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))

  LU.MinOffset = NewMinOffset;
  LU.MaxOffset = NewMaxOffset;
  LU.AccessTy = NewAccessTy;

std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
                                                 LSRUse::KindType Kind,
                                                 MemAccessTy AccessTy) {
    Offset = Immediate::getFixed(0);

  std::pair<UseMapTy::iterator, bool> P =
    size_t LUIdx = P.first->second;
    LSRUse &LU = Uses[LUIdx];
    if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
      return std::make_pair(LUIdx, Offset);

  size_t LUIdx = Uses.size();
  P.first->second = LUIdx;
  Uses.push_back(LSRUse(Kind, AccessTy));
  LSRUse &LU = Uses[LUIdx];

  return std::make_pair(LUIdx, Offset);

void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  if (&LU != &Uses.back())

  RegUses.swapAndDropUse(LUIdx, Uses.size());
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (LSRUse &LU : Uses) {
    if (&LU != &OrigLU &&
        LU.Kind != LSRUse::ICmpZero &&
        LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
        LU.WidestFixupType == OrigLU.WidestFixupType &&
        LU.HasFormulaWithSameRegs(OrigF)) {
      for (const Formula &F : LU.Formulae) {
        if (F.BaseRegs == OrigF.BaseRegs &&
            F.ScaledReg == OrigF.ScaledReg &&
            F.BaseGV == OrigF.BaseGV &&
            F.Scale == OrigF.Scale &&
            F.UnfoldedOffset == OrigF.UnfoldedOffset) {
          if (F.BaseOffset.isZero())

void LSRInstance::CollectInterestingTypesAndFactors() {
    const SCEV *Expr = IU.getExpr(U);
    } while (!Worklist.empty());

       I = Strides.begin(), E = Strides.end(); I != E; ++I)
         std::next(I); NewStrideIter != E; ++NewStrideIter) {
      const SCEV *OldStride = *I;
      const SCEV *NewStride = *NewStrideIter;

              dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());
        if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
          Factors.insert(Factor->getAPInt().getSExtValue());

  if (Types.size() == 1)
  for(; OI != OE; ++OI) {
    if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
              dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {

  if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
    return Trunc->getOperand(0);

    return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
    return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
    return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
      if (SubExpr->getSCEVType() == scAddExpr)
      if (SubExpr->getSCEVType() != scMulExpr)
    return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());

bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
                                    const SCEV *IncExpr,
  if (!isa<SCEVConstant>(IncExpr)) {
    if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))

  if (!Chain.hasIncs())

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst
                    : Users) { dbgs() << "  " << *Inst << "\n"; });

  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  for (const IVInc &Inc : Chain) {
    if (Inc.IncExpr->isZero())

    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;

    LastIncExpr = Inc.IncExpr;

  if (NumConstIncrements > 1)

  cost += NumVarIncrements;

  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))

    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;

  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))

    LastIncExpr = OperExpr;

    if (!isa<SCEVAddRecExpr>(LastIncExpr))

    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");

    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");

    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  IVChain &Chain = IVChainVec[ChainIdx];

  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);

    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
    if (IncIter != IncEnd)

        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {

    NearUsers.insert(OtherUse);

  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
void LSRInstance::CollectChains() {
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))

      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);

      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

  for (PHINode &PN : L->getHeader()->phis()) {
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
      ChainInstruction(&PN, IncV, ChainUsersVec);

  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
                            ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);

  IVChainVec.resize(ChainIdx);

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);

  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
  Immediate IncOffset = Immediate::getZero();
        C->getSignificantBits() > 64)
      IncOffset = Immediate::getScalable(C->getSExtValue());
void LSRInstance::GenerateIVChain(const IVChain &Chain,
  const IVInc &Head = Chain.Incs[0];

  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  if (IVOpIter == IVOpEnd) {
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
  assert(IVSrc && "Failed to find IV chain source");

  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

      bool FoundBase = false;
      for (auto [MapScev, MapIVOper] : reverse(Bases)) {
        if (!Remainder->isZero()) {
          Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
          const SCEV *IVOperExpr =
          IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
        Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
        IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      assert(IVTy == IVOper->getType() && "inconsistent IV increment type");

      LeftOverExpr = nullptr;

    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
             "cannot extend a chained IV");
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))

  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
          Phi.getIncomingValueForBlock(L->getLoopLatch()));
      Value *IVOper = IVSrc;
      if (IVTy != PostIncTy) {
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      Phi.replaceUsesOfWith(PostIncV, IVOper);
void LSRInstance::CollectFixupsAndInitialFormulae() {
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
      Kind = LSRUse::Address;

    const SCEV *S = IU.getExpr(U);

    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
      if (CI->isEquality()) {
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
            (!NV->getType()->isPointerTy() ||
          Kind = LSRUse::ICmpZero;
        } else if (L->isLoopInvariant(NV) &&
                   (!isa<Instruction>(NV) ||
                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
                   !NV->getType()->isPointerTy()) {
          Kind = LSRUse::ICmpZero;
          assert(!isa<SCEVCouldNotCompute>(S));

        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);

    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    LSRUse &LU = Uses[LUIdx];

    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,
                               HardwareLoopProfitable);
      VisitedLSRUse.insert(LUIdx);

    if (!LU.WidestFixupType ||
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
    LU.RigidFormula = true;

  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;

LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);

bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))

  CountRegisters(F, LUIdx);
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  while (!Worklist.empty()) {
    if (!Visited.insert(S).second)

    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        if (L->contains(Inst)) continue;
      } else if (isa<Constant>(V))

      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())

        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          cast<PHINode>(UserInst)->getIncomingBlock(

        if (isa<PHINode>(UserInst)) {
          const auto *PhiNode = cast<PHINode>(UserInst);
          bool HasIncompatibleEHPTerminatedBlock = false;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
          if (HasIncompatibleEHPTerminatedBlock) {

        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))

        if (!isa<SCEVUnknown>(UserS))

        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = ICI->getOperand(OtherIdx);

        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;

        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;

        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
                                 unsigned Depth = 0) {
    for (const SCEV *S : Add->operands()) {

  const SCEV *Start, *Step;
    if (Start->isZero())
    if (Remainder && (cast<SCEVAddRecExpr>(S)->getLoop() == L ||
                      !isa<SCEVAddRecExpr>(Remainder))) {
        Remainder = nullptr;
    if (Remainder != Start) {
          cast<SCEVAddRecExpr>(S)->getLoop(),

                                  LSRUse &LU, const SCEV *S, const Loop *L,
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
  if (AddOps.size() == 1)
                                            LU.AccessTy, *J,
                                            Base.getNumRegs() > 1))

    InnerAddOps.append(std::next(J), std::as_const(AddOps).end());

    if (InnerAddOps.size() == 1 &&
                                LU.AccessTy, InnerAddOps[0],
                                Base.getNumRegs() > 1))

    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())

      const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
        F.ScaledReg = nullptr;

        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
      F.BaseRegs[Idx] = InnerSum;

            Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
      F.BaseRegs.push_back(*J);

    if (InsertFormula(LU, LUIdx, F))
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),

void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

  if (Base.Scale == 1)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseRegs.size() + (Base.Scale == 1) +
      (Base.UnfoldedOffset.isNonZero()) <=

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *BaseReg : Base.BaseRegs) {
      if (!CombinedIntegerType)
      NewBase.BaseRegs.push_back(BaseReg);

  if (Ops.size() == 0)

  auto GenerateFormula = [&](const SCEV *Sum) {
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    (void)InsertFormula(LU, LUIdx, F);

  if (Ops.size() > 1) {

  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
                                 NewBase.UnfoldedOffset.getFixedValue(), true));
    NewBase.UnfoldedOffset = Immediate::getFixed(0);
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
  if (G->isZero() || !GV)
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);

    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
        F.ScaledReg = nullptr;
        F.deleteBaseReg(F.BaseRegs[Idx]);
      } else if (IsScaledReg)
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);

    const APInt *StepInt;
    for (Immediate Offset : Worklist) {
        Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

  for (Immediate Offset : Worklist)
  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))
  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    F.BaseRegs[Idx] = G;
  (void)InsertFormula(LU, LUIdx, F);

void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
  if (LU.MaxOffset != LU.MinOffset)

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
  if (LU.Kind != LSRUse::ICmpZero) return;

  if (LU.MinOffset != LU.MaxOffset) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  for (int64_t Factor : Factors) {
    if (Base.BaseOffset.isMin() && Factor == -1)
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())

    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())

    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

    F.BaseOffset = NewBaseOffset;

    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
                                   IntTy, F.UnfoldedOffset.getFixedValue()))

    (void)InsertFormula(LU, LUIdx, F);
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.Scale != 0 && !Base.unscale())

  assert(Base.Scale == 0 && "unscale did not did its job!");

  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;

    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;

    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)

    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        if (!Quotient->isZero()) {
          F.ScaledReg = Quotient;
          F.deleteBaseReg(F.BaseRegs[i]);
          if (F.Scale == 1 && (F.BaseRegs.empty() ||
                               (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
          if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
          (void)InsertFormula(LU, LUIdx, F);
  const SCEV *Result = nullptr;
  for (auto &L : Loops) {
    if (!New || (Result && New != Result))
  assert(Result && "failed to create expression");

void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV) return;

  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
             [](const SCEV *S) { return S->getType()->isPointerTy(); }))

  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
      const SCEV *NewScaledReg =
      if (!NewScaledReg || NewScaledReg->isZero())
      F.ScaledReg = NewScaledReg;

    bool HasZeroBaseReg = false;
    for (const SCEV *&BaseReg : F.BaseRegs) {
      const SCEV *NewBaseReg =
      if (!NewBaseReg || NewBaseReg->isZero()) {
        HasZeroBaseReg = true;

    if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

    (void)InsertFormula(LU, LUIdx, F);
  const SCEV *OrigReg;

      : LUIdx(LI), Imm(I), OrigReg(R) {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
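// A WorkItem pairs a use index with an immediate delta relative to OrigReg.
// GenerateCrossUseConstantOffsets below scans registers that differ only by a
// constant across uses and, for each such delta Imm, re-expresses formulae in
// terms of "reg - Imm" plus an adjusted BaseOffset, so distinct uses can share
// one register; e.g. (illustrative) uses of {p} and {p+4} can both be served
// by register p with offsets 0 and 4.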
void LSRInstance::GenerateCrossUseConstantOffsets() {
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;

  for (const SCEV *Use : RegUses) {
    auto Pair = Map.try_emplace(Reg);
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
  }

  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;
    if (Imms.size() == 1)
    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry
                 << ' ' << Entry.first;
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
      const SCEV *OrigReg = J->second;
      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {
      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE)
          continue;
        if (!JImm.isCompatibleImmediate(M->first))
        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)

  UsedByIndicesMap.clear();
  UniqueItems.clear();

  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;
    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];
      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
          if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
          if (C->getValue()->isNegative() !=
                  (NewF.BaseOffset.isLessThanZero()) &&
              .ule(std::abs(NewF.BaseOffset.getFixedValue())))
        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
      for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
        if (BaseReg != OrigReg)
        if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
            !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
            !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
        NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
            LU.Kind, LU.AccessTy, NewF)) {
          Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
          NewF.UnfoldedOffset = NewUnfoldedOffset;
        NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
        for (const SCEV *NewReg : NewF.BaseRegs)
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
            if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
            if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                    .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                (unsigned)llvm::countr_zero<uint64_t>(
                    NewF.BaseOffset.getFixedValue()))
        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
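// ---------------------------------------------------------------------------
// Illustrative sketch (not part of LSR): the cross-use offset loop above picks
// a pivot immediate with the overflow-free midpoint trick
//   Avg = (FI & LI) + ((FI ^ LI) >> 1)
// followed by a correction that rounds toward zero when the exact average is
// negative. The standalone snippet below works on plain int64_t values rather
// than LSR's Immediate type and checks the trick against a wide reference
// computation; names here are invented for the example only.
#include <cassert>
#include <cstdint>

// Midpoint of A and B, truncated toward zero, without risking signed overflow
// in the intermediate sum A + B.
static int64_t midpointTowardZero(int64_t A, int64_t B) {
  int64_t Avg = (A & B) + ((A ^ B) >> 1);  // rounds toward negative infinity
  Avg += (A ^ B) & ((uint64_t)Avg >> 63);  // fix up negative, odd sums
  return Avg;
}

int main() {
  const int64_t Samples[] = {INT64_MIN, -7, -1, 0, 1, 6, INT64_MAX};
  for (int64_t A : Samples)
    for (int64_t B : Samples)
      assert(midpointTowardZero(A, B) ==
             (int64_t)(((__int128)A + B) / 2)); // '/' truncates toward zero
  return 0;
}
// ---------------------------------------------------------------------------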
LSRInstance::GenerateAllReuseFormulae() {
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

  GenerateCrossUseConstantOffsets();

             "After generating reuse formulae:\n";
             print_uses(dbgs()));
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  bool ChangedFormulae = false;
  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
      if (CostF.isLoser()) {
      for (const SCEV *Reg : F.BaseRegs) {
        if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
          RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
        Key.push_back(F.ScaledReg);
      std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));
      Formula &Best = LU.Formulae[P.first->second];
      Cost CostBest(L, SE, TTI, AMK);
      CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
                           HardwareLoopProfitable);
      if (CostF.isLess(CostBest))
                 " in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');
      ChangedFormulae = true;
      LU.DeleteFormula(F);
    LU.RecomputeRegs(LUIdx, RegUses);
    BestFormulae.clear();

             "After filtering out undesirable candidates:\n";
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                       "which use a superset of registers used by other "

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
           I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
              Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                  (uint64_t)C->getValue()->getSExtValue());
          NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                              (I - F.BaseRegs.begin()));
          if (LU.HasFormulaWithSameRegs(NewF)) {
            LU.DeleteFormula(F);
        } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
          if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LU.DeleteFormula(F);
      LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
             dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by assuming that uses separated "
                       "by a constant offset will use the same registers.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
                              LU.Kind, LU.AccessTy))
      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LUThatHas->DeleteFormula(F);
      LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
      DeleteUse(LU, LUIdx);
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters() {
  LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                       "undesirable dedicated registers.\n");
  FilterOutUndesirableDedicatedRegisters();
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
             dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by choosing the best Formula "
                       "from the Formulae with the same Scale and ScaledReg.\n");

  BestFormulaeTy BestFormulae;
  bool ChangedFormulae = false;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
      return CostFA.isLess(CostFB);

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      Formula &Best = LU.Formulae[P.first->second];
      if (IsBetterThan(F, Best))
                 " in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');
      ChangedFormulae = true;
      LU.DeleteFormula(F);
    LU.RecomputeRegs(LUIdx, RegUses);
    BestFormulae.clear();

             "After filtering out undesirable candidates:\n";
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
             "Narrowing the search space by choosing the lowest "
             "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    if (LU.Kind != LSRUse::Address)

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        LU.DeleteFormula(F);
    LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
      float P = LU.getNotSelectedProbability(Reg);
    RegNumMap.insert(std::make_pair(Reg, PNotSel));

             dbgs() << "Narrowing the search space by deleting costly formulas\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    if (LU.Formulae.size() < 2)

    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        if (isa<SCEVAddRecExpr>(BaseReg))
          RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          if (isa<SCEVAddRecExpr>(ScaledReg))
            RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;

             dbgs() << " with min reg num " << FMinRegNum << '\n');
    std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      LU.Formulae.pop_back();
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
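// ---------------------------------------------------------------------------
// Illustrative sketch (not part of LSR): a highly simplified model of the
// scoring idea above. Each formula names some registers; each register carries
// a weight describing how likely it is to be needed by other uses regardless
// of what this use picks, so widely shared registers are cheap to keep.  The
// names and the map-based representation are invented for the example.
#include <cstddef>
#include <map>
#include <vector>

static size_t pickCheapestFormula(
    const std::vector<std::vector<int>> &FormulaRegs,
    const std::map<int, float> &RegWeight) { // assumes every register is keyed
  size_t BestIdx = 0;
  float BestScore = -1.0f;
  for (size_t I = 0; I != FormulaRegs.size(); ++I) {
    float Score = 0.0f;
    for (int Reg : FormulaRegs[I])
      Score += RegWeight.at(Reg); // cheaper when registers are widely reused
    if (BestScore < 0.0f || Score < BestScore) {
      BestScore = Score;
      BestIdx = I;
    }
  }
  return BestIdx; // index of the formula kept; the rest would be deleted
}
// ---------------------------------------------------------------------------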
                                     MemAccessTy AccessType) {
  if (Best->getType() != Reg->getType() ||
      (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
       cast<SCEVAddRecExpr>(Best)->getLoop() !=
           cast<SCEVAddRecExpr>(Reg)->getLoop()))
                                 AccessType.MemTy, nullptr,
                                 Diff->getSExtValue(),
                                 true, 0, AccessType.AddrSpace) &&
                                 AccessType.MemTy, nullptr,
                                 -Diff->getSExtValue(),
                                 true, 0, AccessType.AddrSpace);
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        BestNum = RegUses.getUsedByIndices(Reg).count();
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
                                       Uses[LUIdx].AccessTy)) {

    assert(Best && "Failed to find best LSRUse candidate");
    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best))
        continue;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LU.DeleteFormula(F);
        assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
      LU.RecomputeRegs(LUIdx, RegUses);
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  NarrowSearchSpaceByDeletingCostlyFormulas();
  NarrowSearchSpaceByPickingWinnerRegs();
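// ---------------------------------------------------------------------------
// Illustrative sketch (not part of LSR): the debug messages in the helpers
// above all trigger on "the search space is too complex", i.e. the pruning is
// driven by a size estimate in the spirit of EstimateSearchSpaceComplexity().
// Below is a standalone, simplified version of such an estimate: a saturating
// product of per-use formula counts, so the caller can compare it against a
// limit without ever overflowing. The function name and signature are invented
// for the example.
#include <cstddef>
#include <limits>
#include <vector>

static size_t estimateSearchSpace(const std::vector<size_t> &FormulaePerUse,
                                  size_t Limit) {
  size_t Product = 1;
  for (size_t Count : FormulaePerUse) {
    if (Count == 0)
      return 0; // a use with no formulae leaves nothing to enumerate
    if (Product > Limit / Count)
      return std::numeric_limits<size_t>::max(); // saturate instead of overflow
    Product *= Count;
  }
  return Product;
}
// A driver would run progressively more aggressive pruning passes only while
// estimateSearchSpace(...) stays above the chosen limit.
// ---------------------------------------------------------------------------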
                               const Cost &CurCost,
  const LSRUse &LU = Uses[Workspace.size()];

  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))

  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
    for (const SCEV *Reg : ReqRegs) {
      if ((F.ScaledReg && F.ScaledReg == Reg) ||
      if (NumReqRegsToFind == 0)
    if (NumReqRegsToFind != 0) {

    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
    if (NewCost.isLess(SolutionCost)) {
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs()
                       << "- " << *S << "\n";
        SolutionCost = NewCost;
        Solution = Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);

  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {

             "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               Solution[i]->print(dbgs());

  const bool EnableDropUnprofitableSolution = [&] {

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
                 dbgs() << "Baseline is more profitable than chosen solution, "
                           "add option 'lsr-drop-solution' to drop LSR solution.\n");
                 "solution, dropping LSR solution.\n";);
  bool AllDominate = true;
    if (isa<CatchSwitchInst>(Tentative))
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))

  const Loop *IPLoop = LI.getLoopFor(IP->getParent());
  unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
    if (!Rung)
      return IP;
    Rung = Rung->getIDom();
    if (!Rung)
      return IP;
    IDom = Rung->getBlock();
    const Loop *IDomLoop = LI.getLoopFor(IDom);
    unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
    if (IDomDepth <= IPLoopDepth &&
        (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))

  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
  if (LU.Kind == LSRUse::ICmpZero)
        dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L)
      continue;
    if (!ExitingBlocks.empty()) {
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() &&
         "Insertion point must be a normal instruction");

  while (isa<PHINode>(IP)) ++IP;
  while (IP->isEHPad()) ++IP;
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  IP = AdjustInsertPositionForExpand(IP, LF, LU);
  Rewriter.setPostInc(LF.PostIncLoops);

  Type *OpTy = LF.OperandValToReplace->getType();
  Type *Ty = F.getType();

  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");

  Value *ICmpScaledV = nullptr;
  const SCEV *ScaledS = F.ScaledReg;
    if (LU.Kind == LSRUse::ICmpZero) {
             "The only scale supported by ICmpZero uses is -1!");
      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);

  if (!Ops.empty() && LU.Kind == LSRUse::Address &&

  assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
         "Expanding mismatched offsets\n");
  Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
  if (Offset.isNonZero()) {
    if (LU.Kind == LSRUse::ICmpZero) {
      ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());

  Immediate UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset.isNonZero()) {
    Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));

  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                        "a scale at the same time!");
    if (F.Scale == -1) {
      if (ICmpScaledV->getType() != OpTy) {
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      if (C->getType() != OpTy) {
        assert(C && "Cast of ConstantInt should have folded");
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
                                const LSRFixup &LF, const Formula &F,
  bool needUpdateFixups = false;

      Loop *PNLoop = LI.getLoopFor(Parent);
      if (!PNLoop || Parent != PNLoop->getHeader()) {
                            .setMergeIdenticalEdges()
                            .setKeepOneInputPHIs());
          if (L->contains(BB) && !L->contains(PN))
        needUpdateFixups = true;

      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
        Type *OpTy = LF.OperandValToReplace->getType();
                                 LF.OperandValToReplace->getType(), "tmp",
        if (auto *I = dyn_cast<Instruction>(FullV))
          if (L->contains(I) && !L->contains(BB))
            InsertedNonLCSSAInsts.insert(I);
        Pair.first->second = FullV;

  if (needUpdateFixups) {
    for (LSRUse &LU : Uses)
      for (LSRFixup &Fixup : LU.Fixups)
        if (Fixup.UserInst == PN) {
          bool foundInOriginalPHI = false;
            if (val == Fixup.OperandValToReplace) {
              foundInOriginalPHI = true;
          if (foundInOriginalPHI)
            if (val == Fixup.OperandValToReplace)
              Fixup.UserInst = NewPN;
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, DeadInsts);
    Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
          FullV, OpTy, "tmp", LF.UserInst->getIterator());
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);

  if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))

  if (LU.Kind != LSRUse::Address)
  if (IVIncInsertPos->getParent() == LHeader)
  if (!Fixup.OperandValToReplace ||
    Instruction *UI = cast<Instruction>(U);
    return UI->getParent() != LHeader;
  Type *Ty = I->getType();
void LSRInstance::ImplementSolution(
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
              ? L->getHeader()->getTerminator()
      Rewriter.setIVIncInsertPos(L, InsertPos);
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);

  auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);
    ScalarEvolutionIVs.push_back(IV);

  for (PHINode &PN : L->getHeader()->phis()) {
    Value *Start = nullptr, *Step = nullptr;
    case Instruction::Sub:
    case Instruction::Add:
    if (!isa<Constant>(Step))
    if (BO->getParent() == IVIncInsertPos->getParent())
        [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
    BO->moveBefore(IVIncInsertPos->getIterator());
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
            : TTI.getPreferredAddressingMode(L, &SE)),
      BaselineCost(L, SE, TTI, AMK) {
  if (!L->isLoopSimplifyForm())
  if (IU.empty())
    return;

  unsigned NumUsers = 0;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
          if (isa<CatchSwitchInst>(PredBB->getFirstNonPHIIt()))

             L->getHeader()->printAsOperand(dbgs(), false);
  HardwareLoopProfitable =
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  OptimizeLoopTermCond();
  if (IU.empty())
    return;
  if (!L->isInnermost()) {

  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

             print_uses(dbgs()));
             BaselineCost.print(dbgs()); dbgs() << "\n");

  GenerateAllReuseFormulae();
  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  if (Solution.empty())

  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
                        F) && "Illegal formula generated!");

  ImplementSolution(Solution);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty())
    return;

  OS << "LSR has identified the following interesting factors and types: ";

  for (int64_t Factor : Factors) {
    OS << '*' << Factor;

  for (Type *Ty : Types) {
    OS << '(' << *Ty << ')';

  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {

  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae) {

  print_factors_and_types(OS);
class LoopStrengthReduce : public LoopPass {
  LoopStrengthReduce();

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {

  return {Begin, End};
struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  void clone(const SCEVDbgValueBuilder &Base) {
    LocationOps = Base.LocationOps;
    LocationOps.clear();

    unsigned ArgIndex = 0;
    if (It != LocationOps.end()) {
      ArgIndex = std::distance(LocationOps.begin(), It);
      ArgIndex = LocationOps.size();

    if (C->getAPInt().getSignificantBits() > 64)
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(C->getAPInt().getSExtValue());

    return ToDwarfOpIter(Expr);

    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
           "Expected arithmetic SCEV type");
    unsigned EmitOperator = 0;
    for (const auto &Op : CommExpr->operands()) {
      if (EmitOperator >= 1)
        pushOperator(DwarfOp);

    bool Success = pushSCEV(Inner);
                           IsSigned ? llvm::dwarf::DW_ATE_signed
                                    : llvm::dwarf::DW_ATE_unsigned};
    for (const auto &Op : CastOps)

    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
      Success &= pushConst(StartInt);
    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      pushLocation(U->getValue());
    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
      Success &= pushSCEV(UDiv->getLHS());
      Success &= pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);
    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
      assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
              isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
             "Unexpected cast type in SCEV.");
      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
    } else if (isa<SCEVAddRecExpr>(S)) {

    if (C->getAPInt().getSignificantBits() > 64)
    int64_t I = C->getAPInt().getSExtValue();
    case llvm::dwarf::DW_OP_plus:
    case llvm::dwarf::DW_OP_minus:
    case llvm::dwarf::DW_OP_mul:
    case llvm::dwarf::DW_OP_div:

    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
      if (!pushSCEV(Stride))
      pushOperator(llvm::dwarf::DW_OP_mul);
    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
      if (!pushSCEV(Start))
      pushOperator(llvm::dwarf::DW_OP_plus);

  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
               dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
                      << std::to_string(Offset) << "\n");

  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
    if (!isa<SCEVAddRecExpr>(S))

    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
    const auto *Rec = cast<SCEVAddRecExpr>(S);
    if (!Rec->isAffine())
    clone(IterationCount);
    if (!SCEVToValueExpr(*Rec, SE))

    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
      if (!pushSCEV(Start))
      pushOperator(llvm::dwarf::DW_OP_minus);
    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
      if (!pushSCEV(Stride))
      pushOperator(llvm::dwarf::DW_OP_div);

           "Expected the locations vector to contain the IV");
           "Expected the location ops to contain the IV.");
    for (const auto &Op : LocationOps) {
      auto It = find(DestLocations, Op);
      if (It != DestLocations.end()) {
        DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
    for (const auto &Op : expr_ops()) {
      Op.appendToVector(DestExpr);
      uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
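// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the builder above): for an affine AddRec
// {Start,+,Stride}, the value expression is rebuilt as "iv * Stride + Start",
// pushing the stride/mul before the start/plus, as SCEVToValueExpr() does.
// The helper below is hypothetical and assumes the rewritten induction
// variable has already been pushed as the bottom entry of the DWARF stack and
// that Start and Stride are simple 64-bit constants.
#include <cstdint>
#include <vector>
#include "llvm/BinaryFormat/Dwarf.h"

static void appendAffineRecExpr(std::vector<uint64_t> &Expr, int64_t Start,
                                int64_t Stride) {
  if (Stride != 1) { // multiplying by 1 is the identity, skip it
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(static_cast<uint64_t>(Stride));
    Expr.push_back(llvm::dwarf::DW_OP_mul);
  }
  if (Start != 0) { // adding 0 is the identity, skip it
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(static_cast<uint64_t>(Start));
    Expr.push_back(llvm::dwarf::DW_OP_plus);
  }
}
// ---------------------------------------------------------------------------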
struct DVIRecoveryRec {
      : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}

  bool HadLocationArgList;

    for (auto &RE : RecoveryExprs)
    RecoveryExprs.clear();

  ~DVIRecoveryRec() { clear(); }

  auto expr_ops = ToDwarfOpIter(Expr);
  for (auto Op : expr_ops)

template <typename T>
                                    "contain any DW_OP_llvm_arg operands.");
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));

template <typename T>
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");
  for (Value *V : Locations)
  DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
  if (NumLLVMArgs == 0) {
         "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
  if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {

  LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                    << "scev-salvage: post-LSR: " << *DbgVal << '\n');
  assert(DVIRec.Expr && "Expected an expression");

  if (!DVIRec.HadLocationArgList) {
    assert(DVIRec.LocationOps.size() == 1 &&
           "Unexpected number of location ops.");
    Value *CachedValue =
  for (WeakVH VH : DVIRec.LocationOps) {
  LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');

                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {
  if (!DVIRec.DbgRef->isKillLocation())

  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    if (VH && !isa<UndefValue>(VH)) {
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
                 << " now at index " << LocationOpIndexMap[i] << "\n");
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");
    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    if (std::optional<APInt> Offset =
      if (Offset->getSignificantBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,

  if (DVIRec.Expr->getNumElements() == 0) {
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);

    for (const auto &Op : DVIRec.Expr->expr_ops()) {
      SCEVDbgValueBuilder *DbgBuilder =
          DVIRec.RecoveryExprs[LocationArgIndex].get();
        assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
               "Expected a positive index for the location-op position.");
        NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
      DbgBuilder->appendToVectors(NewExpr, NewLocationOps);

  LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
  if (DVIToUpdate.empty())

  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");

          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
    if (!IVAddRec->isAffine())

    SCEVDbgValueBuilder IterCountExpr;
    IterCountExpr.pushLocation(LSRInductionVar);
    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))

    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar

    for (auto &DVIRec : DVIToUpdate) {
      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,

    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
      if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
      if (DbgVal.isKillLocation())

      const auto &HasTranslatableLocationOps =
            for (const auto LocOp : DbgValToTranslate.location_ops()) {

      if (!HasTranslatableLocationOps(DbgVal))

      std::unique_ptr<DVIRecoveryRec> NewRec =
          std::make_unique<DVIRecoveryRec>(&DbgVal);
      NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
      for (const auto LocOp : DbgVal.location_ops()) {
        NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
        NewRec->LocationOps.push_back(LocOp);
        NewRec->HadLocationArgList = DbgVal.hasArgList();
      SalvageableDVISCEVs.push_back(std::move(NewRec));
                                              const LSRInstance &LSR) {
  auto IsSuitableIV = [&](PHINode *P) {

  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    if (IsSuitableIV(P))

  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
  bool Changed = false;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

    const DataLayout &DL = L->getHeader()->getDataLayout();
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
    unsigned numFolded =
        Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);

  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    const DataLayout &DL = L->getHeader()->getDataLayout();

  if (SalvageableDVIRecords.empty())

  for (const auto &L : LI) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");

  for (auto &Rec : SalvageableDVIRecords)
  SalvageableDVIRecords.clear();

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MSSA = &MSSAAnalysis->getMSSA();
char LoopStrengthReduce::ID = 0;

                "Loop Strength Reduction", false, false)
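// Usage note (an assumption, not taken from this file): the transform
// registered above is the classic loop-reduce pass, so it can typically be
// exercised in isolation with `opt -passes=loop-reduce <input.ll>`; in an
// assertions-enabled build, `-debug-only=loop-reduce` prints the LLVM_DEBUG
// traces emitted by the functions above.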
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file contains constants used for implementing Dwarf debug support.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This header provides classes for managing per-loop analyses.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)
static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))
static Value * getWideOperand(Value *Oper)
IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)
Return true if the given add can be sign-extended without changing its value.
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)
Return true if the SCEV represents a value that may end up as a post-increment operation.
static void restorePreTransformState(DVIRecoveryRec &DVIRec)
Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a constant integer value, return that integer value,...
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)
static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)
Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...
static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)
Test whether we know how to expand the current formula.
static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)
Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)
Return true if the given mul can be sign-extended without changing its value.
static const unsigned MaxSCEVSalvageExpressionSize
Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if this AddRec is already a phi in its loop.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))
static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Returns true if the specified instruction is using the specified value as an address.
static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)
If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...
static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)
Overwrites DVI with the location and Ops as the DIExpression.
static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)
static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))
static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))
static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode")))
static const SCEV * getExprBase(const SCEV *S)
Return an approximation of this SCEV expression's "base", or NULL for any constant.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)
static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)
Ideally pick the PHI IV inserted by ScalarEvolutionExpander.
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)
static const unsigned MaxIVUsers
MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.
static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)
Check if expanding this expression is likely to incur significant cost.
static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)
Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)
Return the type of the memory being accessed.
static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)
Returns the total number of DW_OP_llvm_arg operands in the expression.
static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)
Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.
static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))
static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)
Write the new expression and new location ops for the dbg.value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Return true if the given addrec can be sign-extended without changing its value.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, Loop *L)
static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)
Recursion helper for initialMatch.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)
Check if the addressing mode defined by F is completely folded in LU at isel time.
static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))
static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))
static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)
Overwrite DVI with locations placed into a DIArglist.
static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)
static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Return true if the number of registers needed for the chain is estimated to be less than the number r...
static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)
Split S into subexpressions which can be pulled out into separate registers.
static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)
Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)
Return true if the IVInc can be folded into an addressing mode.
static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)
Extend/Truncate Expr to ToTy considering post-inc uses in Loops.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)
static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
uint64_t IntrinsicInst * II
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
SI optimize exec mask operations pre RA
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static SymbolRef::Type getType(const Symbol *Sym)
Virtual Register Rewriter
static const uint32_t IV[8]
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
InstListType::iterator iterator
Instruction iterators...
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Conditional or Unconditional Branch instruction.
bool isUnconditional() const
Value * getCondition() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
This is the shared class of boolean and integer constants.
static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)
This static method returns true if the type Ty is big enough to represent the value V.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)
An iterator for expression operands.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
LLVM_ABI bool isComplex() const
Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI LLVMContext & getContext()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
void setRawLocation(Metadata *NewLocation)
Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...
void setExpression(DIExpression *NewExpr)
DIExpression * getExpression() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
IVStrideUse - Keep track of one use of a strided induction variable.
void transformToPostInc(const Loop *L)
transformToPostInc - Transform the expression to post-inc form for the given loop.
Value * getOperandValToReplace() const
getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...
void setUser(Instruction *NewUser)
setUser - Assign a new user instruction for this use.
Analysis pass that exposes the IVUsers for a loop.
ilist< IVStrideUse >::const_iterator const_iterator
LLVM_ABI void print(raw_ostream &OS) const
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI Type * getAccessType() const LLVM_READONLY
Return the type this instruction accesses in memory, if any.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Legacy analysis pass which computes MemorySSA.
Encapsulates MemorySSA, including all data associated with memory accesses.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static unsigned getIncomingValueNumForOperand(unsigned i)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This is the base class for unary cast operator classes.
This node is the base class for n'ary commutative operators.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
This class uses information about analyze scalars to rewrite expressions in canonical form.
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
ArrayRef< const SCEV * > operands() const
This class represents a signed maximum selection.
This class represents a binary unsigned division operation.
This class represents an unsigned maximum selection.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
unsigned short getExpressionSize() const
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
SCEVTypes getSCEVType() const
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
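For illustration only, a generic inspection of an analyzed expression using the SCEV base-class interface (S is assumed to be a valid SCEV):
if (!S->isZero() && S->getSCEVType() == scAddExpr)
  for (const SCEV *Op : S->operands())          // operands of the addition
    dbgs() << *Op << " : " << *Op->getType() << "\n";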
This class represents a cast from signed integer to floating point.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it; otherwise return a SCEVCouldNotCompute object.
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the given type.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the given type.
LLVM_ABI bool containsUndefs(const SCEV *S) const
Return true if the SCEV expression contains an undef value.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getVScale(Type *Ty)
LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a single pointer operand.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getUnknown(Value *V)
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Computes LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn't.
LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if the elements that make up the given SCEV properly dominate the specified basic block.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool containsErasedValue(const SCEV *S) const
Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.
LLVMContext & getContext() const
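A hedged sketch of a typical ScalarEvolution workflow in this pass's domain; SE, L, Ptr, and IntTy are assumed, and getConstant(Type *, uint64_t) is an overload not shown in the list above:
// Build the canonical {0,+,1}<L> recurrence.
const SCEV *IV = SE.getAddRecExpr(SE.getZero(IntTy), SE.getConstant(IntTy, 1),
                                  L, SCEV::FlagAnyWrap);
if (SE.isSCEVable(Ptr->getType())) {
  const SCEV *PtrExpr = SE.getSCEV(Ptr);
  // Offset of the pointer from its underlying base object.
  const SCEV *Offset = SE.getMinusSCEV(PtrExpr, SE.getPointerBase(PtrExpr));
  bool Invariant = SE.isLoopInvariant(Offset, L);
  (void)IV; (void)Invariant;
}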
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
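A small usage sketch of the SetVector interface; Instruction pointers and the value I are just assumed examples:
SetVector<Instruction *> Worklist;
Worklist.insert(I);                 // returns false if I was already present
for (Instruction *Cur : Worklist)   // iteration follows insertion order
  dbgs() << *Cur << "\n";
dbgs() << Worklist.size() << " unique instructions\n";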
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
size_type size() const
Returns the number of bits in this bitvector.
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
size_type count() const
Returns the number of bits which are set.
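A sketch of the bit-vector interface used to track which entries of a table are live; NumEntries is an assumed size:
SmallBitVector Live(NumEntries);        // all bits start out false
Live.set(3);
Live.set(7);
for (unsigned Idx : Live.set_bits())    // visits 3, then 7
  dbgs() << "entry " << Idx << " is live\n";
for (int Idx = Live.find_first(); Idx != -1; Idx = Live.find_next(Idx))
  (void)Idx;                            // equivalent manual walk
dbgs() << Live.count() << " of " << Live.size() << " bits set\n";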
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
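A sketch of the usual visited-set idiom built on these calls (V assumed):
SmallPtrSet<const Value *, 8> Visited;
if (Visited.insert(V).second)       // .second is true only on first insertion
  dbgs() << "first visit of " << *V << "\n";
bool Seen = Visited.count(V);       // 1 if V is in the set, 0 otherwise
(void)Seen;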
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
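A sketch showing the SmallVector calls above feeding ScalarEvolution::getAddExpr; LHS, RHS, and SE are assumed:
SmallVector<const SCEV *, 4> Ops;
Ops.reserve(2);                          // avoid growth for a known operand count
Ops.push_back(LHS);
Ops.push_back(RHS);
const SCEV *Sum = SE.getAddExpr(Ops);    // may simplify the operand list
(void)Sum;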
static StackOffset get(int64_t Fixed, int64_t Scalable)
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM_ABI unsigned getIntegerBitWidth() const
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
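A sketch of rewriting all uses of one value with another; OldV, NewV, and Worklist are assumed, and the two types must already match:
for (User *U : OldV->users())
  if (auto *UI = dyn_cast<Instruction>(U))
    Worklist.push_back(UI);          // queue users before the rewrite
if (OldV->getType() == NewV->getType())
  OldV->replaceAllUsesWith(NewV);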
A nullable Value handle.
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
const ParentTy * getParent() const
self_iterator getIterator()
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
class_match< const SCEVVScale > m_SCEVVScale()
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
class_match< const SCEVConstant > m_SCEVConstant()
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
SCEVBinaryExpr_match< SCEVMulExpr, Op0_t, Op1_t > m_scev_Mul(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const Loop > m_Loop()
cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)
Match an SCEV constant with a plain unsigned integer.
class_match< const SCEV > m_SCEV()
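A hedged sketch using these matchers to recognize an affine recurrence whose step is a constant integer (S assumed):
using namespace SCEVPatternMatch;
const APInt *StepC;
if (match(S, m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StepC))))
  dbgs() << "constant step of " << *StepC << "\n";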
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_arg
Only used in LLVM metadata.
@ DW_OP_LLVM_convert
Only used in LLVM metadata.
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
LLVM_ABI const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
unsigned KindType
For isa, dyn_cast, etc. operations on TelemetryInfo.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
bool operator!=(uint64_t V1, const APInt &V2)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI char & LoopSimplifyID
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
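A small arithmetic sketch of these helpers, assuming Scale is a uint32_t that is a power of two (isPowerOf2_32 is an additional MathExtras helper not listed above):
int TZ = countr_zero(Scale);            // trailing zero count
unsigned FloorLog2 = Log2_32(Scale);    // floor(log2(Scale))
assert(isPowerOf2_32(Scale) && (unsigned)TZ == FloorLog2 &&
       "for powers of two both give the shift amount");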
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)
auto reverse(ContainerTy &&C)
LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)
Denormalize S to be post-increment for all loops present in Loops.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
LLVM_ABI Pass * createLoopStrengthReducePass()
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
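An illustrative sketch of the usual cleanup idiom after rewriting uses; OldInst, TLI, and MSSAU are assumed, and the latter two may be null:
SmallVector<WeakTrackingVH, 16> DeadInsts;
DeadInsts.push_back(OldInst);            // instruction whose uses were rewritten
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI, MSSAU);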
constexpr unsigned BitWidth
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)
If the final value of any expressions that are recurrent in the loop can be computed,...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
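A sketch of the range-based helpers replacing explicit begin()/end() pairs; Regs, Insts, SE, L, and S are assumed from context:
bool AllInvariant =
    all_of(Regs, [&](const SCEV *R) { return SE.isLoopInvariant(R, L); });
auto It = find_if(Insts, [](Instruction *I) { return I->use_empty(); });
bool Seen = is_contained(Regs, S);
(void)AllInvariant; (void)It; (void)Seen;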
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Option class for critical edge splitting.
Attributes of a target dependent hardware loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.