58#define DEBUG_TYPE "machinelicm"
62 cl::desc(
"MachineLICM should avoid speculation"),
67 cl::desc(
"MachineLICM should hoist even cheap instructions"),
83 cl::desc(
"Do not hoist instructions if target"
84 "block is N times hotter than the source."),
91 cl::desc(
"Disable hoisting instructions to"
95 "disable the feature"),
97 "enable the feature when using profile data"),
99 "enable the feature with/wo profile data")));
102 "Number of machine instructions hoisted out of loops");
104 "Number of instructions hoisted in low reg pressure situation");
106 "Number of high latency instructions hoisted");
108 "Number of hoisted machine instructions CSEed");
110 "Number of machine instructions hoisted out of loops post regalloc");
112 "Number of stores of const phys reg hoisted out of loops");
114 "Number of instructions not hoisted due to block frequency");
117 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
119 class MachineLICMImpl {
120 const TargetInstrInfo *TII =
nullptr;
121 const TargetLoweringBase *TLI =
nullptr;
122 const TargetRegisterInfo *TRI =
nullptr;
123 const MachineFrameInfo *MFI =
nullptr;
124 MachineRegisterInfo *MRI =
nullptr;
125 TargetSchedModel SchedModel;
126 bool PreRegAlloc =
false;
127 bool HasProfileData =
false;
133 MachineBlockFrequencyInfo *MBFI =
nullptr;
134 MachineLoopInfo *MLI =
nullptr;
135 MachineDomTreeUpdater *MDTU =
nullptr;
138 bool Changed =
false;
139 bool FirstInLoop =
false;
143 SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
146 DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
148 bool isExitBlock(MachineLoop *CurLoop,
const MachineBasicBlock *
MBB) {
149 auto [It,
Inserted] = ExitBlockMap.try_emplace(CurLoop);
151 SmallVector<MachineBasicBlock *, 8> ExitBlocks;
153 It->second = std::move(ExitBlocks);
159 SmallDenseSet<Register> RegSeen;
160 SmallVector<unsigned, 8> RegPressure;
164 SmallVector<unsigned, 8> RegLimit;
170 DenseMap<MachineBasicBlock *,
171 DenseMap<unsigned, std::vector<MachineInstr *>>>
183 unsigned SpeculationState = SpeculateUnknown;
186 MachineLICMImpl(
bool PreRegAlloc,
Pass *LegacyPass,
188 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
189 assert((LegacyPass || MFAM) &&
"LegacyPass or MFAM must be provided");
190 assert(!(LegacyPass && MFAM) &&
191 "LegacyPass and MFAM cannot be provided at the same time");
194 bool run(MachineFunction &MF);
196 void releaseMemory() {
202 ExitBlockMap.clear();
207 struct CandidateInfo {
212 CandidateInfo(MachineInstr *mi,
Register def,
int fi)
213 : MI(mi), Def(def), FI(fi) {}
216 void HoistRegionPostRA(MachineLoop *CurLoop);
218 void HoistPostRA(MachineInstr *
MI,
Register Def, MachineLoop *CurLoop);
220 void ProcessMI(MachineInstr *
MI, BitVector &RUDefs, BitVector &RUClobbers,
221 SmallDenseSet<int> &StoredFIs,
222 SmallVectorImpl<CandidateInfo> &Candidates,
223 MachineLoop *CurLoop);
225 void AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop);
227 bool IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop);
229 bool IsLoopInvariantInst(MachineInstr &
I, MachineLoop *CurLoop);
231 bool HasLoopPHIUse(
const MachineInstr *
MI, MachineLoop *CurLoop);
233 bool HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
Register Reg,
234 MachineLoop *CurLoop)
const;
236 bool IsCheapInstruction(MachineInstr &
MI)
const;
238 bool CanCauseHighRegPressure(
const SmallDenseMap<unsigned, int> &
Cost,
241 void UpdateBackTraceRegPressure(
const MachineInstr *
MI);
243 bool IsProfitableToHoist(MachineInstr &
MI, MachineLoop *CurLoop);
245 bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
247 bool isTriviallyReMaterializable(
const MachineInstr &
MI)
const;
249 void EnterScope(MachineBasicBlock *
MBB);
251 void ExitScope(MachineBasicBlock *
MBB);
253 void ExitScopeIfDone(
255 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
256 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
260 void InitRegPressure(MachineBasicBlock *BB);
262 SmallDenseMap<unsigned, int> calcRegisterCost(
const MachineInstr *
MI,
264 bool ConsiderUnseenAsDef);
266 void UpdateRegPressure(
const MachineInstr *
MI,
267 bool ConsiderUnseenAsDef =
false);
269 MachineInstr *ExtractHoistableLoad(MachineInstr *
MI, MachineLoop *CurLoop);
271 MachineInstr *LookForDuplicate(
const MachineInstr *
MI,
272 std::vector<MachineInstr *> &PrevMIs);
275 EliminateCSE(MachineInstr *
MI,
276 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI);
278 bool MayCSE(MachineInstr *
MI);
280 unsigned Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
281 MachineLoop *CurLoop);
283 void InitCSEMap(MachineBasicBlock *BB);
285 void InitializeLoadsHoistableLoops();
287 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
288 MachineBasicBlock *TgtBlock);
289 MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);
296 MachineLICMBase(
char &
ID,
bool PreRegAlloc)
297 : MachineFunctionPass(
ID), PreRegAlloc(PreRegAlloc) {}
299 bool runOnMachineFunction(MachineFunction &MF)
override;
301 void getAnalysisUsage(AnalysisUsage &AU)
const override {
304 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
312 class MachineLICM :
public MachineLICMBase {
315 MachineLICM() : MachineLICMBase(ID,
false) {
320 class EarlyMachineLICM :
public MachineLICMBase {
323 EarlyMachineLICM() : MachineLICMBase(ID,
true) {
331char EarlyMachineLICM::ID;
337 "Machine Loop Invariant Code Motion",
false,
false)
346 "Early Machine Loop Invariant Code Motion",
false,
false)
352 "Early Machine Loop Invariant Code Motion",
false,
false)
355 if (skipFunction(MF.getFunction()))
358 MachineLICMImpl Impl(PreRegAlloc,
this,
nullptr);
362#define GET_RESULT(RESULT, GETTER, INFIX) \
364 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \
365 : &MFAM->getResult<RESULT##Analysis>(MF))
374 MachineDomTreeUpdater::UpdateStrategy::Lazy);
383 TII = ST.getInstrInfo();
384 TLI = ST.getTargetLowering();
385 TRI = ST.getRegisterInfo();
388 SchedModel.
init(&ST);
400 unsigned NumRPS =
TRI->getNumRegPressureSets();
401 RegPressure.resize(NumRPS);
404 for (
unsigned i = 0, e = NumRPS; i != e; ++i)
405 RegLimit[i] =
TRI->getRegPressureSetLimit(MF, i);
409 InitializeLoadsHoistableLoops();
412 while (!Worklist.
empty()) {
416 HoistRegionPostRA(CurLoop);
422 HoistOutOfLoop(
N, CurLoop);
438 if (
MI->memoperands_empty())
441 if (!
MemOp->isStore() || !
MemOp->getPseudoValue())
445 if (
Value->getFrameIndex() == FI)
486 const unsigned NumRegs =
TRI.getNumRegs();
487 const unsigned MaskWords = (NumRegs + 31) / 32;
488 for (
unsigned K = 0; K < MaskWords; ++K) {
490 for (
unsigned Bit = 0; Bit < 32; ++Bit) {
491 const unsigned PhysReg = (K * 32) + Bit;
492 if (PhysReg == NumRegs)
495 if (PhysReg && !((Word >> Bit) & 1)) {
497 RUsFromRegsNotInMask.
set(Unit);
502 RUs |= RUsFromRegsNotInMask;
507void MachineLICMImpl::ProcessMI(MachineInstr *
MI, BitVector &RUDefs,
508 BitVector &RUClobbers,
509 SmallDenseSet<int> &StoredFIs,
510 SmallVectorImpl<CandidateInfo> &Candidates,
511 MachineLoop *CurLoop) {
512 bool RuledOut =
false;
513 bool HasNonInvariantUse =
false;
515 for (
const MachineOperand &MO :
MI->operands()) {
518 int FI = MO.getIndex();
519 if (!StoredFIs.
count(FI) &&
523 HasNonInvariantUse =
true;
529 if (MO.isRegMask()) {
542 if (!HasNonInvariantUse) {
546 if (RUDefs.
test(Unit) || RUClobbers.
test(Unit)) {
547 HasNonInvariantUse =
true;
567 if (RUDefs.
test(Unit)) {
568 RUClobbers.
set(Unit);
570 }
else if (RUClobbers.
test(Unit)) {
582 if (Def && !RuledOut) {
583 int FI = std::numeric_limits<int>::min();
584 if ((!HasNonInvariantUse && IsLICMCandidate(*
MI, CurLoop)) ||
592void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop) {
593 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
597 unsigned NumRegUnits =
TRI->getNumRegUnits();
598 BitVector RUDefs(NumRegUnits);
599 BitVector RUClobbers(NumRegUnits);
602 SmallDenseSet<int> StoredFIs;
606 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
610 if (
ML &&
ML->getHeader()->isEHPad())
continue;
615 for (
const auto &LI : BB->liveins()) {
621 if (
const uint32_t *Mask = BB->getBeginClobberMask(
TRI))
626 const MachineFunction &MF = *BB->getParent();
631 RUClobbers.
set(Unit);
634 RUClobbers.
set(Unit);
637 SpeculationState = SpeculateUnknown;
638 for (MachineInstr &
MI : *BB)
639 ProcessMI(&
MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
643 BitVector TermRUs(NumRegUnits);
645 if (TI != Preheader->
end()) {
646 for (
const MachineOperand &MO : TI->operands()) {
665 for (CandidateInfo &Candidate : Candidates) {
666 if (Candidate.FI != std::numeric_limits<int>::min() &&
667 StoredFIs.
count(Candidate.FI))
673 if (RUClobbers.
test(Unit) || TermRUs.test(Unit)) {
682 MachineInstr *
MI = Candidate.MI;
683 for (
const MachineOperand &MO :
MI->all_uses()) {
687 if (RUDefs.
test(Unit) || RUClobbers.
test(Unit)) {
700 HoistPostRA(
MI, Candidate.Def, CurLoop);
706void MachineLICMImpl::AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop) {
707 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
708 if (!BB->isLiveIn(
Reg))
710 for (MachineInstr &
MI : *BB) {
711 for (MachineOperand &MO :
MI.all_uses()) {
714 if (
TRI->regsOverlap(
Reg, MO.getReg()))
723void MachineLICMImpl::HoistPostRA(MachineInstr *
MI,
Register Def,
724 MachineLoop *CurLoop) {
734 MachineBasicBlock *
MBB =
MI->getParent();
740 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
746 AddToLiveIns(Def, CurLoop);
754bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
755 MachineLoop *CurLoop) {
756 if (SpeculationState != SpeculateUnknown)
757 return SpeculationState == SpeculateFalse;
761 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
763 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
765 SpeculationState = SpeculateTrue;
770 SpeculationState = SpeculateFalse;
778bool MachineLICMImpl::isTriviallyReMaterializable(
779 const MachineInstr &
MI)
const {
780 if (!
TII->isTriviallyReMaterializable(
MI))
783 for (
const MachineOperand &MO :
MI.all_uses()) {
784 if (MO.getReg().isVirtual())
791void MachineLICMImpl::EnterScope(MachineBasicBlock *
MBB) {
798void MachineLICMImpl::ExitScope(MachineBasicBlock *
MBB) {
806void MachineLICMImpl::ExitScopeIfDone(
808 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
809 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
810 if (OpenChildren[Node])
814 ExitScope(
Node->getBlock());
817 if (!Parent || --OpenChildren[Parent] != 0)
828 MachineLoop *CurLoop) {
829 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
835 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
836 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
840 while (!WorkList.
empty()) {
842 assert(Node &&
"Null dominator tree node?");
843 MachineBasicBlock *BB =
Node->getBlock();
848 if (
ML &&
ML->getHeader()->isEHPad())
856 unsigned NumChildren =
Node->getNumChildren();
864 OpenChildren[
Node] = NumChildren;
870 ParentMap[Child] =
Node;
882 InitRegPressure(Preheader);
886 MachineBasicBlock *
MBB =
Node->getBlock();
891 SpeculationState = SpeculateUnknown;
893 unsigned HoistRes = HoistResult::NotHoisted;
894 HoistRes = Hoist(&
MI, Preheader, CurLoop);
895 if (HoistRes & HoistResult::NotHoisted) {
899 for (MachineLoop *L = MLI->
getLoopFor(
MI.getParent()); L != CurLoop;
900 L =
L->getParentLoop())
903 while (!InnerLoopWorkList.
empty()) {
904 MachineLoop *InnerLoop = InnerLoopWorkList.
pop_back_val();
906 if (InnerLoopPreheader) {
907 HoistRes = Hoist(&
MI, InnerLoopPreheader, InnerLoop);
908 if (HoistRes & HoistResult::Hoisted)
914 if (HoistRes & HoistResult::ErasedMI)
917 UpdateRegPressure(&
MI);
921 ExitScopeIfDone(Node, OpenChildren, ParentMap);
932void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
940 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
946 for (
const MachineInstr &
MI : *BB)
947 UpdateRegPressure(&
MI,
true);
951void MachineLICMImpl::UpdateRegPressure(
const MachineInstr *
MI,
952 bool ConsiderUnseenAsDef) {
953 auto Cost = calcRegisterCost(
MI,
true, ConsiderUnseenAsDef);
954 for (
const auto &RPIdAndCost :
Cost) {
955 unsigned Class = RPIdAndCost.first;
956 if (
static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
969SmallDenseMap<unsigned, int>
970MachineLICMImpl::calcRegisterCost(
const MachineInstr *
MI,
bool ConsiderSeen,
971 bool ConsiderUnseenAsDef) {
972 SmallDenseMap<unsigned, int>
Cost;
973 if (
MI->isImplicitDef())
975 for (
unsigned i = 0, e =
MI->getDesc().getNumOperands(); i != e; ++i) {
976 const MachineOperand &MO =
MI->getOperand(i);
984 bool isNew = ConsiderSeen ? RegSeen.
insert(
Reg).second :
false;
985 const TargetRegisterClass *RC =
MRI->getRegClass(
Reg);
987 RegClassWeight
W =
TRI->getRegClassWeight(RC);
990 RCCost =
W.RegWeight;
993 if (isNew && !isKill && ConsiderUnseenAsDef)
995 RCCost =
W.RegWeight;
996 else if (!isNew && isKill)
997 RCCost = -
W.RegWeight;
1001 const int *PS =
TRI->getRegClassPressureSets(RC);
1002 for (; *PS != -1; ++PS)
1003 Cost[*PS] += RCCost;
1011 assert(
MI.mayLoad() &&
"Expected MI that loads!");
1015 if (
MI.memoperands_empty())
1020 if (PSV->isGOT() || PSV->isConstantPool())
1037 bool FoundCallerPresReg =
false;
1038 if (!
MI.mayStore() ||
MI.hasUnmodeledSideEffects() ||
1039 (
MI.getNumOperands() == 0))
1048 if (
Reg.isVirtual())
1050 if (
Reg.isVirtual())
1052 if (!
TRI->isCallerPreservedPhysReg(
Reg.asMCReg(), *
MI.getMF()))
1055 FoundCallerPresReg =
true;
1056 }
else if (!MO.
isImm()) {
1060 return FoundCallerPresReg;
1078 Register CopySrcReg =
MI.getOperand(1).getReg();
1082 if (!
TRI->isCallerPreservedPhysReg(CopySrcReg.
asMCReg(), *MF))
1085 Register CopyDstReg =
MI.getOperand(0).getReg();
1098bool MachineLICMImpl::IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop) {
1100 bool DontMoveAcrossStore = !
HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1101 if ((!
I.isSafeToMove(DontMoveAcrossStore)) &&
1114 !IsGuaranteedToExecute(
I.getParent(), CurLoop)) {
1123 if (
I.isConvergent())
1126 if (!
TII->shouldHoist(
I, CurLoop))
1133bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &
I,
1134 MachineLoop *CurLoop) {
1135 if (!IsLICMCandidate(
I, CurLoop)) {
1136 LLVM_DEBUG(
dbgs() <<
"LICM: Instruction not a LICM candidate\n");
1144bool MachineLICMImpl::HasLoopPHIUse(
const MachineInstr *
MI,
1145 MachineLoop *CurLoop) {
1148 MI = Work.pop_back_val();
1149 for (
const MachineOperand &MO :
MI->all_defs()) {
1153 for (MachineInstr &
UseMI :
MRI->use_instructions(
Reg)) {
1155 if (
UseMI.isPHI()) {
1169 Work.push_back(&
UseMI);
1172 }
while (!Work.empty());
1178bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
1180 MachineLoop *CurLoop)
const {
1181 if (
MRI->use_nodbg_empty(
Reg))
1184 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(
Reg)) {
1185 if (
UseMI.isCopyLike())
1189 for (
unsigned i = 0, e =
UseMI.getNumOperands(); i != e; ++i) {
1190 const MachineOperand &MO =
UseMI.getOperand(i);
1197 if (
TII->hasHighOperandLatency(SchedModel,
MRI,
MI, DefIdx,
UseMI, i))
1210bool MachineLICMImpl::IsCheapInstruction(MachineInstr &
MI)
const {
1214 bool isCheap =
false;
1215 unsigned NumDefs =
MI.getDesc().getNumDefs();
1216 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
1217 MachineOperand &DefMO =
MI.getOperand(i);
1225 if (!
TII->hasLowDefLatency(SchedModel,
MI, i))
1235bool MachineLICMImpl::CanCauseHighRegPressure(
1236 const SmallDenseMap<unsigned, int> &
Cost,
bool CheapInstr) {
1237 for (
const auto &RPIdAndCost :
Cost) {
1238 if (RPIdAndCost.second <= 0)
1241 unsigned Class = RPIdAndCost.first;
1242 int Limit = RegLimit[
Class];
1249 for (
const auto &RP : BackTrace)
1250 if (
static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1260void MachineLICMImpl::UpdateBackTraceRegPressure(
const MachineInstr *
MI) {
1263 auto Cost = calcRegisterCost(
MI,
false,
1267 for (
auto &RP : BackTrace)
1268 for (
const auto &RPIdAndCost :
Cost)
1269 RP[RPIdAndCost.first] += RPIdAndCost.second;
1274bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &
MI,
1275 MachineLoop *CurLoop) {
1276 if (
MI.isImplicitDef())
1294 bool CheapInstr = IsCheapInstruction(
MI);
1295 bool CreatesCopy = HasLoopPHIUse(&
MI, CurLoop);
1298 if (CheapInstr && CreatesCopy) {
1305 if (isTriviallyReMaterializable(
MI))
1310 for (
unsigned i = 0, e =
MI.getDesc().getNumOperands(); i != e; ++i) {
1311 const MachineOperand &MO =
MI.getOperand(i);
1317 if (MO.
isDef() && HasHighOperandLatency(
MI, i,
Reg, CurLoop)) {
1330 auto Cost = calcRegisterCost(&
MI,
false,
1335 if (!CanCauseHighRegPressure(
Cost, CheapInstr)) {
1351 (!IsGuaranteedToExecute(
MI.getParent(), CurLoop) && !MayCSE(&
MI))) {
1359 if (
MI.isCopy() ||
MI.isRegSequence()) {
1363 [
this](
const MachineOperand &UseOp) {
1364 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1365 MRI->isConstantPhysReg(UseOp.getReg());
1367 IsLoopInvariantInst(
MI, CurLoop) &&
1368 any_of(
MRI->use_nodbg_instructions(DefReg),
1369 [&CurLoop,
this, DefReg,
1371 if (!CurLoop->contains(&UseMI))
1378 if (CanCauseHighRegPressure(Cost, false) &&
1379 !CurLoop->isLoopInvariant(UseMI, DefReg))
1389 if (!isTriviallyReMaterializable(
MI) &&
1390 !
MI.isDereferenceableInvariantLoad()) {
1401MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *
MI,
1402 MachineLoop *CurLoop) {
1404 if (
MI->canFoldAsLoad())
1410 if (!
MI->isDereferenceableInvariantLoad())
1414 unsigned LoadRegIndex;
1416 TII->getOpcodeAfterMemoryUnfold(
MI->getOpcode(),
1420 if (NewOpc == 0)
return nullptr;
1421 const MCInstrDesc &MID =
TII->get(NewOpc);
1422 MachineFunction &MF = *
MI->getMF();
1423 const TargetRegisterClass *RC =
TII->getRegClass(MID, LoadRegIndex,
TRI, MF);
1427 SmallVector<MachineInstr *, 2> NewMIs;
1433 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1436 "Unfolded a load into multiple instructions!");
1437 MachineBasicBlock *
MBB =
MI->getParent();
1443 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
1444 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
1445 NewMIs[0]->eraseFromParent();
1446 NewMIs[1]->eraseFromParent();
1451 UpdateRegPressure(NewMIs[1]);
1456 if (
MI->shouldUpdateAdditionalCallInfo())
1459 MI->eraseFromParent();
1466void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1467 for (MachineInstr &
MI : *BB)
1473void MachineLICMImpl::InitializeLoadsHoistableLoops() {
1479 while (!Worklist.empty()) {
1480 auto *
L = Worklist.pop_back_val();
1481 AllowedToHoistLoads[
L] =
true;
1493 for (
auto *Loop :
reverse(LoopsInPreOrder)) {
1494 for (
auto *
MBB : Loop->blocks()) {
1496 if (!AllowedToHoistLoads[Loop])
1498 for (
auto &
MI : *
MBB) {
1499 if (!
MI.isLoadFoldBarrier() && !
MI.mayStore() && !
MI.isCall() &&
1500 !(
MI.mayLoad() &&
MI.hasOrderedMemoryRef()))
1502 for (MachineLoop *L = Loop;
L !=
nullptr;
L =
L->getParentLoop())
1503 AllowedToHoistLoads[
L] =
false;
1513MachineLICMImpl::LookForDuplicate(
const MachineInstr *
MI,
1514 std::vector<MachineInstr *> &PrevMIs) {
1515 for (MachineInstr *PrevMI : PrevMIs)
1516 if (
TII->produceSameValue(*
MI, *PrevMI, (PreRegAlloc ?
MRI :
nullptr)))
1526bool MachineLICMImpl::EliminateCSE(
1528 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1531 if (
MI->isImplicitDef())
1536 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1539 if (MachineInstr *Dup = LookForDuplicate(
MI, CI->second)) {
1544 SmallVector<unsigned, 2> Defs;
1545 for (
unsigned i = 0, e =
MI->getNumOperands(); i != e; ++i) {
1546 const MachineOperand &MO =
MI->getOperand(i);
1550 MO.
getReg() == Dup->getOperand(i).getReg()) &&
1551 "Instructions with different phys regs are not identical!");
1558 for (
unsigned i = 0, e = Defs.
size(); i != e; ++i) {
1559 unsigned Idx = Defs[i];
1561 Register DupReg = Dup->getOperand(Idx).getReg();
1564 if (!
MRI->constrainRegClass(DupReg,
MRI->getRegClass(
Reg))) {
1566 for (
unsigned j = 0;
j != i; ++
j)
1567 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1572 for (
unsigned Idx : Defs) {
1574 Register DupReg = Dup->getOperand(Idx).getReg();
1575 MRI->replaceRegWith(
Reg, DupReg);
1576 MRI->clearKillFlags(DupReg);
1578 if (!
MRI->use_nodbg_empty(DupReg))
1579 Dup->getOperand(Idx).setIsDead(
false);
1582 MI->eraseFromParent();
1591bool MachineLICMImpl::MayCSE(MachineInstr *
MI) {
1592 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1595 unsigned Opcode =
MI->getOpcode();
1596 for (
auto &Map : CSEMap) {
1599 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1600 Map.second.find(Opcode);
1603 if (CI ==
Map.second.end() ||
MI->isImplicitDef())
1605 if (LookForDuplicate(
MI, CI->second) !=
nullptr)
1616unsigned MachineLICMImpl::Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
1617 MachineLoop *CurLoop) {
1618 MachineBasicBlock *SrcBlock =
MI->getParent();
1623 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1624 ++NumNotHoistedDueToHotness;
1625 return HoistResult::NotHoisted;
1628 bool HasExtractHoistableLoad =
false;
1629 if (!IsLoopInvariantInst(*
MI, CurLoop) ||
1630 !IsProfitableToHoist(*
MI, CurLoop)) {
1632 MI = ExtractHoistableLoad(
MI, CurLoop);
1634 return HoistResult::NotHoisted;
1635 HasExtractHoistableLoad =
true;
1646 dbgs() <<
"Hoisting " << *
MI;
1647 if (
MI->getParent()->getBasicBlock())
1657 InitCSEMap(Preheader);
1658 FirstInLoop =
false;
1662 unsigned Opcode =
MI->getOpcode();
1663 bool HasCSEDone =
false;
1664 for (
auto &Map : CSEMap) {
1667 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1668 Map.second.find(Opcode);
1669 if (CI !=
Map.second.end()) {
1670 if (EliminateCSE(
MI, CI)) {
1685 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
1689 UpdateBackTraceRegPressure(
MI);
1694 for (MachineOperand &MO :
MI->all_defs())
1698 CSEMap[Preheader][Opcode].push_back(
MI);
1704 if (HasCSEDone || HasExtractHoistableLoad)
1705 return HoistResult::Hoisted | HoistResult::ErasedMI;
1706 return HoistResult::Hoisted;
1710MachineBasicBlock *MachineLICMImpl::getOrCreatePreheader(MachineLoop *CurLoop) {
1719 MachineBasicBlock *NewPreheader = Pred->SplitCriticalEdge(
1720 CurLoop->
getHeader(), LegacyPass, MFAM,
nullptr, MDTU);
1723 return NewPreheader;
1731bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1732 MachineBasicBlock *TgtBlock) {
1741 double Ratio = (double)DstBF / SrcBF;
1747template <
typename DerivedT,
bool PreRegAlloc>
1750 bool Changed = MachineLICMImpl(PreRegAlloc,
nullptr, &MFAM).run(MF);