72#include "llvm/Config/llvm-config.h"
100#define DEBUG_TYPE "pipeliner"
102STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
103STATISTIC(NumPipelined,
"Number of loops software pipelined");
104STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
105STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
106STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
107STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
108STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
109STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
110STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
111STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
112STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
116 cl::desc(
"Enable Software Pipelining"));
125 cl::desc(
"Size limit for the MII."),
131 cl::desc(
"Force pipeliner to use specified II."),
137 cl::desc(
"Maximum stages allowed in the generated scheduled."),
144 cl::desc(
"Prune dependences between unrelated Phi nodes."),
151 cl::desc(
"Prune loop carried order dependences."),
169 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
170 "with the generated schedule for feeding into the "
171 "-modulo-schedule-test pass"));
176 "Use the experimental peeling code generator for software pipelining"));
184 cl::desc(
"Limit register pressure of scheduled loop"));
189 cl::desc(
"Margin representing the unused percentage of "
190 "the register pressure limit"));
194 cl::desc(
"Use the MVE code generator for software pipelining"));
201 cl::desc(
"Enable CopyToPhi DAG Mutation"));
206 "pipeliner-force-issue-width",
213 cl::desc(
"Set how to use window scheduling algorithm."),
215 "Turn off window algorithm."),
217 "Use window algorithm after SMS algorithm fails."),
219 "Use window algorithm instead of SMS algorithm.")));
223unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
231 "Modulo Software Pipelining",
false,
false)
251 int64_t MemOpOffset = 0;
256 bool IsAllIdentified =
false;
262 bool isUnknown()
const {
return MemOpValue ==
nullptr; }
273 enum class InstrTag {
282 TaggedSUnit(
SUnit *SU, InstrTag Tag)
285 InstrTag
getTag()
const {
return InstrTag(getInt()); }
289 struct LoadStoreChunk {
293 void append(
SUnit *SU);
298 std::vector<SUnit> &SUnits;
304 std::vector<BitVector> LoopCarried;
317 std::vector<TaggedSUnit> TaggedSUnits;
328 void computeDependencies();
331 return LoopCarried[
Idx];
336 std::optional<InstrTag> getInstrTag(
SUnit *SU)
const;
338 void addLoopCarriedDepenenciesForChunks(
const LoadStoreChunk &
From,
339 const LoadStoreChunk &To);
350 bool PerformCheapCheck =
false);
352 void computeDependenciesAux();
380 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
381 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
382 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
386 for (
const auto &L : *
MLI)
396bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
397 bool Changed =
false;
398 for (
const auto &InnerLoop : L)
399 Changed |= scheduleLoop(*InnerLoop);
411 setPragmaPipelineOptions(L);
412 if (!canPipelineLoop(L)) {
416 L.getStartLoc(), L.getHeader())
417 <<
"Failed to pipeline loop";
425 if (useSwingModuloScheduler())
426 Changed = swingModuloScheduler(L);
428 if (useWindowScheduler(Changed))
429 Changed = runWindowScheduler(L);
435void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
454 if (LoopID ==
nullptr)
461 MDNode *MD = dyn_cast<MDNode>(MDO);
471 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
473 "Pipeline initiation interval hint metadata should have two operands.");
475 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
477 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
486bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
487 if (
L.getNumBlocks() != 1) {
490 L.getStartLoc(),
L.getHeader())
491 <<
"Not a single basic block: "
492 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
500 L.getStartLoc(),
L.getHeader())
501 <<
"Disabled by Pragma.";
512 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
516 L.getStartLoc(),
L.getHeader())
517 <<
"The branch can't be understood";
526 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
530 L.getStartLoc(),
L.getHeader())
531 <<
"The loop structure is not supported";
536 if (!
L.getLoopPreheader()) {
537 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
541 L.getStartLoc(),
L.getHeader())
542 <<
"No loop preheader found";
548 preprocessPhiNodes(*
L.getHeader());
555 *getAnalysis<LiveIntervalsWrapperPass>().getLIS().getSlotIndexes();
560 auto *RC =
MRI.getRegClass(DefOp.
getReg());
562 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
587bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
588 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
590 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
592 *
this, L, getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
RegClassInfo,
613 return SMS.hasNewSchedule();
627bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
633 Context.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
634 Context.LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
635 Context.RegClassInfo->runOnMachineFunction(*
MF);
640bool MachinePipeliner::useSwingModuloScheduler() {
645bool MachinePipeliner::useWindowScheduler(
bool Changed) {
652 "llvm.loop.pipeline.initiationinterval is set.\n");
660void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
663 else if (II_setByPragma > 0)
664 MII = II_setByPragma;
666 MII = std::max(ResMII, RecMII);
669void SwingSchedulerDAG::setMAX_II() {
672 else if (II_setByPragma > 0)
673 MAX_II = II_setByPragma;
683 updatePhiDependences();
690 dbgs() <<
"===== Loop Carried Edges Begin =====\n";
693 dbgs() <<
"===== Loop Carried Edges End =====\n";
697 findCircuits(NodeSets);
701 unsigned ResMII = calculateResMII();
702 unsigned RecMII = calculateRecMII(NodeSets);
710 setMII(ResMII, RecMII);
714 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
720 Pass.ORE->emit([&]() {
723 <<
"Invalid Minimal Initiation Interval: 0";
731 <<
", we don't pipeline large loops\n");
732 NumFailLargeMaxMII++;
733 Pass.ORE->emit([&]() {
736 <<
"Minimal Initiation Interval too large: "
737 <<
ore::NV(
"MII", (
int)MII) <<
" > "
739 <<
"Refer to -pipeliner-max-mii.";
744 computeNodeFunctions(NodeSets);
746 registerPressureFilter(NodeSets);
748 colocateNodeSets(NodeSets);
750 checkNodeSets(NodeSets);
753 for (
auto &
I : NodeSets) {
754 dbgs() <<
" Rec NodeSet ";
761 groupRemainingNodes(NodeSets);
763 removeDuplicateNodes(NodeSets);
766 for (
auto &
I : NodeSets) {
767 dbgs() <<
" NodeSet ";
772 computeNodeOrder(NodeSets);
775 checkValidNodeOrder(Circuits);
778 Scheduled = schedulePipeline(Schedule);
783 Pass.ORE->emit([&]() {
786 <<
"Unable to find schedule";
793 if (numStages == 0) {
796 Pass.ORE->emit([&]() {
799 <<
"No need to pipeline - no overlapped iterations in schedule.";
806 <<
" : too many stages, abort\n");
807 NumFailLargeMaxStage++;
808 Pass.ORE->emit([&]() {
811 <<
"Too many stages in schedule: "
812 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
814 <<
". Refer to -pipeliner-max-stages.";
819 Pass.ORE->emit([&]() {
822 <<
"Pipelined succesfully!";
827 std::vector<MachineInstr *> OrderedInsts;
831 OrderedInsts.push_back(SU->getInstr());
832 Cycles[SU->getInstr()] =
Cycle;
837 for (
auto &KV : NewMIs) {
838 Cycles[KV.first] = Cycles[KV.second];
839 Stages[KV.first] = Stages[KV.second];
840 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
847 "Cannot serialize a schedule with InstrChanges!");
871 for (
auto &KV : NewMIs)
883 assert(Phi.isPHI() &&
"Expecting a Phi.");
887 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
888 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
889 InitVal = Phi.getOperand(i).getReg();
891 LoopVal = Phi.getOperand(i).getReg();
893 assert(InitVal && LoopVal &&
"Unexpected Phi structure.");
899 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
900 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
901 return Phi.getOperand(i).getReg();
910 while (!Worklist.
empty()) {
912 for (
const auto &SI : SU->
Succs) {
913 SUnit *SuccSU = SI.getSUnit();
915 if (Visited.
count(SuccSU))
927SUnitWithMemInfo::SUnitWithMemInfo(
SUnit *SU) : SU(SU) {
928 if (!getUnderlyingObjects())
953bool SUnitWithMemInfo::getUnderlyingObjects() {
955 if (!
MI->hasOneMemOperand())
977 if (Src.isTriviallyDisjoint(Dst))
984 if (PerformCheapCheck) {
991 int64_t Offset1, Offset2;
992 bool Offset1IsScalable, Offset2IsScalable;
993 if (
TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
995 TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
998 Offset1IsScalable == Offset2IsScalable &&
999 (
int)Offset1 < (
int)Offset2) {
1001 "What happened to the chain edge?");
1013 if (Src.isUnknown() || Dst.isUnknown())
1015 if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)
1026 for (
const Value *SrcObj : Src.UnderlyingObjs)
1027 for (
const Value *DstObj : Dst.UnderlyingObjs)
1035void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(
SUnit *SU) {
1037 if (!
MI->mayLoadOrStore())
1039 (
MI->mayStore() ? Stores : Loads).emplace_back(SU);
1045 : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits),
N(SUnits.
size()),
1050 for (
auto &SU : SUnits) {
1051 auto Tagged = getInstrTag(&SU);
1056 TaggedSUnits.emplace_back(&SU, *Tagged);
1059 computeDependenciesAux();
1062std::optional<LoopCarriedOrderDepsTracker::InstrTag>
1063LoopCarriedOrderDepsTracker::getInstrTag(
SUnit *SU)
const {
1066 return InstrTag::Barrier;
1068 if (
MI->mayStore() ||
1069 (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad()))
1070 return InstrTag::LoadOrStore;
1072 if (
MI->mayRaiseFPException())
1073 return InstrTag::FPExceptions;
1075 return std::nullopt;
1078void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
1079 const SUnitWithMemInfo &Src,
const SUnitWithMemInfo &Dst,
1080 bool PerformCheapCheck) {
1082 if (Src.SU == Dst.SU)
1086 LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
1089void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
1090 const LoadStoreChunk &
From,
const LoadStoreChunk &To) {
1092 for (
const SUnitWithMemInfo &Src :
From.Loads)
1093 for (
const SUnitWithMemInfo &Dst : To.Stores)
1095 addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
1098 for (
const SUnitWithMemInfo &Src :
From.Stores)
1099 for (
const SUnitWithMemInfo &Dst : To.Loads)
1100 addDependenciesBetweenSUs(Src, Dst);
1103 for (
const SUnitWithMemInfo &Src :
From.Stores)
1104 for (
const SUnitWithMemInfo &Dst : To.Stores)
1105 addDependenciesBetweenSUs(Src, Dst);
1108void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
1110 for (
const auto &TSU : TaggedSUnits) {
1111 InstrTag
Tag = TSU.getTag();
1112 SUnit *SU = TSU.getPointer();
1114 case InstrTag::Barrier:
1115 Chunks.emplace_back();
1117 case InstrTag::LoadOrStore:
1118 Chunks.back().append(SU);
1120 case InstrTag::FPExceptions:
1129 for (
const LoadStoreChunk &Chunk : Chunks)
1130 addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
1146 LoopCarriedOrderDepsTracker LCODTracker(
this, &BAA,
TII,
TRI);
1147 LCODTracker.computeDependencies();
1148 for (
unsigned I = 0;
I != SUnits.size();
I++)
1149 for (
const int Succ : LCODTracker.getLoopCarried(
I).set_bits())
1162void SwingSchedulerDAG::updatePhiDependences() {
1167 for (
SUnit &
I : SUnits) {
1181 UI =
MRI.use_instr_begin(Reg),
1182 UE =
MRI.use_instr_end();
1195 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1200 }
else if (MO.isUse()) {
1203 if (
DefMI ==
nullptr)
1210 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
1217 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1226 for (
auto &PI :
I.Preds) {
1229 if (
I.getInstr()->isPHI()) {
1238 for (
const SDep &
D : RemoveDeps)
1245void SwingSchedulerDAG::changeDependences() {
1249 for (
SUnit &
I : SUnits) {
1250 unsigned BasePos = 0, OffsetPos = 0;
1252 int64_t NewOffset = 0;
1253 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1258 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1269 SUnit *LastSU = getSUnit(LastMI);
1273 if (Topo.IsReachable(&
I, LastSU))
1278 for (
const SDep &
P :
I.Preds)
1279 if (
P.getSUnit() == DefSU)
1281 for (
const SDep &
D : Deps) {
1282 Topo.RemovePred(&
I,
D.getSUnit());
1287 for (
auto &
P : LastSU->
Preds)
1290 for (
const SDep &
D : Deps) {
1291 Topo.RemovePred(LastSU,
D.getSUnit());
1298 Topo.AddPred(LastSU, &
I);
1303 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1314 std::vector<MachineInstr *> &OrderedInsts,
1322 Stage <= LastStage; ++Stage) {
1325 Instrs[
Cycle].push_front(SU);
1332 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1334 for (
SUnit *SU : CycleInstrs) {
1336 OrderedInsts.push_back(
MI);
1346struct FuncUnitSorter {
1352 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1360 unsigned min = UINT_MAX;
1361 if (InstrItins && !InstrItins->
isEmpty()) {
1364 InstrItins->
endStage(SchedClass))) {
1367 if (numAlternatives < min) {
1368 min = numAlternatives;
1385 if (!PRE.ReleaseAtCycle)
1389 unsigned NumUnits = ProcResource->
NumUnits;
1390 if (NumUnits < min) {
1392 F = PRE.ProcResourceIdx;
1397 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1406 unsigned SchedClass =
MI.getDesc().getSchedClass();
1407 if (InstrItins && !InstrItins->
isEmpty()) {
1410 InstrItins->
endStage(SchedClass))) {
1413 Resources[FuncUnits]++;
1428 if (!PRE.ReleaseAtCycle)
1430 Resources[PRE.ProcResourceIdx]++;
1434 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1440 unsigned MFUs1 = minFuncUnits(IS1, F1);
1441 unsigned MFUs2 = minFuncUnits(IS2, F2);
1444 return MFUs1 > MFUs2;
1449class HighRegisterPressureDetector {
1454 const unsigned PSetNum;
1460 std::vector<unsigned> InitSetPressure;
1464 std::vector<unsigned> PressureSetLimit;
1471 using OrderedInstsTy = std::vector<MachineInstr *>;
1475 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1476 if (Pressures.size() == 0) {
1480 for (
unsigned P : Pressures) {
1490 for (
auto PSetIter =
MRI.getPressureSets(
Reg); PSetIter.isValid();
1492 dbgs() << *PSetIter <<
' ';
1497 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1499 auto PSetIter =
MRI.getPressureSets(
Reg);
1500 unsigned Weight = PSetIter.getWeight();
1501 for (; PSetIter.isValid(); ++PSetIter)
1502 Pressure[*PSetIter] += Weight;
1505 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1507 auto PSetIter =
MRI.getPressureSets(
Reg);
1508 unsigned Weight = PSetIter.getWeight();
1509 for (; PSetIter.isValid(); ++PSetIter) {
1510 auto &
P = Pressure[*PSetIter];
1512 "register pressure must be greater than or equal weight");
1534 void computeLiveIn() {
1536 for (
auto &
MI : *OrigMBB) {
1537 if (
MI.isDebugInstr())
1545 if (isReservedRegister(
Reg))
1547 if (isDefinedInThisLoop(
Reg))
1553 for (
auto LiveIn : Used)
1554 increaseRegisterPressure(InitSetPressure, LiveIn);
1559 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1574 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1575 Instr2StageTy &Stages)
const {
1581 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1582 if (isDefinedInThisLoop(
Reg))
1588 UpdateTargetRegs(
Reg);
1590 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses)
1591 UpdateTargetRegs(
Use.RegUnit);
1596 return Stages[
MI] +
MI->isPHI();
1601 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses) {
1609 if (InstrScore(Orig) < InstrScore(New))
1615 Instr2LastUsesTy LastUses;
1616 for (
auto &Entry : LastUseMI)
1633 std::vector<unsigned>
1634 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1635 Instr2StageTy &Stages,
1636 const unsigned StageCount)
const {
1643 auto CurSetPressure = InitSetPressure;
1644 auto MaxSetPressure = InitSetPressure;
1645 auto LastUses = computeLastUses(OrderedInsts, Stages);
1648 dbgs() <<
"Ordered instructions:\n";
1650 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1655 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1665 increaseRegisterPressure(CurSetPressure,
Reg);
1669 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1675 if (!RegSet.contains(
Reg))
1680 decreaseRegisterPressure(CurSetPressure,
Reg);
1684 for (
unsigned I = 0;
I < StageCount;
I++) {
1686 const auto Stage = Stages[
MI];
1690 const unsigned Iter =
I - Stage;
1692 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1693 InsertReg(LiveRegSets[Iter],
Def.RegUnit);
1695 for (
auto LastUse : LastUses[
MI]) {
1698 EraseReg(LiveRegSets[Iter - 1], LastUse);
1700 EraseReg(LiveRegSets[Iter], LastUse);
1704 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1705 MaxSetPressure[PSet] =
1706 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1709 dbgs() <<
"CurSetPressure=";
1710 dumpRegisterPressures(CurSetPressure);
1711 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1717 return MaxSetPressure;
1723 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1724 TRI(MF.getSubtarget().getRegisterInfo()),
1725 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1726 PressureSetLimit(PSetNum, 0) {}
1732 if (
MI.isDebugInstr())
1734 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1738 computePressureSetLimit(RCI);
1744 const unsigned MaxStage)
const {
1746 "the percentage of the margin must be between 0 to 100");
1748 OrderedInstsTy OrderedInsts;
1749 Instr2StageTy Stages;
1751 const auto MaxSetPressure =
1752 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1755 dbgs() <<
"Dump MaxSetPressure:\n";
1756 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1757 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1762 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1763 unsigned Limit = PressureSetLimit[PSet];
1766 <<
" Margin=" << Margin <<
"\n");
1767 if (Limit < MaxSetPressure[PSet] + Margin) {
1770 <<
"Rejected the schedule because of too high register pressure\n");
1786unsigned SwingSchedulerDAG::calculateResMII() {
1789 return RM.calculateResMII();
1798unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1799 unsigned RecMII = 0;
1801 for (
NodeSet &Nodes : NodeSets) {
1805 unsigned Delay = Nodes.getLatency();
1806 unsigned Distance = 1;
1809 unsigned CurMII = (Delay + Distance - 1) / Distance;
1810 Nodes.setRecMII(CurMII);
1811 if (CurMII > RecMII)
1819void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1823 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1826 for (
auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1829 if (OE.isOutputDep()) {
1830 int N = OE.getDst()->NodeNum;
1832 auto Dep = OutputDeps.
find(BackEdge);
1833 if (Dep != OutputDeps.
end()) {
1834 BackEdge = Dep->second;
1835 OutputDeps.
erase(Dep);
1837 OutputDeps[
N] = BackEdge;
1840 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1852 int N = OE.getDst()->NodeNum;
1854 AdjK[i].push_back(
N);
1860 for (
auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1865 if (
IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1866 int N = Src->NodeNum;
1868 AdjK[i].push_back(
N);
1875 for (
auto &OD : OutputDeps)
1876 if (!
Added.test(OD.second)) {
1877 AdjK[OD.first].push_back(OD.second);
1878 Added.set(OD.second);
1884bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1892 for (
auto W : AdjK[V]) {
1893 if (NumPaths > MaxPaths)
1904 if (!Blocked.test(W)) {
1905 if (circuit(W, S, NodeSets, DAG,
1906 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
1914 for (
auto W : AdjK[V]) {
1925void SwingSchedulerDAG::Circuits::unblock(
int U) {
1928 while (!BU.
empty()) {
1930 assert(SI != BU.
end() &&
"Invalid B set.");
1933 if (Blocked.test(
W->NodeNum))
1934 unblock(
W->NodeNum);
1940void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1941 Circuits Cir(SUnits, Topo);
1943 Cir.createAdjacencyStructure(
this);
1944 for (
int I = 0, E = SUnits.size();
I != E; ++
I) {
1946 Cir.circuit(
I,
I, NodeSets,
this);
1979 for (
auto &Dep : SU.
Preds) {
1980 SUnit *TmpSU = Dep.getSUnit();
1992 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
2000 for (
auto &Dep : PHISUs[Index]->Succs) {
2004 SUnit *TmpSU = Dep.getSUnit();
2014 if (UseSUs.
size() == 0)
2019 for (
auto *
I : UseSUs) {
2020 for (
auto *Src : SrcSUs) {
2036void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
2037 ScheduleInfo.resize(SUnits.size());
2040 for (
int I : Topo) {
2041 const SUnit &SU = SUnits[
I];
2048 for (
int I : Topo) {
2050 int zeroLatencyDepth = 0;
2052 for (
const auto &IE : DDG->getInEdges(SU)) {
2054 if (
IE.getLatency() == 0)
2056 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
2057 if (
IE.ignoreDependence(
true))
2059 asap = std::max(asap, (
int)(getASAP(Pred) +
IE.getLatency() -
2060 IE.getDistance() * MII));
2062 maxASAP = std::max(maxASAP, asap);
2063 ScheduleInfo[
I].ASAP = asap;
2064 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
2070 int zeroLatencyHeight = 0;
2072 for (
const auto &OE : DDG->getOutEdges(SU)) {
2073 SUnit *Succ = OE.getDst();
2076 if (OE.getLatency() == 0)
2078 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
2079 if (OE.ignoreDependence(
true))
2081 alap = std::min(alap, (
int)(getALAP(Succ) - OE.getLatency() +
2082 OE.getDistance() * MII));
2085 ScheduleInfo[
I].ALAP = alap;
2086 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
2091 I.computeNodeSetInfo(
this);
2094 for (
unsigned i = 0; i < SUnits.size(); i++) {
2095 dbgs() <<
"\tNode " << i <<
":\n";
2096 dbgs() <<
"\t ASAP = " << getASAP(&SUnits[i]) <<
"\n";
2097 dbgs() <<
"\t ALAP = " << getALAP(&SUnits[i]) <<
"\n";
2098 dbgs() <<
"\t MOV = " << getMOV(&SUnits[i]) <<
"\n";
2099 dbgs() <<
"\t D = " << getDepth(&SUnits[i]) <<
"\n";
2100 dbgs() <<
"\t H = " << getHeight(&SUnits[i]) <<
"\n";
2101 dbgs() <<
"\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) <<
"\n";
2102 dbgs() <<
"\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) <<
"\n";
2117 SUnit *PredSU = IE.getSrc();
2118 if (S && S->count(PredSU) == 0)
2120 if (IE.ignoreDependence(
true))
2131 SUnit *SuccSU = OE.getDst();
2132 if (!OE.isAntiDep())
2134 if (S && S->count(SuccSU) == 0)
2140 return !Preds.
empty();
2153 SUnit *SuccSU = OE.getDst();
2154 if (S && S->count(SuccSU) == 0)
2156 if (OE.ignoreDependence(
false))
2167 SUnit *PredSU = IE.getSrc();
2168 if (!IE.isAntiDep())
2170 if (S && S->count(PredSU) == 0)
2176 return !Succs.
empty();
2192 if (!Visited.
insert(Cur).second)
2193 return Path.contains(Cur);
2194 bool FoundPath =
false;
2196 if (!OE.ignoreDependence(
false))
2198 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
2200 if (IE.isAntiDep() && IE.getDistance() == 0)
2202 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
2217 for (
SUnit *SU : NS) {
2223 if (Reg.isVirtual())
2225 else if (
MRI.isAllocatable(Reg))
2226 Uses.insert_range(
TRI->regunits(Reg.asMCReg()));
2229 for (
SUnit *SU : NS)
2233 if (Reg.isVirtual()) {
2234 if (!
Uses.count(Reg))
2236 }
else if (
MRI.isAllocatable(Reg)) {
2238 if (!
Uses.count(Unit))
2247void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2248 for (
auto &NS : NodeSets) {
2254 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(),
false,
true);
2256 RecRPTracker.closeBottom();
2258 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2260 return A->NodeNum >
B->NodeNum;
2263 for (
auto &SU : SUnits) {
2269 RecRPTracker.setPos(std::next(CurInstI));
2273 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2278 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2281 NS.setExceedPressure(SU);
2284 RecRPTracker.recede();
2291void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2292 unsigned Colocate = 0;
2293 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2298 for (
int j = i + 1;
j <
e; ++
j) {
2319void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2324 for (
auto &NS : NodeSets) {
2325 if (NS.getRecMII() > 2)
2327 if (NS.getMaxDepth() > MII)
2336void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2348 computePath(NI, Path, NodesAdded,
I, Visited, DDG.get());
2355 if (
succ_L(NodesAdded,
N, DDG.get())) {
2359 computePath(NI, Path,
I, NodesAdded, Visited, DDG.get());
2371 if (
succ_L(NodesAdded,
N, DDG.get()))
2373 addConnectedNodes(
I, NewSet, NodesAdded);
2374 if (!NewSet.
empty())
2375 NodeSets.push_back(NewSet);
2380 if (
pred_L(NodesAdded,
N, DDG.get()))
2382 addConnectedNodes(
I, NewSet, NodesAdded);
2383 if (!NewSet.
empty())
2384 NodeSets.push_back(NewSet);
2388 for (
SUnit &SU : SUnits) {
2389 if (NodesAdded.
count(&SU) == 0) {
2391 addConnectedNodes(&SU, NewSet, NodesAdded);
2392 if (!NewSet.
empty())
2393 NodeSets.push_back(NewSet);
2399void SwingSchedulerDAG::addConnectedNodes(
SUnit *SU,
NodeSet &NewSet,
2403 for (
auto &OE : DDG->getOutEdges(SU)) {
2405 if (!OE.isArtificial() && !
Successor->isBoundaryNode() &&
2407 addConnectedNodes(
Successor, NewSet, NodesAdded);
2409 for (
auto &IE : DDG->getInEdges(SU)) {
2410 SUnit *Predecessor =
IE.getSrc();
2411 if (!
IE.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2412 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2421 for (
SUnit *SU : Set1) {
2422 if (Set2.
count(SU) != 0)
2425 return !Result.empty();
2429void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2430 for (NodeSetType::iterator
I = NodeSets.begin(), E = NodeSets.end();
I != E;
2433 for (NodeSetType::iterator J =
I + 1; J != E;) {
2438 for (
SUnit *SU : *J)
2450void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2451 for (NodeSetType::iterator
I = NodeSets.begin(), E = NodeSets.end();
I != E;
2453 for (NodeSetType::iterator J =
I + 1; J != E;) {
2454 J->remove_if([&](
SUnit *SUJ) {
return I->count(SUJ); });
2469void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2473 for (
auto &Nodes : NodeSets) {
2491 }
else if (NodeSets.size() == 1) {
2492 for (
const auto &
N : Nodes)
2493 if (
N->Succs.size() == 0)
2499 SUnit *maxASAP =
nullptr;
2500 for (
SUnit *SU : Nodes) {
2501 if (maxASAP ==
nullptr || getASAP(SU) > getASAP(maxASAP) ||
2502 (getASAP(SU) == getASAP(maxASAP) && SU->
NodeNum > maxASAP->
NodeNum))
2510 while (!
R.empty()) {
2511 if (Order == TopDown) {
2515 while (!
R.empty()) {
2516 SUnit *maxHeight =
nullptr;
2518 if (maxHeight ==
nullptr || getHeight(
I) > getHeight(maxHeight))
2520 else if (getHeight(
I) == getHeight(maxHeight) &&
2521 getZeroLatencyHeight(
I) > getZeroLatencyHeight(maxHeight))
2523 else if (getHeight(
I) == getHeight(maxHeight) &&
2524 getZeroLatencyHeight(
I) ==
2525 getZeroLatencyHeight(maxHeight) &&
2526 getMOV(
I) < getMOV(maxHeight))
2531 R.remove(maxHeight);
2532 for (
const auto &OE : DDG->getOutEdges(maxHeight)) {
2533 SUnit *SU = OE.getDst();
2534 if (Nodes.count(SU) == 0)
2538 if (OE.ignoreDependence(
false))
2547 for (
const auto &IE : DDG->getInEdges(maxHeight)) {
2549 if (!
IE.isAntiDep())
2551 if (Nodes.count(SU) == 0)
2567 while (!
R.empty()) {
2568 SUnit *maxDepth =
nullptr;
2570 if (maxDepth ==
nullptr || getDepth(
I) > getDepth(maxDepth))
2572 else if (getDepth(
I) == getDepth(maxDepth) &&
2573 getZeroLatencyDepth(
I) > getZeroLatencyDepth(maxDepth))
2575 else if (getDepth(
I) == getDepth(maxDepth) &&
2576 getZeroLatencyDepth(
I) == getZeroLatencyDepth(maxDepth) &&
2577 getMOV(
I) < getMOV(maxDepth))
2583 if (Nodes.isExceedSU(maxDepth)) {
2586 R.insert(Nodes.getNode(0));
2589 for (
const auto &IE : DDG->getInEdges(maxDepth)) {
2591 if (Nodes.count(SU) == 0)
2602 for (
const auto &OE : DDG->getOutEdges(maxDepth)) {
2603 SUnit *SU = OE.getDst();
2604 if (!OE.isAntiDep())
2606 if (Nodes.count(SU) == 0)
2624 dbgs() <<
"Node order: ";
2626 dbgs() <<
" " <<
I->NodeNum <<
" ";
2633bool SwingSchedulerDAG::schedulePipeline(
SMSchedule &Schedule) {
2640 bool scheduleFound =
false;
2641 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2644 std::make_unique<HighRegisterPressureDetector>(
Loop.
getHeader(), MF);
2645 HRPDetector->init(RegClassInfo);
2648 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2660 int EarlyStart = INT_MIN;
2661 int LateStart = INT_MAX;
2670 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2672 if (EarlyStart > LateStart)
2673 scheduleFound =
false;
2674 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2676 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2677 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2679 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2680 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2681 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2690 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2692 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2695 scheduleFound = Schedule.
insert(SU, FirstCycle + getASAP(SU),
2696 FirstCycle + getASAP(SU) +
II - 1,
II);
2704 scheduleFound =
false;
2708 dbgs() <<
"\tCan't schedule\n";
2710 }
while (++NI != NE && scheduleFound);
2715 scheduleFound = DDG->isValidSchedule(Schedule);
2737 if (scheduleFound) {
2738 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*
this, Schedule);
2743 if (scheduleFound) {
2745 Pass.ORE->emit([&]() {
2748 <<
"Schedule found with Initiation Interval: "
2750 <<
", MaxStageCount: "
2764 if (!Reg.isVirtual())
2766 if (
MRI.getVRegDef(Reg)->getParent() !=
MI.getParent())
2778 if (!
Op.isReg() || !
Op.getReg().isVirtual())
2806 if (Def->getParent() != LoopBB)
2809 if (Def->isCopy()) {
2811 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2813 CurReg = Def->getOperand(1).getReg();
2814 }
else if (Def->isPHI()) {
2828 bool OffsetIsScalable;
2829 if (
TII->getMemOperandWithOffset(*Def, BaseOp,
Offset, OffsetIsScalable,
2832 CurReg = BaseOp->
getReg();
2844 if (CurReg == OrgReg)
2848 if (!Phi || !Increment)
2856bool SwingSchedulerDAG::computeDelta(
const MachineInstr &
MI,
int &Delta)
const {
2860 bool OffsetIsScalable;
2861 if (!
TII->getMemOperandWithOffset(
MI, BaseOp,
Offset, OffsetIsScalable,
TRI))
2865 if (OffsetIsScalable)
2868 if (!BaseOp->
isReg())
2883 unsigned &OffsetPos,
2889 unsigned BasePosLd, OffsetPosLd;
2897 if (!Phi || !
Phi->isPHI())
2906 if (!PrevDef || PrevDef ==
MI)
2912 unsigned BasePos1 = 0, OffsetPos1 = 0;
2918 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
2923 MF.deleteMachineInstr(NewMI);
2928 BasePos = BasePosLd;
2929 OffsetPos = OffsetPosLd;
2941 InstrChanges.
find(SU);
2942 if (It != InstrChanges.
end()) {
2943 std::pair<Register, int64_t> RegAndOffset = It->second;
2944 unsigned BasePos, OffsetPos;
2947 Register BaseReg =
MI->getOperand(BasePos).getReg();
2953 if (BaseStageNum < DefStageNum) {
2955 int OffsetDiff = DefStageNum - BaseStageNum;
2956 if (DefCycleNum < BaseCycleNum) {
2962 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2965 MISUnitMap[NewMI] = SU;
2977 while (Def->isPHI()) {
2978 if (!Visited.
insert(Def).second)
2980 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2981 if (Def->getOperand(i + 1).getMBB() == BB) {
2982 Def =
MRI.getVRegDef(Def->getOperand(i).getReg());
2993 int DeltaB, DeltaO, Delta;
3000 int64_t OffsetB, OffsetO;
3001 bool OffsetBIsScalable, OffsetOIsScalable;
3003 if (!
TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
3004 OffsetBIsScalable,
TRI) ||
3005 !
TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
3006 OffsetOIsScalable,
TRI))
3009 if (OffsetBIsScalable || OffsetOIsScalable)
3019 if (!RegB.
isVirtual() || !RegO.isVirtual())
3024 if (!DefB || !DefO || !DefB->
isPHI() || !DefO->
isPHI())
3049 dbgs() <<
"Overlap check:\n";
3050 dbgs() <<
" BaseMI: ";
3052 dbgs() <<
" Base + " << OffsetB <<
" + I * " << Delta
3053 <<
", Len: " << AccessSizeB.
getValue() <<
"\n";
3054 dbgs() <<
" OtherMI: ";
3056 dbgs() <<
" Base + " << OffsetO <<
" + I * " << Delta
3057 <<
", Len: " << AccessSizeO.
getValue() <<
"\n";
3065 int64_t BaseMinAddr = OffsetB;
3066 int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.
getValue() - 1;
3067 if (BaseMinAddr > OhterNextIterMaxAddr) {
3072 int64_t BaseMaxAddr = OffsetB + AccessSizeB.
getValue() - 1;
3073 int64_t OtherNextIterMinAddr = OffsetO + Delta;
3074 if (BaseMaxAddr < OtherNextIterMinAddr) {
3088 if ((!
Edge.isOrderDep() && !
Edge.isOutputDep()) ||
Edge.isArtificial() ||
3089 Edge.getDst()->isBoundaryNode())
3095 if (
Edge.isOutputDep())
3100 assert(SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
3114 return mayOverlapInLaterIter(DI, SI);
3117void SwingSchedulerDAG::postProcessDAG() {
3118 for (
auto &M : Mutations)
3128 bool forward =
true;
3130 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
3131 << EndCycle <<
" II: " <<
II <<
"\n";
3133 if (StartCycle > EndCycle)
3137 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3138 for (
int curCycle = StartCycle; curCycle != termCycle;
3139 forward ? ++curCycle : --curCycle) {
3142 ProcItinResources.canReserveResources(*SU, curCycle)) {
3144 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
3149 ProcItinResources.reserveResources(*SU, curCycle);
3150 ScheduledInstrs[curCycle].push_back(SU);
3151 InstrToCycle.insert(std::make_pair(SU, curCycle));
3152 if (curCycle > LastCycle)
3153 LastCycle = curCycle;
3154 if (curCycle < FirstCycle)
3155 FirstCycle = curCycle;
3159 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
3172 int EarlyCycle = INT_MAX;
3173 while (!Worklist.
empty()) {
3176 if (Visited.
count(PrevSU))
3178 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3179 if (it == InstrToCycle.end())
3181 EarlyCycle = std::min(EarlyCycle, it->second);
3182 for (
const auto &IE : DDG->
getInEdges(PrevSU))
3183 if (IE.isOrderDep() || IE.isOutputDep())
3196 int LateCycle = INT_MIN;
3197 while (!Worklist.
empty()) {
3202 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3203 if (it == InstrToCycle.end())
3205 LateCycle = std::max(LateCycle, it->second);
3207 if (OE.isOrderDep() || OE.isOutputDep())
3218 for (
auto &
P : SU->
Preds)
3219 if (
P.getKind() ==
SDep::Anti &&
P.getSUnit()->getInstr()->isPHI())
3220 for (
auto &S :
P.getSUnit()->Succs)
3221 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
3222 return P.getSUnit();
3235 for (
int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3236 for (
SUnit *
I : getInstructions(cycle)) {
3238 if (IE.getSrc() ==
I) {
3242 int End = earliestCycleInChain(IE, DDG) + (
II - 1);
3243 *MinLateStart = std::min(*MinLateStart,
End);
3245 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() *
II;
3246 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3251 if (OE.getDst() ==
I) {
3255 int Start = latestCycleInChain(OE, DDG) + 1 -
II;
3256 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
3258 int LateStart = cycle - OE.getLatency() + OE.getDistance() *
II;
3259 *MinLateStart = std::min(*MinLateStart, LateStart);
3264 for (
const auto &Dep : SU->
Preds) {
3267 if (BE && Dep.getSUnit() == BE && !SU->
getInstr()->
isPHI() &&
3269 *MinLateStart = std::min(*MinLateStart, cycle);
3279 std::deque<SUnit *> &Insts)
const {
3281 bool OrderBeforeUse =
false;
3282 bool OrderAfterDef =
false;
3283 bool OrderBeforeDef =
false;
3284 unsigned MoveDef = 0;
3285 unsigned MoveUse = 0;
3286 int StageInst1 = stageScheduled(SU);
3290 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
3293 if (!MO.isReg() || !MO.getReg().isVirtual())
3297 unsigned BasePos, OffsetPos;
3298 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3299 if (
MI->getOperand(BasePos).getReg() == Reg)
3303 std::tie(Reads,
Writes) =
3304 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3305 if (MO.isDef() && Reads && stageScheduled(*
I) <= StageInst1) {
3306 OrderBeforeUse =
true;
3309 }
else if (MO.isDef() && Reads && stageScheduled(*
I) > StageInst1) {
3311 OrderAfterDef =
true;
3313 }
else if (MO.isUse() &&
Writes && stageScheduled(*
I) == StageInst1) {
3314 if (cycleScheduled(*
I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3315 OrderBeforeUse =
true;
3319 OrderAfterDef =
true;
3322 }
else if (MO.isUse() &&
Writes && stageScheduled(*
I) > StageInst1) {
3323 OrderBeforeUse =
true;
3327 OrderAfterDef =
true;
3330 }
else if (MO.isUse() &&
Writes && stageScheduled(*
I) < StageInst1) {
3332 OrderBeforeUse =
true;
3335 }
else if (MO.isUse() && stageScheduled(*
I) == StageInst1 &&
3336 isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3338 OrderBeforeDef =
true;
3346 if (OE.getDst() != *
I)
3348 if (OE.isOrderDep() && stageScheduled(*
I) == StageInst1) {
3349 OrderBeforeUse =
true;
3356 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3357 stageScheduled(*
I) == StageInst1) {
3358 OrderBeforeUse =
true;
3359 if ((MoveUse == 0) || (Pos < MoveUse))
3364 if (IE.getSrc() != *
I)
3366 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3367 stageScheduled(*
I) == StageInst1) {
3368 OrderAfterDef =
true;
3375 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3376 OrderBeforeUse =
false;
3381 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3385 if (OrderBeforeUse && OrderAfterDef) {
3386 SUnit *UseSU = Insts.at(MoveUse);
3387 SUnit *DefSU = Insts.at(MoveDef);
3388 if (MoveUse > MoveDef) {
3389 Insts.erase(Insts.begin() + MoveUse);
3390 Insts.erase(Insts.begin() + MoveDef);
3392 Insts.erase(Insts.begin() + MoveDef);
3393 Insts.erase(Insts.begin() + MoveUse);
3395 orderDependence(SSD, UseSU, Insts);
3396 orderDependence(SSD, SU, Insts);
3397 orderDependence(SSD, DefSU, Insts);
3403 Insts.push_front(SU);
3405 Insts.push_back(SU);
3413 assert(Phi.isPHI() &&
"Expecting a Phi.");
3415 unsigned DefCycle = cycleScheduled(DefSU);
3416 int DefStage = stageScheduled(DefSU);
3420 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3426 unsigned LoopCycle = cycleScheduled(UseSU);
3427 int LoopStage = stageScheduled(UseSU);
3428 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3446 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3448 if (!isLoopCarried(SSD, *Phi))
3452 if (DMO.getReg() == LoopReg)
3463 if (InstrToCycle.count(IE.getSrc()))
3474 for (
auto &SU : SSD->
SUnits)
3479 while (!Worklist.
empty()) {
3481 if (DoNotPipeline.
count(SU))
3484 DoNotPipeline.
insert(SU);
3491 if (OE.getDistance() == 1)
3494 return DoNotPipeline;
3503 int NewLastCycle = INT_MIN;
3507 if (!DNP.
contains(&SU) || stageScheduled(&SU) == 0) {
3508 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3513 int NewCycle = getFirstCycle();
3515 if (IE.getDistance() == 0)
3516 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3521 if (OE.getDistance() == 1)
3522 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3524 int OldCycle = InstrToCycle[&SU];
3525 if (OldCycle != NewCycle) {
3526 InstrToCycle[&SU] = NewCycle;
3527 auto &OldS = getInstructions(OldCycle);
3529 getInstructions(NewCycle).emplace_back(&SU);
3531 <<
") is not pipelined; moving from cycle " << OldCycle
3532 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3557 if (FirstCycle + InitiationInterval <= NewCycle)
3560 NewLastCycle = std::max(NewLastCycle, NewCycle);
3562 LastCycle = NewLastCycle;
3578 int StageDef = stageScheduled(&SU);
3579 int CycleDef = InstrToCycle[&SU];
3580 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3582 SUnit *Dst = OE.getDst();
3583 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3584 if (OE.getReg().isPhysical()) {
3585 if (stageScheduled(Dst) != StageDef)
3587 if (InstrToCycle[Dst] <= CycleDef)
3605void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3608 typedef std::pair<SUnit *, unsigned> UnitIndex;
3609 std::vector<UnitIndex> Indices(
NodeOrder.size(), std::make_pair(
nullptr, 0));
3611 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i)
3612 Indices.push_back(std::make_pair(
NodeOrder[i], i));
3614 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3615 return std::get<0>(i1) < std::get<0>(i2);
3628 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i) {
3632 bool PredBefore =
false;
3633 bool SuccBefore =
false;
3640 for (
const auto &IE : DDG->getInEdges(SU)) {
3641 SUnit *PredSU = IE.getSrc();
3642 unsigned PredIndex = std::get<1>(
3651 for (
const auto &OE : DDG->getOutEdges(SU)) {
3652 SUnit *SuccSU = OE.getDst();
3658 unsigned SuccIndex = std::get<1>(
3671 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3676 NumNodeOrderIssues++;
3680 <<
" are scheduled before node " << SU->
NodeNum
3687 dbgs() <<
"Invalid node order found!\n";
3700 for (
SUnit *SU : Instrs) {
3702 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3710 InstrChanges.
find(SU);
3711 if (It != InstrChanges.
end()) {
3712 unsigned BasePos, OffsetPos;
3718 MI->getOperand(OffsetPos).getImm() - It->second.second;
3721 MISUnitMap[NewMI] = SU;
3731 unsigned TiedUseIdx = 0;
3732 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3734 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3736 NewBaseReg =
MI->getOperand(i).getReg();
3745 const std::deque<SUnit *> &Instrs)
const {
3746 std::deque<SUnit *> NewOrderPhi;
3747 for (
SUnit *SU : Instrs) {
3749 NewOrderPhi.push_back(SU);
3751 std::deque<SUnit *> NewOrderI;
3752 for (
SUnit *SU : Instrs) {
3754 orderDependence(SSD, SU, NewOrderI);
3765 for (
int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3766 for (
int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3768 std::deque<SUnit *> &cycleInstrs =
3769 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3771 ScheduledInstrs[cycle].push_front(SU);
3777 for (
int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3778 ScheduledInstrs.erase(cycle);
3787 for (
int Cycle = getFirstCycle(), E = getFinalCycle();
Cycle <= E; ++
Cycle) {
3788 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3789 cycleInstrs = reorderInstructions(SSD, cycleInstrs);
3797 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3798 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3799 for (
const auto &
I : Nodes)
3800 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3804#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3808 for (
int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3811 for (
SUnit *CI : cycleInstrs->second) {
3812 os <<
"cycle " << cycle <<
" (" << stageScheduled(CI) <<
") ";
3813 os <<
"(" << CI->
NodeNum <<
") ";
3824void ResourceManager::dumpMRT()
const {
3828 std::stringstream SS;
3830 SS << std::setw(4) <<
"Slot";
3831 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3832 SS << std::setw(3) <<
I;
3833 SS << std::setw(7) <<
"#Mops"
3835 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3836 SS << std::setw(4) << Slot;
3837 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3838 SS << std::setw(3) << MRT[Slot][
I];
3839 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3848 unsigned ProcResourceID = 0;
3853 "Too many kinds of resources, unsupported");
3859 if (
Desc.SubUnitsIdxBegin)
3861 Masks[
I] = 1ULL << ProcResourceID;
3867 if (!
Desc.SubUnitsIdxBegin)
3869 Masks[
I] = 1ULL << ProcResourceID;
3870 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3871 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3876 dbgs() <<
"ProcResourceDesc:\n";
3879 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3880 ProcResource->
Name,
I, Masks[
I],
3883 dbgs() <<
" -----------------\n";
3891 dbgs() <<
"canReserveResources:\n";
3894 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3900 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3906 reserveResources(SCDesc,
Cycle);
3907 bool Result = !isOverbooked();
3908 unreserveResources(SCDesc,
Cycle);
3917 dbgs() <<
"reserveResources:\n";
3920 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3926 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3932 reserveResources(SCDesc,
Cycle);
3937 dbgs() <<
"reserveResources: done!\n\n";
3948 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3951 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3960 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3963 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3966bool ResourceManager::isOverbooked()
const {
3968 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
3969 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3971 if (MRT[Slot][
I] >
Desc->NumUnits)
3974 if (NumScheduledMops[Slot] > IssueWidth)
3980int ResourceManager::calculateResMIIDFA()
const {
3985 FuncUnitSorter FUS = FuncUnitSorter(*ST);
3987 FUS.calcCriticalResources(*SU.
getInstr());
3998 while (!FuncUnitOrder.empty()) {
4000 FuncUnitOrder.pop();
4001 if (
TII->isZeroCost(
MI->getOpcode()))
4007 unsigned ReservedCycles = 0;
4008 auto *RI = Resources.
begin();
4009 auto *RE = Resources.
end();
4011 dbgs() <<
"Trying to reserve resource for " << NumCycles
4012 <<
" cycles for \n";
4015 for (
unsigned C = 0;
C < NumCycles; ++
C)
4017 if ((*RI)->canReserveResources(*
MI)) {
4018 (*RI)->reserveResources(*
MI);
4025 <<
", NumCycles:" << NumCycles <<
"\n");
4027 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
4029 <<
"NewResource created to reserve resources"
4032 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
4033 NewResource->reserveResources(*
MI);
4034 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
4038 int Resmii = Resources.
size();
4045 return calculateResMIIDFA();
4064 <<
" WriteProcRes: ";
4074 SM.getProcResource(PRE.ProcResourceIdx);
4075 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
4078 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4083 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4086 dbgs() <<
"#Mops: " << NumMops <<
", "
4087 <<
"IssueWidth: " << IssueWidth <<
", "
4088 <<
"Cycles: " << Result <<
"\n";
4093 std::stringstream SS;
4094 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
4095 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
4100 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4102 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
4105 std::stringstream SS;
4106 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
4107 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
4108 << std::setw(10) << Cycles <<
"\n";
4112 if (Cycles > Result)
4119 InitiationInterval =
II;
4120 DFAResources.clear();
4121 DFAResources.resize(
II);
4122 for (
auto &
I : DFAResources)
4123 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
4126 NumScheduledMops.clear();
4127 NumScheduledMops.resize(
II);
4131 if (Pred.isArtificial() || Dst->isBoundaryNode())
4136 return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0);
4139SwingSchedulerDDG::SwingSchedulerDDGEdges &
4140SwingSchedulerDDG::getEdges(
const SUnit *SU) {
4142 return EntrySUEdges;
4148const SwingSchedulerDDG::SwingSchedulerDDGEdges &
4149SwingSchedulerDDG::getEdges(
const SUnit *SU)
const {
4151 return EntrySUEdges;
4157void SwingSchedulerDDG::addEdge(
const SUnit *SU,
4160 "Validation-only edges are not expected here.");
4161 auto &Edges = getEdges(SU);
4162 if (
Edge.getSrc() == SU)
4163 Edges.Succs.push_back(Edge);
4165 Edges.Preds.push_back(Edge);
4168void SwingSchedulerDDG::initEdges(
SUnit *SU) {
4169 for (
const auto &PI : SU->
Preds) {
4175 for (
const auto &SI : SU->
Succs) {
4184 : EntrySU(EntrySU), ExitSU(ExitSU) {
4185 EdgesVec.resize(SUnits.size());
4190 for (
auto &SU : SUnits)
4194 for (
SUnit &SU : SUnits) {
4199 for (
SUnit *Dst : *OD) {
4202 Edge.setDistance(1);
4203 ValidationOnlyEdges.push_back(
Edge);
4211 return getEdges(SU).Preds;
4216 return getEdges(SU).Succs;
4223 auto ExpandCycle = [&](
SUnit *SU) {
4232 if (!Src->isInstr() || !Dst->isInstr())
4234 int CycleSrc = ExpandCycle(Src);
4235 int CycleDst = ExpandCycle(Dst);
4236 int MaxLateStart = CycleDst +
Edge.getDistance() *
II -
Edge.getLatency();
4237 if (CycleSrc > MaxLateStart) {
4239 dbgs() <<
"Validation failed for edge from " << Src->NodeNum <<
" to "
4240 << Dst->NodeNum <<
"\n";
4250 for (
SUnit &SU : SUnits) {
4279 !
TII->isGlobalMemoryObject(FromMI) &&
4297 const auto DumpSU = [](
const SUnit *SU) {
4298 std::ostringstream OSS;
4299 OSS <<
"SU(" << SU->
NodeNum <<
")";
4303 dbgs() <<
" Loop carried edges from " << DumpSU(SU) <<
"\n"
4305 for (
SUnit *Dst : *Order)
4306 dbgs() <<
" " << DumpSU(Dst) <<
"\n";
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
SmallVector< uint32_t, 0 > Writes
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)
When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
Modulo Software Pipelining
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD, bool PerformCheapCheck)
Returns true if there is a loop-carried order dependency from Src to Dst.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
static unsigned getSize(unsigned Kind)
Add loop-carried chain dependencies.
void computeDependencies()
The main function to compute loop-carried order-dependencies.
const BitVector & getLoopCarried(unsigned Idx) const
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A private abstract base class describing the concept of an individual alias analysis implementation.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM_ABI bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
A possibly irreducible generalization of a Loop.
bool getIncrementValue(const MachineInstr &MI, int &Value) const override
If the instruction is an increment of a constant value, return the amount.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override
Create machine specific model for scheduling.
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
Itinerary data supplied by a subtarget to be used by a target.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
BlockT * getHeader() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Generic base class for all target subtargets.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
LLVM_ABI StringRef getString() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Pass interface - Implemented by all 'passes'.
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is scheduled or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void dumpNode(const SUnit &SU) const override
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool isMVEExpanderSupported()
Return true if the target can expand pipelined schedule with modulo variable expansion.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
The main class in the implementation of the target independent window scheduler.
unsigned getPosition() const
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
@ WS_Force
Use window algorithm instead of SMS algorithm.
@ WS_On
Use window algorithm after SMS algorithm fails.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
cl::opt< bool > SwpEnableCopyToPhi
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
const Value * MemOpValue
The value of a memory operand.
SmallVector< const Value *, 2 > UnderlyingObjs
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
int64_t MemOpOffset
The offset of a memory operand.
bool IsAllIdentified
True if all the underlying objects are identified.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Description of the encoding of one expression Op.
These values represent a non-pipelined step in the execution of an instruction.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineInstr * LoopInductionVar
SmallVector< MachineOperand, 4 > BrCond
MachineInstr * LoopCompare
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Store the effects of a change in pressure on things that MI scheduler cares about.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.