51#include "llvm/Config/llvm-config.h"
75#define DEBUG_TYPE "machine-scheduler"
77STATISTIC(NumClustered,
"Number of load/store pairs clustered");
83 cl::desc(
"Pre reg-alloc list scheduling direction"),
87 "Force top-down pre reg-alloc list scheduling"),
89 "Force bottom-up pre reg-alloc list scheduling"),
91 "Force bidirectional pre reg-alloc list scheduling")));
95 cl::desc(
"Post reg-alloc list scheduling direction"),
99 "Force top-down post reg-alloc list scheduling"),
101 "Force bottom-up post reg-alloc list scheduling"),
103 "Force bidirectional post reg-alloc list scheduling")));
107 cl::desc(
"Print critical path length to stdout"));
111 cl::desc(
"Verify machine instrs before and after machine scheduling"));
116 cl::desc(
"Pop up a window to show MISched dags after they are processed"));
121 cl::desc(
"Dump resource usage at schedule boundary."));
124 cl::desc(
"Show details of invoking getNextResoufceCycle."));
129#ifdef LLVM_ENABLE_DUMP
140 cl::desc(
"Hide nodes with more predecessor/successor than cutoff"));
146 cl::desc(
"Only schedule this function"));
148 cl::desc(
"Only schedule this MBB#"));
163 cl::desc(
"Enable memop clustering."),
167 cl::desc(
"Switch to fast cluster algorithm with the lost "
168 "of some fusion opportunities"),
172 cl::desc(
"The threshold for fast cluster"),
175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
178 cl::desc(
"Dump resource usage at schedule boundary."));
181 cl::desc(
"Set width of the columns with "
182 "the resources and schedule units"),
186 cl::desc(
"Set width of the columns showing resource booking."),
190 cl::desc(
"Sort the resources printed in the dump trace"));
201void MachineSchedStrategy::anchor() {}
203void ScheduleDAGMutation::anchor() {}
218namespace impl_detail {
247 const RequiredAnalyses &Analyses);
271 const RequiredAnalyses &Analyses);
290 MachineSchedulerLegacy();
302 PostMachineSchedulerLegacy();
311char MachineSchedulerLegacy::ID = 0;
316 "Machine Instruction Scheduler",
false,
false)
329void MachineSchedulerLegacy::getAnalysisUsage(
AnalysisUsage &AU)
const {
342char PostMachineSchedulerLegacy::ID = 0;
347 "PostRA Machine Instruction Scheduler",
false,
false)
354PostMachineSchedulerLegacy::PostMachineSchedulerLegacy()
359void PostMachineSchedulerLegacy::getAnalysisUsage(
AnalysisUsage &AU)
const {
382 cl::desc(
"Machine instruction scheduler to use"));
390 cl::desc(
"Enable the machine instruction scheduling pass."),
cl::init(
true),
394 "enable-post-misched",
395 cl::desc(
"Enable the post-ra machine instruction scheduling pass."),
402 assert(
I != Beg &&
"reached the top of the region, cannot decrement");
404 if (!
I->isDebugOrPseudoInstr())
423 for(;
I !=
End; ++
I) {
424 if (!
I->isDebugOrPseudoInstr())
465 const char *MSchedBanner =
"Before machine scheduling.";
467 MF->verify(
P, MSchedBanner, &
errs());
469 MF->verify(*MFAM, MSchedBanner, &
errs());
471 RegClassInfo->runOnMachineFunction(*MF);
475 std::unique_ptr<ScheduleDAGInstrs>
Scheduler(createMachineScheduler());
480 const char *MSchedBanner =
"After machine scheduling.";
482 MF->verify(
P, MSchedBanner, &
errs());
484 MF->verify(*MFAM, MSchedBanner, &
errs());
511 const char *PostMSchedBanner =
"Before post machine scheduling.";
513 MF->verify(
P, PostMSchedBanner, &
errs());
515 MF->verify(*MFAM, PostMSchedBanner, &
errs());
520 std::unique_ptr<ScheduleDAGInstrs>
Scheduler(createPostMachineScheduler());
524 const char *PostMSchedBanner =
"After post machine scheduling.";
526 MF->verify(
P, PostMSchedBanner, &
errs());
528 MF->verify(*MFAM, PostMSchedBanner, &
errs());
549bool MachineSchedulerLegacy::runOnMachineFunction(
MachineFunction &MF) {
562 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
563 auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
564 auto &TM = getAnalysis<TargetPassConfig>().getTM<
TargetMachine>();
565 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
566 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
567 Impl.setLegacyPass(
this);
568 return Impl.run(MF, TM, {MLI, MDT, AA, LIS});
600 Impl->setMFAM(&MFAM);
601 bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS});
612bool PostMachineSchedulerLegacy::runOnMachineFunction(
MachineFunction &MF) {
624 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
626 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
627 Impl.setLegacyPass(
this);
628 return Impl.run(MF, TM, {MLI, AA});
647 Impl->setMFAM(&MFAM);
648 bool Changed = Impl->run(MF, *TM, {MLI, AA});
685 unsigned NumRegionInstrs;
689 RegionBegin(
B), RegionEnd(
E), NumRegionInstrs(
N) {}
698 bool RegionsTopDown) {
704 RegionEnd !=
MBB->
begin(); RegionEnd =
I) {
707 if (RegionEnd !=
MBB->
end() ||
714 unsigned NumRegionInstrs = 0;
720 if (!
MI.isDebugOrPseudoInstr()) {
729 if (NumRegionInstrs != 0)
730 Regions.
push_back(SchedRegion(
I, RegionEnd, NumRegionInstrs));
734 std::reverse(Regions.
begin(), Regions.
end());
773 for (
const SchedRegion &R : MBBRegions) {
776 unsigned NumRegionInstrs = R.NumRegionInstrs;
783 if (
I == RegionEnd ||
I == std::prev(RegionEnd)) {
794 else dbgs() <<
"End\n";
795 dbgs() <<
" RegionInstrs: " << NumRegionInstrs <<
'\n');
819#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
821 dbgs() <<
"Queue " << Name <<
": ";
822 for (
const SUnit *SU : Queue)
823 dbgs() << SU->NodeNum <<
" ";
852 dbgs() <<
"*** Scheduling failed! ***\n";
854 dbgs() <<
" has been released too many times!\n";
889 dbgs() <<
"*** Scheduling failed! ***\n";
891 dbgs() <<
" has been released too many times!\n";
928 unsigned regioninstrs)
938 else if (
SchedImpl->getPolicy().OnlyBottomUp)
966#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
971 ++NumInstrsScheduled;
1003 bool IsTopNode =
false;
1005 LLVM_DEBUG(
dbgs() <<
"** ScheduleDAGMI::schedule picking next node\n");
1024 if (&*priorII ==
MI)
1046 dbgs() <<
"*** Final schedule for "
1063 assert(!SU.isBoundaryNode() &&
"Boundary node should not be in SUnits");
1066 SU.biasCriticalPath();
1069 if (!SU.NumPredsLeft)
1072 if (!SU.NumSuccsLeft)
1088 for (
SUnit *SU : TopRoots)
1127 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
1129 std::pair<MachineInstr *, MachineInstr *>
P = *std::prev(DI);
1140#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1152 dbgs() <<
" * Schedule table (TopDown):\n";
1170 for (
unsigned C = FirstCycle;
C <= LastCycle; ++
C)
1177 dbgs() <<
"Missing SUnit\n";
1180 std::string NodeName(
"SU(");
1181 NodeName += std::to_string(SU->
NodeNum) +
")";
1183 unsigned C = FirstCycle;
1184 for (;
C <= LastCycle; ++
C) {
1201 return LHS.AcquireAtCycle <
RHS.AcquireAtCycle ||
1202 (
LHS.AcquireAtCycle ==
RHS.AcquireAtCycle &&
1203 LHS.ReleaseAtCycle <
RHS.ReleaseAtCycle);
1207 const std::string ResName =
1213 for (
unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle;
I != E;
1216 while (
C++ <= LastCycle)
1233 dbgs() <<
" * Schedule table (BottomUp):\n";
1246 if ((
int)SU->
BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1247 LastCycle = (int)SU->
BotReadyCycle - PI->ReleaseAtCycle + 1;
1252 for (
int C = FirstCycle;
C >= LastCycle; --
C)
1259 dbgs() <<
"Missing SUnit\n";
1262 std::string NodeName(
"SU(");
1263 NodeName += std::to_string(SU->
NodeNum) +
")";
1266 for (;
C >= LastCycle; --
C) {
1282 return LHS.AcquireAtCycle <
RHS.AcquireAtCycle ||
1283 (
LHS.AcquireAtCycle ==
RHS.AcquireAtCycle &&
1284 LHS.ReleaseAtCycle <
RHS.ReleaseAtCycle);
1288 const std::string ResName =
1294 for (
unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle;
I != E;
1297 while (
C-- >= LastCycle)
1306#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1314 dbgs() <<
"* Schedule table (Bidirectional): not implemented\n";
1316 dbgs() <<
"* Schedule table: DumpDirection not set.\n";
1324 dbgs() <<
"Missing SUnit\n";
1349 if (!Reg.isVirtual())
1354 bool FoundDef =
false;
1356 if (MO2.getReg() == Reg && !MO2.isDead()) {
1383 unsigned regioninstrs)
1397 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1448 dbgs() <<
"Bottom Pressure: ";
1454 "Can't find the region bottom");
1471 dbgs() <<
"Excess PSets: ";
1472 for (const PressureChange &RCPS : RegionCriticalPSets)
1473 dbgs() << TRI->getRegPressureSetName(RCPS.getPSet()) <<
" ";
1481 const std::vector<unsigned> &NewMaxPressure) {
1487 unsigned ID = PC.getPSet();
1492 && NewMaxPressure[
ID] <= (
unsigned)std::numeric_limits<int16_t>::max())
1496 if (NewMaxPressure[
ID] >= Limit - 2) {
1498 << NewMaxPressure[
ID]
1499 << ((NewMaxPressure[
ID] > Limit) ?
" > " :
" <= ")
1512 if (!Reg.isVirtual())
1520 bool Decrement =
P.LaneMask.any();
1524 SUnit &SU = *V2SU.SU;
1534 <<
" UpdateRegPressure: SU(" << SU.
NodeNum <<
") "
1557 assert(VNI &&
"No live value at use.");
1560 SUnit *SU = V2SU.SU;
1584#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1590 dbgs() <<
" Pressure Diff : ";
1593 dbgs() <<
" Single Issue : ";
1637 bool IsTopNode =
false;
1639 LLVM_DEBUG(
dbgs() <<
"** ScheduleDAGMILive::schedule picking next node\n");
1668 dbgs() <<
"*** Final schedule for "
1740 unsigned MaxCyclicLatency = 0;
1744 if (!Reg.isVirtual())
1756 unsigned LiveOutHeight = DefSU->
getHeight();
1761 SUnit *SU = V2SU.SU;
1773 unsigned CyclicLatency = 0;
1775 CyclicLatency = LiveOutDepth - SU->
getDepth();
1778 if (LiveInHeight > LiveOutHeight) {
1779 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1780 CyclicLatency = LiveInHeight - LiveOutHeight;
1785 << SU->
NodeNum <<
") = " << CyclicLatency <<
"c\n");
1786 if (CyclicLatency > MaxCyclicLatency)
1787 MaxCyclicLatency = CyclicLatency;
1790 LLVM_DEBUG(
dbgs() <<
"Cyclic Critical Path: " << MaxCyclicLatency <<
"c\n");
1791 return MaxCyclicLatency;
1844 if (&*priorII ==
MI)
1896 bool OffsetIsScalable;
1900 : SU(SU), BaseOps(BaseOps),
Offset(
Offset), Width(Width),
1901 OffsetIsScalable(OffsetIsScalable) {}
1905 if (
A->getType() !=
B->getType())
1906 return A->getType() <
B->getType();
1908 return A->getReg() <
B->getReg();
1914 return StackGrowsDown ?
A->getIndex() >
B->getIndex()
1915 :
A->getIndex() <
B->getIndex();
1925 if (std::lexicographical_compare(BaseOps.
begin(), BaseOps.
end(),
1926 RHS.BaseOps.begin(),
RHS.BaseOps.end(),
1929 if (std::lexicographical_compare(
RHS.BaseOps.begin(),
RHS.BaseOps.end(),
1930 BaseOps.
begin(), BaseOps.
end(), Compare))
1941 bool ReorderWhileClustering;
1946 bool ReorderWhileClustering)
1947 :
TII(tii),
TRI(tri), IsLoad(IsLoad),
1948 ReorderWhileClustering(ReorderWhileClustering) {}
1955 void collectMemOpRecords(std::vector<SUnit> &SUnits,
1961class StoreClusterMutation :
public BaseMemOpClusterMutation {
1965 bool ReorderWhileClustering)
1966 : BaseMemOpClusterMutation(tii, tri,
false, ReorderWhileClustering) {}
1969class LoadClusterMutation :
public BaseMemOpClusterMutation {
1972 bool ReorderWhileClustering)
1973 : BaseMemOpClusterMutation(tii, tri,
true, ReorderWhileClustering) {}
1980std::unique_ptr<ScheduleDAGMutation>
1983 bool ReorderWhileClustering) {
1985 TII,
TRI, ReorderWhileClustering)
1989std::unique_ptr<ScheduleDAGMutation>
1992 bool ReorderWhileClustering) {
1994 TII,
TRI, ReorderWhileClustering)
2005void BaseMemOpClusterMutation::clusterNeighboringMemOps(
2015 auto MemOpa = MemOpRecords[
Idx];
2018 unsigned NextIdx =
Idx + 1;
2019 for (; NextIdx <
End; ++NextIdx)
2022 if (!SUnit2ClusterInfo.
count(MemOpRecords[NextIdx].SU->NodeNum) &&
2024 (!DAG->
IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
2025 !DAG->
IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
2030 auto MemOpb = MemOpRecords[NextIdx];
2031 unsigned ClusterLength = 2;
2032 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
2033 MemOpb.Width.getValue().getKnownMinValue();
2034 if (SUnit2ClusterInfo.
count(MemOpa.SU->NodeNum)) {
2035 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
2036 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
2037 MemOpb.Width.getValue().getKnownMinValue();
2040 if (!
TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
2041 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
2042 MemOpb.Offset, MemOpb.OffsetIsScalable,
2043 ClusterLength, CurrentClusterBytes))
2046 SUnit *SUa = MemOpa.SU;
2047 SUnit *SUb = MemOpb.SU;
2087 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
2088 CurrentClusterBytes};
2091 <<
", Curr cluster bytes: " << CurrentClusterBytes
2096void BaseMemOpClusterMutation::collectMemOpRecords(
2098 for (
auto &SU : SUnits) {
2106 bool OffsetIsScalable;
2109 OffsetIsScalable, Width,
TRI)) {
2114 MemOpInfo(&SU, BaseOps,
Offset, OffsetIsScalable, Width));
2117 <<
Offset <<
", OffsetIsScalable: " << OffsetIsScalable
2118 <<
", Width: " << Width <<
"\n");
2121 for (
const auto *
Op : BaseOps)
2127bool BaseMemOpClusterMutation::groupMemOps(
2134 for (
const auto &
MemOp : MemOps) {
2135 unsigned ChainPredID = DAG->
SUnits.size();
2137 for (
const SDep &Pred :
MemOp.SU->Preds) {
2161 collectMemOpRecords(DAG->
SUnits, MemOpRecords);
2163 if (MemOpRecords.
size() < 2)
2170 bool FastCluster = groupMemOps(MemOpRecords, DAG,
Groups);
2172 for (
auto &Group :
Groups) {
2178 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2212std::unique_ptr<ScheduleDAGMutation>
2215 return std::make_unique<CopyConstrain>(
TII,
TRI);
2260 unsigned LocalReg = SrcReg;
2261 unsigned GlobalReg = DstReg;
2263 if (!LocalLI->
isLocal(RegionBeginIdx, RegionEndIdx)) {
2267 if (!LocalLI->
isLocal(RegionBeginIdx, RegionEndIdx))
2278 if (GlobalSegment == GlobalLI->
end())
2285 if (GlobalSegment->contains(LocalLI->
beginIndex()))
2288 if (GlobalSegment == GlobalLI->
end())
2292 if (GlobalSegment != GlobalLI->
begin()) {
2295 GlobalSegment->start)) {
2306 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2307 "Disconnected LRG within the scheduling region.");
2323 for (
const SDep &Succ : LastLocalSU->
Succs) {
2338 for (
const SDep &Pred : GlobalSU->
Preds) {
2341 if (Pred.
getSUnit() == FirstLocalSU)
2349 for (
SUnit *LU : LocalUses) {
2350 LLVM_DEBUG(
dbgs() <<
" Local use SU(" << LU->NodeNum <<
") -> SU("
2351 << GlobalSU->
NodeNum <<
")\n");
2354 for (
SUnit *GU : GlobalUses) {
2355 LLVM_DEBUG(
dbgs() <<
" Global use SU(" << GU->NodeNum <<
") -> SU("
2356 << FirstLocalSU->
NodeNum <<
")\n");
2368 if (FirstPos == DAG->
end())
2396 unsigned Latency,
bool AfterSchedNode) {
2397 int ResCntFactor = (int)(Count - (
Latency * LFactor));
2399 return ResCntFactor >= (int)LFactor;
2401 return ResCntFactor > (int)LFactor;
2414 CheckPending =
false;
2417 MinReadyCycle = std::numeric_limits<unsigned>::max();
2418 ExpectedLatency = 0;
2419 DependentLatency = 0;
2421 MaxExecutedResCount = 0;
2423 IsResourceLimited =
false;
2424 ReservedCycles.clear();
2425 ReservedResourceSegments.clear();
2426 ReservedCyclesIndex.
clear();
2427 ResourceGroupSubUnitMasks.clear();
2428#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2432 MaxObservedStall = 0;
2435 ExecutedResCounts.
resize(1);
2436 assert(!ExecutedResCounts[0] &&
"nonzero count for bad resource");
2452 unsigned PIdx = PI->ProcResourceIdx;
2454 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2456 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2469 ReservedCyclesIndex.
resize(ResourceCount);
2470 ExecutedResCounts.
resize(ResourceCount);
2471 ResourceGroupSubUnitMasks.resize(ResourceCount,
APInt(ResourceCount, 0));
2472 unsigned NumUnits = 0;
2474 for (
unsigned i = 0; i < ResourceCount; ++i) {
2475 ReservedCyclesIndex[i] = NumUnits;
2481 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2501 if (ReadyCycle > CurrCycle)
2502 return ReadyCycle - CurrCycle;
2509 unsigned ReleaseAtCycle,
2510 unsigned AcquireAtCycle) {
2513 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2514 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2516 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2517 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2520 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2526 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2527 return NextUnreserved;
2533std::pair<unsigned, unsigned>
2535 unsigned ReleaseAtCycle,
2536 unsigned AcquireAtCycle) {
2538 LLVM_DEBUG(
dbgs() <<
" Resource booking (@" << CurrCycle <<
"c): \n");
2540 LLVM_DEBUG(
dbgs() <<
" getNextResourceCycle (@" << CurrCycle <<
"c): \n");
2543 unsigned InstanceIdx = 0;
2544 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2546 assert(NumberOfInstances > 0 &&
2547 "Cannot have zero instances of a ProcResource");
2564 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2566 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2570 for (
unsigned I = 0,
End = NumberOfInstances;
I <
End; ++
I) {
2571 unsigned NextUnreserved, NextInstanceIdx;
2572 std::tie(NextUnreserved, NextInstanceIdx) =
2574 if (MinNextUnreserved > NextUnreserved) {
2575 InstanceIdx = NextInstanceIdx;
2576 MinNextUnreserved = NextUnreserved;
2579 return std::make_pair(MinNextUnreserved, InstanceIdx);
2582 for (
unsigned I = StartIndex,
End = StartIndex + NumberOfInstances;
I <
End;
2584 unsigned NextUnreserved =
2588 << NextUnreserved <<
"c\n");
2589 if (MinNextUnreserved > NextUnreserved) {
2591 MinNextUnreserved = NextUnreserved;
2596 <<
"[" << InstanceIdx - StartIndex <<
"]"
2597 <<
" available @" << MinNextUnreserved <<
"c"
2599 return std::make_pair(MinNextUnreserved, InstanceIdx);
2632 << (
isTop() ?
"begin" :
"end") <<
" group\n");
2641 unsigned ResIdx = PE.ProcResourceIdx;
2642 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2643 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2644 unsigned NRCycle, InstanceIdx;
2645 std::tie(NRCycle, InstanceIdx) =
2647 if (NRCycle > CurrCycle) {
2648#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2649 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2653 <<
'[' << InstanceIdx - ReservedCyclesIndex[ResIdx] <<
']'
2654 <<
"=" << NRCycle <<
"c\n");
2665 SUnit *LateSU =
nullptr;
2666 unsigned RemLatency = 0;
2667 for (
SUnit *SU : ReadySUs) {
2669 if (L > RemLatency) {
2676 << LateSU->
NodeNum <<
") " << RemLatency <<
"c\n");
2695 PIdx != PEnd; ++PIdx) {
2697 if (OtherCount > OtherCritCount) {
2698 OtherCritCount = OtherCount;
2699 OtherCritIdx = PIdx;
2708 return OtherCritCount;
2715#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2719 if (ReadyCycle > CurrCycle)
2720 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2723 if (ReadyCycle < MinReadyCycle)
2724 MinReadyCycle = ReadyCycle;
2729 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2732 if (!HazardDetected) {
2747 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2748 "MinReadyCycle uninitialized");
2749 if (MinReadyCycle > NextCycle)
2750 NextCycle = MinReadyCycle;
2754 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2757 if ((NextCycle - CurrCycle) > DependentLatency)
2758 DependentLatency = 0;
2760 DependentLatency -= (NextCycle - CurrCycle);
2764 CurrCycle = NextCycle;
2767 for (; CurrCycle != NextCycle; ++CurrCycle) {
2774 CheckPending =
true;
2784 ExecutedResCounts[PIdx] += Count;
2785 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2786 MaxExecutedResCount = ExecutedResCounts[PIdx];
2800 unsigned ReleaseAtCycle,
2802 unsigned AcquireAtCycle) {
2804 unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);
2806 << ReleaseAtCycle <<
"x" << Factor <<
"u\n");
2816 ZoneCritResIdx = PIdx;
2823 unsigned NextAvailable, InstanceIdx;
2824 std::tie(NextAvailable, InstanceIdx) =
2826 if (NextAvailable > CurrCycle) {
2829 <<
'[' << InstanceIdx - ReservedCyclesIndex[PIdx] <<
']'
2830 <<
" reserved until @" << NextAvailable <<
"\n");
2832 return NextAvailable;
2846 CheckPending =
true;
2854 "Cannot schedule this instruction's MicroOps in the current cycle.");
2859 unsigned NextCycle = CurrCycle;
2862 assert(ReadyCycle <= CurrCycle &&
"Broken PendingQueue");
2865 if (ReadyCycle > NextCycle) {
2866 NextCycle = ReadyCycle;
2867 LLVM_DEBUG(
dbgs() <<
" *** Stall until: " << ReadyCycle <<
"\n");
2876 NextCycle = ReadyCycle;
2879 RetiredMOps += IncMOps;
2886 if (ZoneCritResIdx) {
2888 unsigned ScaledMOps =
2905 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2906 PI->AcquireAtCycle);
2907 if (RCycle > NextCycle)
2918 unsigned PIdx = PI->ProcResourceIdx;
2922 unsigned ReservedUntil, InstanceIdx;
2924 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2926 ReservedResourceSegments[InstanceIdx].add(
2928 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2931 ReservedResourceSegments[InstanceIdx].add(
2933 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2938 unsigned ReservedUntil, InstanceIdx;
2940 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2942 ReservedCycles[InstanceIdx] =
2943 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2945 ReservedCycles[InstanceIdx] = NextCycle;
2952 unsigned &TopLatency =
isTop() ? ExpectedLatency : DependentLatency;
2953 unsigned &BotLatency =
isTop() ? DependentLatency : ExpectedLatency;
2957 << SU->
NodeNum <<
") " << TopLatency <<
"c\n");
2962 << SU->
NodeNum <<
") " << BotLatency <<
"c\n");
2965 if (NextCycle > CurrCycle)
2978 CurrMOps += IncMOps;
2992 LLVM_DEBUG(
dbgs() <<
" *** Max MOps " << CurrMOps <<
" at cycle "
2993 << CurrCycle <<
'\n');
3004 MinReadyCycle = std::numeric_limits<unsigned>::max();
3012 if (ReadyCycle < MinReadyCycle)
3013 MinReadyCycle = ReadyCycle;
3024 CheckPending =
false;
3070#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3080 unsigned StartIdx = 0;
3082 for (
unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
3085 for (
unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
3086 dbgs() << ResName <<
"(" << UnitIdx <<
") = ";
3088 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
3089 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
3093 dbgs() << ReservedCycles[StartIdx + UnitIdx] <<
"\n";
3095 StartIdx += NumUnits;
3104 if (ZoneCritResIdx) {
3109 ResCount = RetiredMOps * ResFactor;
3113 <<
" Retired: " << RetiredMOps;
3115 dbgs() <<
"\n Critical: " << ResCount / LFactor <<
"c, "
3116 << ResCount / ResFactor <<
" "
3118 <<
"\n ExpectedLatency: " << ExpectedLatency <<
"c\n"
3119 << (IsResourceLimited ?
" - Resource" :
" - Latency")
3165 RemLatency = std::max(RemLatency,
3167 RemLatency = std::max(RemLatency,
3174bool GenericSchedulerBase::shouldReduceLatency(
const CandPolicy &Policy,
3176 bool ComputeRemLatency,
3177 unsigned &RemLatency)
const {
3187 if (ComputeRemLatency)
3203 unsigned OtherCritIdx = 0;
3204 unsigned OtherCount =
3207 bool OtherResLimited =
false;
3208 unsigned RemLatency = 0;
3209 bool RemLatencyComputed =
false;
3212 RemLatencyComputed =
true;
3214 OtherCount, RemLatency,
false);
3220 if (!OtherResLimited &&
3221 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3225 <<
" RemainingLatency " << RemLatency <<
" + "
3234 dbgs() <<
" " << CurrZone.Available.getName() <<
" ResourceLimited: "
3235 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) <<
"\n";
3236 }
if (OtherResLimited)
dbgs()
3237 <<
" RemainingLimit: "
3240 <<
" Latency limited both directions.\n");
3245 if (OtherResLimited)
3253 case NoCand:
return "NOCAND ";
3254 case Only1:
return "ONLY1 ";
3255 case PhysReg:
return "PHYS-REG ";
3258 case Stall:
return "STALL ";
3259 case Cluster:
return "CLUSTER ";
3260 case Weak:
return "WEAK ";
3261 case RegMax:
return "REG-MAX ";
3276 unsigned ResIdx = 0;
3312 <<
":" <<
P.getUnitInc() <<
" ";
3335 if (TryVal < CandVal) {
3339 if (TryVal > CandVal) {
3340 if (Cand.
Reason > Reason)
3351 if (TryVal > CandVal) {
3355 if (TryVal < CandVal) {
3356 if (Cand.
Reason > Reason)
3408 "(PreRA)GenericScheduler needs vreg liveness");
3414 DAG->computeDFSResult();
3425 if (!Top.HazardRec) {
3428 if (!Bot.HazardRec) {
3431 TopCand.SU =
nullptr;
3432 BotCand.SU =
nullptr;
3438 unsigned NumRegionInstrs) {
3447 for (
unsigned VT = MVT::i64; VT > (
unsigned)MVT::i1; --VT) {
3484#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3485 dbgs() <<
"GenericScheduler RegionPolicy: "
3507 unsigned IterCount =
3513 unsigned InFlightCount =
3515 unsigned BufferLimit =
3521 dbgs() <<
"IssueCycles="
3524 <<
"c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3534 for (
const SUnit *SU : Bot.Available) {
3545 checkAcyclicLatency();
3572 if (TryPSet == CandPSet) {
3577 int TryRank = TryP.
isValid() ?
TRI->getRegPressureSetScore(MF, TryPSet) :
3578 std::numeric_limits<int>::max();
3580 int CandRank = CandP.
isValid() ?
TRI->getRegPressureSetScore(MF, CandPSet) :
3581 std::numeric_limits<int>::max();
3586 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3604 unsigned ScheduledOper = isTop ? 1 : 0;
3605 unsigned UnscheduledOper = isTop ? 0 : 1;
3608 if (
MI->getOperand(ScheduledOper).getReg().isPhysical())
3613 if (
MI->getOperand(UnscheduledOper).getReg().isPhysical())
3614 return AtBoundary ? -1 : 1;
3617 if (
MI->isMoveImmediate()) {
3623 if (
Op.isReg() && !
Op.getReg().isPhysical()) {
3630 return isTop ? -1 : 1;
3643 if (DAG->isTrackingPressure()) {
3648 DAG->getRegionCriticalPSets(),
3649 DAG->getRegPressure().MaxSetPressure);
3654 &DAG->getPressureDiff(Cand.
SU),
3656 DAG->getRegionCriticalPSets(),
3657 DAG->getRegPressure().MaxSetPressure);
3661 DAG->getPressureDiff(Cand.
SU),
3663 DAG->getRegionCriticalPSets(),
3664 DAG->getRegPressure().MaxSetPressure);
3669 <<
" Try SU(" << Cand.
SU->
NodeNum <<
") "
3718 bool SameBoundary = Zone !=
nullptr;
3739 const SUnit *CandNextClusterSU =
3741 const SUnit *TryCandNextClusterSU =
3744 Cand.
SU == CandNextClusterSU,
3752 TryCand, Cand,
Weak))
3804 for (
SUnit *SU : Q) {
3807 initCandidate(TryCand, SU, Zone.
isTop(), RPTracker, TempTracker);
3810 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3824 if (
SUnit *SU = Bot.pickOnlyChoice()) {
3829 if (
SUnit *SU = Top.pickOnlyChoice()) {
3845 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3846 BotCand.Policy != BotPolicy) {
3848 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3849 assert(BotCand.Reason !=
NoCand &&
"failed to find the first candidate");
3856 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3858 "Last pick result should correspond to re-picking right now");
3865 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3866 TopCand.Policy != TopPolicy) {
3868 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3869 assert(TopCand.Reason !=
NoCand &&
"failed to find the first candidate");
3876 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3878 "Last pick result should correspond to re-picking right now");
3884 assert(BotCand.isValid());
3885 assert(TopCand.isValid());
3888 if (tryCandidate(Cand, TopCand,
nullptr)) {
3893 IsTopNode = Cand.
AtTop;
3901 assert(Top.Available.empty() && Top.Pending.empty() &&
3902 Bot.Available.empty() && Bot.Pending.empty() &&
"ReadyQ garbage");
3908 SU = Top.pickOnlyChoice();
3911 TopCand.reset(NoPolicy);
3912 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3913 assert(TopCand.Reason !=
NoCand &&
"failed to find a candidate");
3919 SU = Bot.pickOnlyChoice();
3922 BotCand.reset(NoPolicy);
3923 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3924 assert(BotCand.Reason !=
NoCand &&
"failed to find a candidate");
3930 SU = pickNodeBidirectional(IsTopNode);
3950 Top.removeReady(SU);
3952 Bot.removeReady(SU);
3967 for (
SDep &Dep : Deps) {
3971 SUnit *DepSU = Dep.getSUnit();
3972 if (isTop ? DepSU->
Succs.size() > 1 : DepSU->
Preds.size() > 1)
3975 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3995 reschedulePhysReg(SU,
true);
4000 reschedulePhysReg(SU,
false);
4019 if (!MacroFusions.empty())
4048 if (!Top.HazardRec) {
4051 if (!Bot.HazardRec) {
4058 unsigned NumRegionInstrs) {
4086 for (
const SUnit *SU : Bot.Available) {
4110 if (
tryLess(Top.getLatencyStallCycles(TryCand.
SU),
4111 Top.getLatencyStallCycles(Cand.
SU), TryCand, Cand,
Stall))
4115 const SUnit *CandNextClusterSU =
4117 const SUnit *TryCandNextClusterSU =
4120 Cand.
SU == CandNextClusterSU, TryCand, Cand,
Cluster))
4153 for (
SUnit *SU : Q) {
4158 if (tryCandidate(Cand, TryCand)) {
4172 if (
SUnit *SU = Bot.pickOnlyChoice()) {
4177 if (
SUnit *SU = Top.pickOnlyChoice()) {
4193 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4194 BotCand.Policy != BotPolicy) {
4196 pickNodeFromQueue(Bot, BotCand);
4197 assert(BotCand.Reason !=
NoCand &&
"failed to find the first candidate");
4204 pickNodeFromQueue(Bot, BotCand);
4206 "Last pick result should correspond to re-picking right now");
4213 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4214 TopCand.Policy != TopPolicy) {
4216 pickNodeFromQueue(Top, TopCand);
4217 assert(TopCand.Reason !=
NoCand &&
"failed to find the first candidate");
4224 pickNodeFromQueue(Top, TopCand);
4226 "Last pick result should correspond to re-picking right now");
4232 assert(BotCand.isValid());
4233 assert(TopCand.isValid());
4236 if (tryCandidate(Cand, TopCand)) {
4241 IsTopNode = Cand.
AtTop;
4249 assert(Top.Available.empty() && Top.Pending.empty() &&
4250 Bot.Available.empty() && Bot.Pending.empty() &&
"ReadyQ garbage");
4256 SU = Bot.pickOnlyChoice();
4261 BotCand.reset(NoPolicy);
4264 setPolicy(BotCand.Policy,
true, Bot,
nullptr);
4265 pickNodeFromQueue(Bot, BotCand);
4266 assert(BotCand.Reason !=
NoCand &&
"failed to find a candidate");
4272 SU = Top.pickOnlyChoice();
4277 TopCand.reset(NoPolicy);
4280 setPolicy(TopCand.Policy,
true, Top,
nullptr);
4281 pickNodeFromQueue(Top, TopCand);
4282 assert(TopCand.Reason !=
NoCand &&
"failed to find a candidate");
4288 SU = pickNodeBidirectional(IsTopNode);
4293 Top.removeReady(SU);
4295 Bot.removeReady(SU);
4321 if (!MacroFusions.empty())
4335 const BitVector *ScheduledTrees =
nullptr;
4338 ILPOrder(
bool MaxILP) : MaximizeILP(MaxILP) {}
4343 bool operator()(
const SUnit *
A,
const SUnit *
B)
const {
4346 if (SchedTreeA != SchedTreeB) {
4348 if (ScheduledTrees->
test(SchedTreeA) != ScheduledTrees->
test(SchedTreeB))
4349 return ScheduledTrees->
test(SchedTreeB);
4370 std::vector<SUnit*> ReadyQ;
4373 ILPScheduler(
bool MaximizeILP) :
Cmp(MaximizeILP) {}
4384 void registerRoots()
override {
4386 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4393 SUnit *pickNode(
bool &IsTopNode)
override {
4394 if (ReadyQ.empty())
return nullptr;
4395 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4396 SUnit *SU = ReadyQ.back();
4400 <<
"SU(" << SU->
NodeNum <<
") "
4407 <<
"Scheduling " << *SU->
getInstr());
4412 void scheduleTree(
unsigned SubtreeID)
override {
4413 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4418 void schedNode(
SUnit *SU,
bool IsTopNode)
override {
4419 assert(!IsTopNode &&
"SchedDFSResult needs bottom-up");
4422 void releaseTopNode(
SUnit *)
override { }
4424 void releaseBottomNode(
SUnit *SU)
override {
4425 ReadyQ.push_back(SU);
4426 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4453template<
bool IsReverse>
4457 return A->NodeNum >
B->NodeNum;
4459 return A->NodeNum <
B->NodeNum;
4479 InstructionShuffler(
bool alternate,
bool topdown)
4480 : IsAlternating(alternate), IsTopDown(topdown) {}
4490 SUnit *pickNode(
bool &IsTopNode)
override {
4494 if (TopQ.empty())
return nullptr;
4501 if (BottomQ.empty())
return nullptr;
4508 IsTopDown = !IsTopDown;
4512 void schedNode(
SUnit *SU,
bool IsTopNode)
override {}
4514 void releaseTopNode(
SUnit *SU)
override {
4517 void releaseBottomNode(
SUnit *SU)
override {
4529 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4533 "shuffle",
"Shuffle machine instructions alternating directions",
4552 return std::string(
G->MF.getName());
4572 return "color=cyan,style=dashed";
4574 return "color=blue,style=dashed";
4591 return G->getGraphNodeLabel(SU);
4595 std::string Str(
"shape=Mrecord");
4600 Str +=
",style=filled,fillcolor=\"#";
4617 errs() <<
"ScheduleDAGMI::viewGraph is only available in debug builds on "
4618 <<
"systems with Graphviz or gv!\n";
4624 viewGraph(getDAGName(),
"Scheduling-Units Graph for " + getDAGName());
4633 return A.first <
B.first;
4636unsigned ResourceSegments::getFirstAvailableAt(
4637 unsigned CurrCycle,
unsigned AcquireAtCycle,
unsigned ReleaseAtCycle,
4639 IntervalBuilder)
const {
4640 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4642 "Cannot execute on an un-sorted set of intervals.");
4646 if (AcquireAtCycle == ReleaseAtCycle)
4649 unsigned RetCycle = CurrCycle;
4651 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4652 for (
auto &
Interval : _Intervals) {
4659 "Invalid intervals configuration.");
4661 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4667 const unsigned CutOff) {
4668 assert(
A.first <=
A.second &&
"Cannot add negative resource usage");
4669 assert(CutOff > 0 &&
"0-size interval history has no use.");
4675 if (
A.first ==
A.second)
4682 "A resource is being overwritten");
4683 _Intervals.push_back(
A);
4689 while (_Intervals.size() > CutOff)
4690 _Intervals.pop_front();
4695 assert(
A.first <=
A.second &&
"Invalid interval");
4696 assert(
B.first <=
B.second &&
"Invalid interval");
4699 if ((
A.first ==
B.first) || (
A.second ==
B.second))
4704 if ((
A.first >
B.first) && (
A.second <
B.second))
4709 if ((
A.first >
B.first) && (
A.first <
B.second) && (
A.second >
B.second))
4714 if ((
A.first <
B.first) && (
B.first <
A.second) && (
B.second >
B.first))
4720void ResourceSegments::sortAndMerge() {
4721 if (_Intervals.size() <= 1)
4728 auto next = std::next(std::begin(_Intervals));
4729 auto E = std::end(_Intervals);
4730 for (; next !=
E; ++next) {
4731 if (std::prev(next)->second >= next->first) {
4732 next->first = std::prev(next)->first;
4733 _Intervals.erase(std::prev(next));
MachineInstrBuilder MachineInstrBuilder & DefMI
static const Function * getParent(const Value *V)
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
COFF::MachineTypes Machine
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Class recording the (high level) value of a variable.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
reverse_iterator rend() const
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
reverse_iterator rbegin() const
bool test(unsigned Idx) const
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void clear()
clear - Removes all bits from the bitvector.
Represents analyses that only rely on functions' control flow.
This class represents an Operation in the Expression.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endIndex - Return the maximum point of the range of the whole, exclusive.
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
nonconst_iterator getNonConstIterator() const
Representation of each machine instruction.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
MachineSchedulerPass(const TargetMachine *TM)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PostMachineSchedulerPass(const TargetMachine *TM)
~PostMachineSchedulerPass()
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void preserve()
Mark an analysis as preserved.
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
List of PressureChanges in order of increasing, unique PSetID.
void dump(const TargetRegisterInfo &TRI) const
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
void clear()
clear - Erase all elements from the queue.
Helpers for implementing custom MachineSchedStrategy classes.
ArrayRef< SUnit * > elements()
bool isInQueue(SUnit *SU) const
std::vector< SUnit * >::iterator iterator
StringRef getName() const
iterator remove(iterator I)
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and record live ins and live outs.
void setPos(MachineBasicBlock::const_iterator Pos)
ArrayRef< unsigned > getLiveThru() const
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
void advance()
Advance across the current instruction.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These function return the interval used by a resource in bottom and top scheduling.
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
bool isWeak() const
Tests if this a weak dependence.
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
@ Weak
Arbitrary weak DAG edge.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
unsigned getReg() const
Returns the register associated with this edge.
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
bool isArtificialDep() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Scheduling unit. This is a node in the scheduling DAG.
bool isCall
Is a function call.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
unsigned short Latency
Node latency.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
bool hasPhysRegDefs
Has physreg defs that are being used.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
SmallVector< SDep, 4 > Succs
All sunit successors.
bool hasReservedResource
Uses a reserved resource.
bool isBottomReady() const
bool hasPhysRegUses
Has physreg uses.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
void releasePending()
Release pending ready nodes in to the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
void dumpScheduledState() const
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
Compute the values of each DAG node for various metrics during DFS.
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
void clear()
Clear the results.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
MachineInstr * FirstDbgValue
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void setDumpDirection(DumpDirection D)
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
bool ShouldTrackLaneMasks
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
PressureDiffs SUPressureDiffs
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void updatePressureDiffs(ArrayRef< VRegMaskOrUnit > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
void collectVRegUses(SUnit &SU)
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
~ScheduleDAGMILive() override
void dump() const override
BitVector & getScheduledTrees()
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
Dump the scheduled sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
const SUnit * NextClusterSucc
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator bottom() const
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
const SUnit * getNextClusterPred() const
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
SlotIndex - An opaque wrapper around machine indexes.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
std::reverse_iterator< const_iterator > const_reverse_iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
iterator find(const KeyT &Key)
Find an element by its key.
void clear()
Clears the set.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
iterator_base< SparseMultiSet * > iterator
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
Provide an instruction scheduling machine model to CodeGen passes.
const char * getResourceName(unsigned PIdx) const
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const InstrItineraryData * getInstrItineraries() const
bool enableIntervals() const
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic post-ra scheduling policy within a region.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
int getNumOccurrences() const
Base class for the machine scheduler classes.
void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags)
Main driver for both MachineScheduler and PostMachineScheduler.
Impl class for MachineScheduler.
void setMFAM(MachineFunctionAnalysisManager *MFAM)
void setLegacyPass(MachineFunctionPass *P)
bool run(MachineFunction &MF, const TargetMachine &TM, const RequiredAnalyses &Analyses)
ScheduleDAGInstrs * createMachineScheduler()
Instantiate a ScheduleDAGInstrs that will be owned by the caller.
Impl class for PostMachineScheduler.
bool run(MachineFunction &Func, const TargetMachine &TM, const RequiredAnalyses &Analyses)
void setMFAM(MachineFunctionAnalysisManager *MFAM)
PostMachineSchedulerImpl()
ScheduleDAGInstrs * createPostMachineScheduler()
Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by the caller.
void setLegacyPass(MachineFunctionPass *P)
A raw_ostream that writes to an std::string.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > PrintDAGs
unsigned getWeakLeft(const SUnit *SU, bool isTop)
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit acco...
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
void initializePostMachineSchedulerLegacyPass(PassRegistry &)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
cl::opt< bool > VerifyScheduling
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
cl::opt< bool > ViewMISchedDAGs
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
void sort(IteratorTy Start, IteratorTy End)
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
void initializeMachineSchedulerLegacyPass(PassRegistry &)
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
cl::opt< MISched::Direction > PreRADirection
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResourceCycle."))
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
DOTGraphTraits(bool isSimple=false)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
static bool renderGraphFromBottomUp()
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void setBest(SchedCandidate &Best)
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
static constexpr LaneBitmask getNone()
const unsigned * SubUnitsIdxBegin
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
RegisterClassInfo * RegClassInfo
virtual ~MachineSchedContext()
bool DisableLatencyHeuristic
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg.
PressureChange CriticalMax
PressureChange CurrentMax
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
SmallVector< VRegMaskOrUnit, 8 > LiveOutRegs
SmallVector< VRegMaskOrUnit, 8 > LiveInRegs
List of live in virtual registers or physical register units.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Summarize the unscheduled region.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
bool IsAcyclicLatencyLimited
An individual mapping from virtual register number to SUnit.
MachineDominatorTree & MDT