#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold), or "
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));

static cl::opt<bool> GCNTrackers(
    "amdgpu-use-amdgpu-trackers", cl::Hidden,
    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
    cl::init(false));
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);
  // ...
  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  // ...
  // If the region is known to spill, derive the VGPR budget from the
  // addressable VGPR count rather than from the occupancy tables.
  LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                       "VGPRCriticalLimit calculation method.\n");
  unsigned Granule =
      AMDGPU::IsaInfo::getVGPRAllocGranule(&ST, DynamicVGPRBlockSize);
  unsigned Addressable =
      AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST, DynamicVGPRBlockSize);
  unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
  VGPRBudget = std::max(VGPRBudget, Granule);
  // ...
}
// canUsePressureDiffs(): cached DAG pressure diffs cannot be used when the
// SUnit's instruction touches physical registers or defines a subregister.
for (const MachineOperand &Op : SU.getInstr()->operands()) {
  if (!Op.isReg() || Op.isImplicit())
    continue;
  if (Op.getReg().isPhysical() ||
      (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
    return false;
}
return true;
static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // ...
  // GCNTrackers path: the AMDGPU-specific trackers report only the pressure
  // sets this strategy cares about.
  Pressure.resize(4, 0);
  // ...
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  if (!GCNTrackers && canUsePressureDiffs(*SU)) {
    // ...
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up, so the sign must be inverted when
      // scheduling top-down.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }
#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  }
  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // ...
  // We only need to update the RPDelta for instructions that increase register
  // pressure; a VGPR increase only matters once pressure approaches the excess
  // limit.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // ...
  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit.
  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    if (!GCNTrackers) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      GCNRPTracker *T = IsBottomUp
                            ? static_cast<GCNRPTracker *>(&UpwardTracker)
                            : static_cast<GCNRPTracker *>(&DownwardTracker);
      SGPRPressure = T->getPressure().getSGPRNum();
      VGPRPressure = T->getPressure().getArchVGPRNum();
    }
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // ...
  }
}
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // ...
  assert(TCand.SU == BotCand.SU &&
         "Last pick result should correspond to re-picking right now");
  // ...
  assert(TCand.SU == TopCand.SU &&
         "Last pick result should correspond to re-picking right now");
  // ...
  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      // ...
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      // ...
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (!SU);
  // ...
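// Bidirectional picking mirrors GenericScheduler: a forced choice at either
// boundary short-circuits the search; otherwise candidates gathered from the
// Top and Bot queues via pickNodeFromQueue() are compared with tryCandidate()
// and the winner decides IsTopNode.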
// GCNMaxILPSchedStrategy::tryCandidate() (excerpt).
bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // ...
  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  // ...
  // Keep clustered nodes together.
  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;
  // ...
}

// GCNMaxMemoryClauseSchedStrategy::tryCandidate() (excerpt).
bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) const {
  // ...
  if (DAG->isTrackingPressure()) {
    // Avoid exceeding the target's limit and avoid increasing the max
    // critical pressure in the scheduled region (elided).
    // ...
  }

  // MaxMemoryClause-specific: prioritize clustered instructions since we
  // would get more benefit from clausing these memory instructions.
  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  // ...
  // MaxMemoryClause-specific: prioritize long-latency loads so their latency
  // can be hidden; mayLoad() excludes store-like instructions.
  bool TryMayLoad =
      TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();
  bool CandMayLoad = Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();
  if (TryMayLoad || CandMayLoad) {
    bool TryLongLatency = /* long-latency check for TryCand (elided) */;
    bool CandLongLatency = /* long-latency check for Cand (elided) */;
    if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
                   Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
                   Cand, Stall))
      return TryCand.Reason != NoCand;
  }
  // ...
  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Avoid serializing long latency dependence chains.
    if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;
    // ...
  }
  // ...
}
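// Both tryCandidate() overrides keep GenericScheduler's convention: each try*
// helper returns true as soon as it can order the pair (recording
// TryCand.Reason), so the textual order of the checks above is exactly the
// priority order of the heuristics.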
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}
std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                              *this);
  }
  llvm_unreachable("Unknown SchedStageID.");
}
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

static MachineInstr *
getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                   MachineBasicBlock::iterator RegionEnd) {
  auto REnd = RegionEnd == RegionBegin->getParent()->end()
                  ? std::prev(RegionEnd)
                  : RegionEnd;
  return &*skipDebugInstructionsBackward(REnd, RegionBegin);
}
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse the calculated live set if
  // the successor will be sent to scheduling past the current block.

  // However, due to a bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
      auto &Rgn = Regions[CurRegion];
      NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionFirstMIs.push_back(
        &*skipDebugInstructionsForward(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
}
void RegionPressureMap::buildLiveRegMap() {
  IdxToInstruction.clear();

  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    MachineInstr *RegionKey =
        IsLiveOut
            ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
            : &*DAG->Regions[I].first;
    IdxToInstruction[I] = RegionKey;
  }
}
void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded.
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}
void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}
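// Each stage can opt out wholesale (initGCNSchedStage() returning false skips
// the stage) or per region (initGCNRegion() returning false advances past the
// region), so later stages only revisit regions that still have a register
// pressure or ILP problem.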
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    OS << "Max memory clause Initial Schedule";
    break;
  }
  return OS;
}
bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(
      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)

bool PreRARematStage::initGCNSchedStage() {
  // ...
  // Map each instruction to the region it belongs to, and record each region's
  // basic block.
  const unsigned NumRegions = DAG.Regions.size();
  RegionBB.reserve(NumRegions);
  for (unsigned I = 0; I < NumRegions; ++I) {
    RegionBoundaries Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI)
      MIRegion.insert({&*MI, I});
    RegionBB.push_back(Region.first->getParent());
  }

  if (!canIncreaseOccupancyOrReduceSpill())
    return false;

  // Rematerialize identified instructions and update the scheduler's state.
  rematerialize();
  if (GCNTrackers)
    DAG.RegionLiveOuts.buildLiveRegMap();
  REMAT_DEBUG({
    dbgs() << "Retrying function scheduling with new min. occupancy of "
           << AchievedOcc << " from rematerializing (original was "
           << DAG.MinOccupancy;
    if (TargetOcc)
      dbgs() << ", target was " << *TargetOcc;
    dbgs() << ")\n";
  });

  if (AchievedOcc > DAG.MinOccupancy) {
    DAG.MinOccupancy = AchievedOcc;
    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
  }
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}
bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save the original order of instructions in case scheduling must be
  // reverted.
  Unsched.clear();
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : DAG) {
    Unsched.push_back(&I);
    // ...
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure:  "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  // ...
}
bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions that have excess register pressure (i.e. spilling)
  // or had minimum occupancy at the beginning of the stage (as long as
  // rescheduling of previous regions did not make occupancy drop back down to
  // the initial minimum).
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  if (!DAG.RegionsWithExcessRP[RegionIdx] &&
      (DAG.MinOccupancy <= InitialOccupancy ||
       DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
           InitialOccupancy))
    return false;

  return GCNSchedStage::initGCNRegion();
}

// ...

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;
  // ...
}
void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  // ...

  unsigned TargetOccupancy = std::min(
      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
  unsigned WavesAfter = std::min(
      TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
  unsigned WavesBefore = std::min(
      TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just scheduled region.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  // The maximum number of arch VGPRs on a non-unified register file, or the
  // maximum VGPR + AGPR in the unified register file case.
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  // The maximum number of arch VGPRs for both unified and non-unified register
  // files.
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  // ...
}
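// Note on the limits computed at the end of checkScheduling(): regions whose
// post-scheduling pressure exceeds MaxSGPRs, MaxArchVGPRs, or MaxVGPRs are
// recorded in DAG.RegionsWithExcessRP (check elided above), which the
// unclustered high-RP stage later uses to pick regions to reschedule.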
unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}
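// Worked example: if CurrCycle is 4 and SU has two assigned-reg-dependence
// predecessors whose defs became ready at cycles 3 and 5 with latencies 2 and
// 1 respectively, then ReadyCycle = max(4, 3 + 2, 5 + 1) = 6.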
/// Order candidates by earlier issuing cycle.
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};

static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction\n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
  // ...
  return ScheduleMetrics(CurrCycle, SumBubbles);
}

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
  // ...
  return ScheduleMetrics(CurrCycle, SumBubbles);
}
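// Both overloads accumulate SumBubbles (idle cycles inserted before each
// instruction's ready cycle) against the total cycle count and return
// ScheduleMetrics(CurrCycle, SumBubbles); getMetric() then expresses bubbles
// as a ScaleFactor-scaled fraction of schedule length, so lower is better.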
bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  // For dynamic VGPR mode, we don't want to waste any VGPR blocks.
  if (DAG.MFI.isDynamicVGPREnabled()) {
    unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(),
        PressureBefore.getVGPRNum(false));
    unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, DAG.MFI.getDynamicVGPRBlockSize(),
        PressureAfter.getVGPRNum(false));
    if (BlocksAfter > BlocksBefore)
      return true;
  }

  return false;
}
bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // ...
  LLVM_DEBUG(
      dbgs() << "\n\t      *** In shouldRevertScheduling ***\n"
             << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs() << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore = std::min(
      S.getTargetOccupancy(),
      PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}
void GCNSchedStage::revertScheduling() {
  // ...
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->splice(DAG.RegionEnd, DAG.BB, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instructions are at the end of the
  // block and RegionEnd points to the first of them; advance RegionEnd past
  // them to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() is a debug instruction, the region shrank because all
  // debug instructions were moved to the end of the block; find the first
  // non-debug instruction for RegionBegin.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and reset
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();
  // ...
}
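// After the re-splice, LIS->handleMove() replays each liveness update, so the
// region's LiveIntervals end up as if the stage's schedule had never been
// applied; debug values are restored separately by placeDebugValues().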
bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
                                         SlotIndex OriginalIdx,
                                         SlotIndex RematIdx) const {
  LiveIntervals *LIS = DAG.LIS;
  MachineRegisterInfo &MRI = DAG.MRI;
  OriginalIdx = OriginalIdx.getRegSlot(true);
  RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
  for (const MachineOperand &MO : InstToRemat->operands()) {
    if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
      continue;

    if (!MO.getReg().isVirtual()) {
      // Physical register checks (elided).
      // ...
      continue;
    }

    LiveInterval &LI = LIS->getInterval(MO.getReg());
    const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
    assert(OVNI);
    if (OVNI != LI.getVNInfoAt(RematIdx))
      return false;

    // Check that the subranges the operand reads are live at RematIdx.
    if (LI.hasSubRanges()) {
      unsigned SubReg = MO.getSubReg();
      LaneBitmask LM = SubReg ? DAG.TRI->getSubRegIndexLaneMask(SubReg)
                              : MRI.getMaxLaneMaskForVReg(MO.getReg());
      for (LiveInterval::SubRange &SR : LI.subranges()) {
        if ((SR.LaneMask & LM).none())
          continue;
        if (!SR.liveAt(RematIdx))
          return false;
        // ...
      }
    }
  }
  return true;
}
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
  const Function &F = MF.getFunction();

  // Maps optimizable regions (i.e., regions at minimum and register-limited
  // occupancy, or regions with spilling) to the target RP we would like to
  // reach.
  DenseMap<unsigned, GCNRPTarget> OptRegions;
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
  auto ResetTargetRegions = [&]() {
    OptRegions.clear();
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      const GCNRegPressure &RP = DAG.Pressure[I];
      GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
      if (!Target.satisfied())
        OptRegions.insert({I, Target});
    }
  };

  ResetTargetRegions();
  if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
    // In addition to register usage being above addressable limits, occupancy
    // below the minimum is considered like "spilling" as well.
    TargetOcc = std::nullopt;
  } else {
    // There is no spilling and room to improve occupancy; set up "increased
    // occupancy targets" for all regions.
    TargetOcc = DAG.MinOccupancy + 1;
    unsigned VGPRBlockSize = MFI.getDynamicVGPRBlockSize();
    MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
    MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
    ResetTargetRegions();
  }

  REMAT_DEBUG({
    dbgs() << "Analyzing ";
    MF.getFunction().printAsOperand(dbgs(), false);
    dbgs() << ": ";
    if (OptRegions.empty()) {
      dbgs() << "no objective to achieve, occupancy is maximal at "
             << MFI.getMaxWavesPerEU();
    } else if (!TargetOcc) {
      dbgs() << "reduce spilling (minimum target occupancy is "
             << MFI.getMinWavesPerEU() << ')';
    } else {
      dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
             << *TargetOcc;
    }
    dbgs() << '\n';
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end())
        dbgs() << REMAT_PREFIX << "  [" << I << "] " << OptIt->getSecond()
               << '\n';
    }
  });
  if (OptRegions.empty())
    return false;

  // Accounts for the reduction of RP in an optimizable region when Reg is no
  // longer live into it. Returns whether all optimizable regions are now
  // satisfied.
  auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
                              bool &Progress) -> bool {
    GCNRPTarget &Target = OptIt->getSecond();
    if (!Target.isSaveBeneficial(Reg))
      return false;
    Progress = true;
    Target.saveReg(Reg, Mask, DAG.MRI);
    if (Target.satisfied())
      OptRegions.erase(OptIt->getFirst());
    return OptRegions.empty();
  };

  // We need up-to-date live-out info to query live-out register masks in
  // regions containing rematerializable instructions.
  DAG.RegionLiveOuts.buildLiveRegMap();

  // Cache the set of registers that are going to be rematerialized.
  DenseSet<unsigned> RematRegs;

  // Identify rematerializable instructions in the function.
  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
    auto Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI) {
      MachineInstr &DefMI = *MI;
      if (!isTriviallyReMaterializable(DefMI))
        continue;

      // We only care about rematerializing an instruction if its single
      // non-debug user lives in a different region.
      Register Reg = DefMI.getOperand(0).getReg();
      MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
      if (!UseMI)
        continue;
      auto UseRegion = MIRegion.find(UseMI);
      if (UseRegion != MIRegion.end() && UseRegion->second == I)
        continue;

      // Do not rematerialize an instruction if it uses or is used by an
      // instruction that we have designated for rematerialization.
      if (Rematerializations.contains(UseMI) ||
          llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
            return MO.isReg() && RematRegs.contains(MO.getReg());
          }))
        continue;

      // Do not rematerialize an instruction if its uses are not available at
      // the rematerialization point.
      SlotIndex DefIdx = DAG.LIS->getInstructionIndex(DefMI);
      SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
      if (!allUsesAvailableAt(&DefMI, DefIdx, UseIdx))
        continue;

      RematInstruction &Remat =
          Rematerializations.try_emplace(&DefMI, UseMI).first->second;

      bool RematUseful = false;
      if (auto It = OptRegions.find(I); It != OptRegions.end()) {
        // Rematerializing the register removes it from the defining region's
        // live-outs.
        LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
        if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
          return true;
      }

      for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
        // We are only collecting regions in which the register is a live-in
        // (and may be live-through).
        auto It = DAG.LiveIns[LIRegion].find(Reg);
        if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
          continue;
        Remat.LiveInRegions.insert(LIRegion);

        if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
          if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
                               RematUseful))
            return true;
        }
      }

      // If the register is not live into or out of any optimizable region,
      // rematerializing it brings no benefit.
      if (!RematUseful) {
        Rematerializations.pop_back();
        REMAT_DEBUG(dbgs() << "  No impact, not rematerializing instruction\n");
      } else {
        RematRegs.insert(Reg);
      }
    }
  }

  if (TargetOcc) {
    // We were trying to increase occupancy but failed; drop all collected
    // rematerializations.
    Rematerializations.clear();
    return false;
  }
  return !Rematerializations.empty();
}
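// Summary of the candidate search above: TargetOcc == std::nullopt means the
// stage is only trying to reduce spilling; otherwise it aims for exactly one
// extra wave (DAG.MinOccupancy + 1). A rematerialization candidate survives
// only if it lowers pressure in at least one optimizable region (RematUseful).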
void PreRARematStage::rematerialize() {
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
  // ...

  // Collect regions whose maximum RP changes in an unpredictable way; their RP
  // must be fully recomputed after all rematerializations.
  DenseSet<unsigned> RecomputeRP;

  // Rematerialize all instructions.
  for (auto &[DefMI, Remat] : Rematerializations) {
    MachineBasicBlock::iterator InsertPos(Remat.UseMI);
    Register Reg = DefMI->getOperand(0).getReg();
    unsigned DefRegion = MIRegion.at(DefMI);

    // Rematerialize DefMI to its use block.
    TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                       AMDGPU::NoSubRegister, *DefMI, *DAG.TRI);
    Remat.RematMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);

    // Update region boundaries in the region we sank from (remove the defining
    // MI) and the region we sank to (insert the rematerialized MI).
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end())
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
                                 Remat.RematMI);
    DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
    DefMI->eraseFromParent();

    // Collect all regions impacted by the rematerialization and update their
    // live-in/RP information.
    for (unsigned I : Remat.LiveInRegions) {
      ImpactedRegions.insert({I, DAG.Pressure[I]});
      GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];

#ifdef EXPENSIVE_CHECKS
      // All uses are known to be available / live at the remat point, so they
      // must already be live-in to the region.
      for (MachineOperand &MO : DefMI->operands()) {
        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
          continue;
        Register UseReg = MO.getReg();
        if (!UseReg.isVirtual())
          continue;

        LiveInterval &LI = DAG.LIS->getInterval(UseReg);
        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
        if (LI.hasSubRanges() && MO.getSubReg())
          LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());

        LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
        // If the register has lanes not covered by the live-in set, they must
        // not map to any subrange.
        if (UncoveredLanes.any()) {
          assert(LI.hasSubRanges());
          for (LiveInterval::SubRange &SR : LI.subranges())
            assert((SR.LaneMask & UncoveredLanes).none());
        }
      }
#endif

      // The register is no longer live-in in this region. In live-through
      // regions maximum RP decreases predictably; in the using region it may
      // or may not, so mark the latter for recomputation.
      LaneBitmask PrevMask = RegionLiveIns[Reg];
      RegionLiveIns.erase(Reg);
      RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
      if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
        DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
      else
        RecomputeRP.insert(I);
    }

    // RP in the region from which the instruction was rematerialized may or
    // may not decrease.
    ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
    RecomputeRP.insert(DefRegion);

    // Recompute the live interval to reflect the rematerialization.
    Register RematReg = Remat.RematMI->getOperand(0).getReg();
    DAG.LIS->removeInterval(RematReg);
    DAG.LIS->createAndComputeVirtRegInterval(RematReg);
  }

  // All regions impacted by at least one rematerialization must be
  // rescheduled; recompute maximum RP where it changed unpredictably.
  unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
  AchievedOcc = MFI.getMaxWavesPerEU();
  for (auto &[I, OriginalRP] : ImpactedRegions) {
    bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
    RescheduleRegions[I] = !IsEmptyRegion;
    if (!RecomputeRP.contains(I))
      continue;

    GCNRegPressure RP;
    if (IsEmptyRegion) {
      RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
    } else {
      GCNDownwardRPTracker RPT(*DAG.LIS);
      auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
                                                      DAG.Regions[I].second);
      if (NonDbgMI == DAG.Regions[I].second) {
        // The region contains only debug instructions; RP is the live-in
        // pressure.
        RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
      } else {
        RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
        RPT.advance(DAG.Regions[I].second);
        RP = RPT.moveMaxPressure();
      }
    }
    DAG.Pressure[I] = RP;
    AchievedOcc =
        std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
  }
}
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses()) {
    if (MO.getReg().isVirtual())
      return false;

    // We cannot rematerialize physical register uses, unless the register is
    // constant or the use is ignorable (e.g. the implicit exec use on VALU
    // instructions).
    if (MO.getReg().isPhysical()) {
      if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
        continue;
      return false;
    }
  }
  return true;
}
void PreRARematStage::finalizeGCNSchedStage() {
  // We consider that reducing spilling is always beneficial, so we never
  // roll back rematerializations in such cases. It is also possible that
  // rescheduling lowers occupancy below the one achieved just through remats,
  // in which case we do not want to roll back either.
  unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
  if (!TargetOcc || MaxOcc >= *TargetOcc)
    return;

  REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

  // Roll back the rematerializations.
  for (const auto &[DefMI, Remat] : Rematerializations) {
    MachineInstr &RematMI = *Remat.RematMI;
    unsigned DefRegion = MIRegion.at(DefMI);
    MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
    MachineBasicBlock *MBB = RegionBB[DefRegion];
    Register Reg = RematMI.getOperand(0).getReg();

    // Re-rematerialize the instruction at the end of its original region. It
    // may not end up at exactly its original position, but that should not
    // matter much.
    TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI,
                       *DAG.TRI);
    MachineInstr *NewMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*NewMI);

    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end())
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
                                 nullptr);
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);

    // Erase the rematerialized instruction and recompute the register's live
    // interval.
    DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
    RematMI.eraseFromParent();
    DAG.LIS->removeInterval(Reg);
    DAG.LIS->createAndComputeVirtRegInterval(Reg);

    // Re-add the register as a live-in in all regions it used to be one in.
    for (unsigned LIRegion : Remat.LiveInRegions)
      DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
  }

  // Reset RP in all impacted regions.
  for (auto &[I, OriginalRP] : ImpactedRegions)
    DAG.Pressure[I] = OriginalRP;

  GCNSchedStage::finalizeGCNSchedStage();
}
void GCNScheduleDAGMILive::updateRegionBoundaries(
    RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
    MachineInstr *NewMI) {
  assert((!NewMI || NewMI != RegionBounds.second) &&
         "cannot remove at region end");

  if (RegionBounds.first == RegionBounds.second) {
    assert(NewMI && "cannot remove from an empty region");
    RegionBounds.first = NewMI;
    return;
  }

  // We only care about modifications at the beginning of the region since the
  // upper region boundary is exclusive.
  if (MI != RegionBounds.first)
    return;
  if (!NewMI)
    RegionBounds.first = std::next(MI); // Removal.
  else
    RegionBounds.first = NewMI; // Insertion.
}

static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
  return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {
    return SII->isIGLPMutationOnly(MI->getOpcode());
  });
}

void GCNPostScheduleDAGMILive::schedule() {
  HasIGLPInstrs = hasIGLPInstrs(this);
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
  }
  ScheduleDAGMI::schedule();
}
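// The pre-RA scheduler performs a similar mutation swap for regions recorded
// in RegionsWithIGLPInstrs, so IGLP-annotated regions are scheduled with the
// IGroupLP mutation in both the pre-RA and post-RA phases.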