MachineScheduler.cpp
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Statistic.h"
52#include "llvm/Config/llvm-config.h"
54#include "llvm/MC/LaneBitmask.h"
55#include "llvm/Pass.h"
58#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <limits>
68#include <memory>
69#include <string>
70#include <tuple>
71#include <utility>
72#include <vector>
73
74using namespace llvm;
75
76#define DEBUG_TYPE "machine-scheduler"
77
78STATISTIC(NumInstrsInSourceOrderPreRA,
79 "Number of instructions in source order after pre-RA scheduling");
80STATISTIC(NumInstrsInSourceOrderPostRA,
81 "Number of instructions in source order after post-RA scheduling");
82STATISTIC(NumInstrsScheduledPreRA,
83 "Number of instructions scheduled by pre-RA scheduler");
84STATISTIC(NumInstrsScheduledPostRA,
85 "Number of instructions scheduled by post-RA scheduler");
86STATISTIC(NumClustered, "Number of load/store pairs clustered");
87
88STATISTIC(NumTopPreRA,
89 "Number of scheduling units chosen from top queue pre-RA");
90STATISTIC(NumBotPreRA,
91 "Number of scheduling units chosen from bottom queue pre-RA");
92STATISTIC(NumNoCandPreRA,
93 "Number of scheduling units chosen for NoCand heuristic pre-RA");
94STATISTIC(NumOnly1PreRA,
95 "Number of scheduling units chosen for Only1 heuristic pre-RA");
96STATISTIC(NumPhysRegPreRA,
97 "Number of scheduling units chosen for PhysReg heuristic pre-RA");
98STATISTIC(NumRegExcessPreRA,
99 "Number of scheduling units chosen for RegExcess heuristic pre-RA");
100STATISTIC(NumRegCriticalPreRA,
101 "Number of scheduling units chosen for RegCritical heuristic pre-RA");
102STATISTIC(NumStallPreRA,
103 "Number of scheduling units chosen for Stall heuristic pre-RA");
104STATISTIC(NumClusterPreRA,
105 "Number of scheduling units chosen for Cluster heuristic pre-RA");
106STATISTIC(NumWeakPreRA,
107 "Number of scheduling units chosen for Weak heuristic pre-RA");
108STATISTIC(NumRegMaxPreRA,
109 "Number of scheduling units chosen for RegMax heuristic pre-RA");
111 NumResourceReducePreRA,
112 "Number of scheduling units chosen for ResourceReduce heuristic pre-RA");
114 NumResourceDemandPreRA,
115 "Number of scheduling units chosen for ResourceDemand heuristic pre-RA");
117 NumTopDepthReducePreRA,
118 "Number of scheduling units chosen for TopDepthReduce heuristic pre-RA");
120 NumTopPathReducePreRA,
121 "Number of scheduling units chosen for TopPathReduce heuristic pre-RA");
123 NumBotHeightReducePreRA,
124 "Number of scheduling units chosen for BotHeightReduce heuristic pre-RA");
126 NumBotPathReducePreRA,
127 "Number of scheduling units chosen for BotPathReduce heuristic pre-RA");
128STATISTIC(NumNodeOrderPreRA,
129 "Number of scheduling units chosen for NodeOrder heuristic pre-RA");
130STATISTIC(NumFirstValidPreRA,
131 "Number of scheduling units chosen for FirstValid heuristic pre-RA");
132
133STATISTIC(NumTopPostRA,
134 "Number of scheduling units chosen from top queue post-RA");
135STATISTIC(NumBotPostRA,
136 "Number of scheduling units chosen from bottom queue post-RA");
137STATISTIC(NumNoCandPostRA,
138 "Number of scheduling units chosen for NoCand heuristic post-RA");
139STATISTIC(NumOnly1PostRA,
140 "Number of scheduling units chosen for Only1 heuristic post-RA");
141STATISTIC(NumPhysRegPostRA,
142 "Number of scheduling units chosen for PhysReg heuristic post-RA");
143STATISTIC(NumRegExcessPostRA,
144 "Number of scheduling units chosen for RegExcess heuristic post-RA");
146 NumRegCriticalPostRA,
147 "Number of scheduling units chosen for RegCritical heuristic post-RA");
148STATISTIC(NumStallPostRA,
149 "Number of scheduling units chosen for Stall heuristic post-RA");
150STATISTIC(NumClusterPostRA,
151 "Number of scheduling units chosen for Cluster heuristic post-RA");
152STATISTIC(NumWeakPostRA,
153 "Number of scheduling units chosen for Weak heuristic post-RA");
154STATISTIC(NumRegMaxPostRA,
155 "Number of scheduling units chosen for RegMax heuristic post-RA");
157 NumResourceReducePostRA,
158 "Number of scheduling units chosen for ResourceReduce heuristic post-RA");
160 NumResourceDemandPostRA,
161 "Number of scheduling units chosen for ResourceDemand heuristic post-RA");
163 NumTopDepthReducePostRA,
164 "Number of scheduling units chosen for TopDepthReduce heuristic post-RA");
166 NumTopPathReducePostRA,
167 "Number of scheduling units chosen for TopPathReduce heuristic post-RA");
169 NumBotHeightReducePostRA,
170 "Number of scheduling units chosen for BotHeightReduce heuristic post-RA");
172 NumBotPathReducePostRA,
173 "Number of scheduling units chosen for BotPathReduce heuristic post-RA");
174STATISTIC(NumNodeOrderPostRA,
175 "Number of scheduling units chosen for NodeOrder heuristic post-RA");
176STATISTIC(NumFirstValidPostRA,
177 "Number of scheduling units chosen for FirstValid heuristic post-RA");
178
179namespace llvm {
180
182 "misched-prera-direction", cl::Hidden,
183 cl::desc("Pre reg-alloc list scheduling direction"),
186 clEnumValN(MISched::TopDown, "topdown",
187 "Force top-down pre reg-alloc list scheduling"),
188 clEnumValN(MISched::BottomUp, "bottomup",
189 "Force bottom-up pre reg-alloc list scheduling"),
190 clEnumValN(MISched::Bidirectional, "bidirectional",
191 "Force bidirectional pre reg-alloc list scheduling")));
192
194 "misched-postra-direction", cl::Hidden,
195 cl::desc("Post reg-alloc list scheduling direction"),
198 clEnumValN(MISched::TopDown, "topdown",
199 "Force top-down post reg-alloc list scheduling"),
200 clEnumValN(MISched::BottomUp, "bottomup",
201 "Force bottom-up post reg-alloc list scheduling"),
202 clEnumValN(MISched::Bidirectional, "bidirectional",
203 "Force bidirectional post reg-alloc list scheduling")));
204
205static cl::opt<bool>
207 cl::desc("Print critical path length to stdout"));
208
210 "verify-misched", cl::Hidden,
211 cl::desc("Verify machine instrs before and after machine scheduling"));
212
213#ifndef NDEBUG
215 "view-misched-dags", cl::Hidden,
216 cl::desc("Pop up a window to show MISched dags after they are processed"));
217cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
218 cl::desc("Print schedule DAGs"));
220 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
221 cl::desc("Dump resource usage at schedule boundary."));
223 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
224    cl::desc("Show details of invoking getNextResourceCycle."));
225#else
226const bool ViewMISchedDAGs = false;
227const bool PrintDAGs = false;
228const bool MischedDetailResourceBooking = false;
229#ifdef LLVM_ENABLE_DUMP
230const bool MISchedDumpReservedCycles = false;
231#endif // LLVM_ENABLE_DUMP
232#endif // NDEBUG
233
234} // end namespace llvm
235
236#ifndef NDEBUG
237/// In some situations a few uninteresting nodes depend on nearly all other
238/// nodes in the graph; provide a cutoff to hide them.
239static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
240  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
241
243 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
244
246 cl::desc("Only schedule this function"));
247static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
248 cl::desc("Only schedule this MBB#"));
249#endif // NDEBUG
250
251/// Avoid quadratic complexity in unusually large basic blocks by limiting the
252/// size of the ready lists.
254 cl::desc("Limit ready list to N instructions"), cl::init(256));
255
256static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
257 cl::desc("Enable register pressure scheduling."), cl::init(true));
258
259static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
260 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
261
263 cl::desc("Enable memop clustering."),
264 cl::init(true));
265static cl::opt<bool>
266 ForceFastCluster("force-fast-cluster", cl::Hidden,
267                      cl::desc("Switch to fast cluster algorithm with the loss "
268                               "of some fusion opportunities"),
269 cl::init(false));
271 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
272 cl::desc("The threshold for fast cluster"),
273 cl::init(1000));
274
275#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
277 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
278 cl::desc("Dump resource usage at schedule boundary."));
280 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
281 cl::desc("Set width of the columns with "
282 "the resources and schedule units"),
283 cl::init(19));
285 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
286 cl::desc("Set width of the columns showing resource booking."),
287 cl::init(5));
289 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
290 cl::desc("Sort the resources printed in the dump trace"));
291#endif
292
294 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
295 cl::desc("Number of intervals to track"), cl::init(10));
296
297// DAG subtrees must have at least this many nodes.
298static const unsigned MinSubtreeSize = 8;
299
300// Pin the vtables to this file.
301void MachineSchedStrategy::anchor() {}
302
303void ScheduleDAGMutation::anchor() {}
304
305//===----------------------------------------------------------------------===//
306// Machine Instruction Scheduling Pass and Registry
307//===----------------------------------------------------------------------===//
308
311}
312
314 delete RegClassInfo;
315}
316
317namespace llvm {
318namespace impl_detail {
319
320/// Base class for the machine scheduler classes.
322protected:
323 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
324};
325
326/// Impl class for MachineScheduler.
328 // These are only for using MF.verify()
329 // remove when verify supports passing in all analyses
330 MachineFunctionPass *P = nullptr;
331 MachineFunctionAnalysisManager *MFAM = nullptr;
332
333public:
339 };
340
342 // Migration only
343 void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
344 void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
345
346 bool run(MachineFunction &MF, const TargetMachine &TM,
347 const RequiredAnalyses &Analyses);
348
349protected:
351};
352
353/// Impl class for PostMachineScheduler.
355 // These are only for using MF.verify()
356 // remove when verify supports passing in all analyses
357 MachineFunctionPass *P = nullptr;
358 MachineFunctionAnalysisManager *MFAM = nullptr;
359
360public:
364 };
366 // Migration only
367 void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
368 void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
369
370 bool run(MachineFunction &Func, const TargetMachine &TM,
371 const RequiredAnalyses &Analyses);
372
373protected:
375};
376
377} // namespace impl_detail
378} // namespace llvm
379
383
384namespace {
385/// MachineScheduler runs after coalescing and before register allocation.
386class MachineSchedulerLegacy : public MachineFunctionPass {
388
389public:
390 MachineSchedulerLegacy();
391 void getAnalysisUsage(AnalysisUsage &AU) const override;
392 bool runOnMachineFunction(MachineFunction&) override;
393
394 static char ID; // Class identification, replacement for typeinfo
395};
396
397/// PostMachineScheduler runs shortly before code emission.
398class PostMachineSchedulerLegacy : public MachineFunctionPass {
400
401public:
402 PostMachineSchedulerLegacy();
403 void getAnalysisUsage(AnalysisUsage &AU) const override;
404 bool runOnMachineFunction(MachineFunction &) override;
405
406 static char ID; // Class identification, replacement for typeinfo
407};
408
409} // end anonymous namespace
410
411char MachineSchedulerLegacy::ID = 0;
412
413char &llvm::MachineSchedulerID = MachineSchedulerLegacy::ID;
414
415INITIALIZE_PASS_BEGIN(MachineSchedulerLegacy, DEBUG_TYPE,
416 "Machine Instruction Scheduler", false, false)
422INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE,
424
425MachineSchedulerLegacy::MachineSchedulerLegacy() : MachineFunctionPass(ID) {
427}
428
429void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
430 AU.setPreservesCFG();
440}
441
442char PostMachineSchedulerLegacy::ID = 0;
443
444char &llvm::PostMachineSchedulerID = PostMachineSchedulerLegacy::ID;
445
446INITIALIZE_PASS_BEGIN(PostMachineSchedulerLegacy, "postmisched",
447 "PostRA Machine Instruction Scheduler", false, false)
451INITIALIZE_PASS_END(PostMachineSchedulerLegacy, "postmisched",
453
454PostMachineSchedulerLegacy::PostMachineSchedulerLegacy()
457}
458
459void PostMachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
460 AU.setPreservesCFG();
466}
467
470
471/// A dummy default scheduler factory indicates whether the scheduler
472/// is overridden on the command line.
474 return nullptr;
475}
476
477/// MachineSchedOpt allows command line selection of the scheduler.
482 cl::desc("Machine instruction scheduler to use"));
483
485DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
487
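// For illustration only: a target or plugin can make an alternative scheduler
// selectable via -misched=<name> by registering a factory with
// MachineSchedRegistry. The factory and registry names below are hypothetical.
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//       MySchedRegistry("my-sched", "Illustrative custom scheduler",
//                       createMySched);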
489 "enable-misched",
490 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
491 cl::Hidden);
492
494 "enable-post-misched",
495 cl::desc("Enable the post-ra machine instruction scheduling pass."),
496 cl::init(true), cl::Hidden);
497
498/// Decrement this iterator until reaching the top or a non-debug instr.
502 assert(I != Beg && "reached the top of the region, cannot decrement");
503 while (--I != Beg) {
504 if (!I->isDebugOrPseudoInstr())
505 break;
506 }
507 return I;
508}
509
510/// Non-const version.
516}
517
518/// If this iterator is a debug value, increment until reaching the End or a
519/// non-debug instruction.
523 for(; I != End; ++I) {
524 if (!I->isDebugOrPseudoInstr())
525 break;
526 }
527 return I;
528}
529
530/// Non-const version.
536}
537
538/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
539ScheduleDAGInstrs *MachineSchedulerImpl::createMachineScheduler() {
540 // Select the scheduler, or set the default.
542 if (Ctor != useDefaultMachineSched)
543 return Ctor(this);
544
545 // Get the default scheduler set by the target for this function.
546 ScheduleDAGInstrs *Scheduler = TM->createMachineScheduler(this);
547 if (Scheduler)
548 return Scheduler;
549
550 // Default to GenericScheduler.
551 return createSchedLive(this);
552}
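// Targets usually hook in through the createMachineScheduler() override
// queried above rather than through the registry. A rough sketch for a
// hypothetical target (the mutation factory name is made up and would have to
// return a std::unique_ptr<ScheduleDAGMutation>):
//
//   ScheduleDAGInstrs *
//   MyTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createSchedLive(C);
//     DAG->addMutation(createMyLoadClusterDAGMutation()); // hypothetical
//     return DAG;
//   }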
553
554bool MachineSchedulerImpl::run(MachineFunction &Func, const TargetMachine &TM,
555 const RequiredAnalyses &Analyses) {
556 MF = &Func;
557 MLI = &Analyses.MLI;
558 MDT = &Analyses.MDT;
559 this->TM = &TM;
560 AA = &Analyses.AA;
561 LIS = &Analyses.LIS;
562
563 if (VerifyScheduling) {
564 LLVM_DEBUG(LIS->dump());
565 const char *MSchedBanner = "Before machine scheduling.";
566 if (P)
567 MF->verify(P, MSchedBanner, &errs());
568 else
569 MF->verify(*MFAM, MSchedBanner, &errs());
570 }
571 RegClassInfo->runOnMachineFunction(*MF);
572
573 // Instantiate the selected scheduler for this target, function, and
574 // optimization level.
575 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
576 scheduleRegions(*Scheduler, false);
577
578 LLVM_DEBUG(LIS->dump());
579 if (VerifyScheduling) {
580 const char *MSchedBanner = "After machine scheduling.";
581 if (P)
582 MF->verify(P, MSchedBanner, &errs());
583 else
584 MF->verify(*MFAM, MSchedBanner, &errs());
585 }
586 return true;
587}
588
589/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
590/// the caller. We don't have a command line option to override the postRA
591/// scheduler. The Target must configure it.
592ScheduleDAGInstrs *PostMachineSchedulerImpl::createPostMachineScheduler() {
593 // Get the postRA scheduler set by the target for this function.
594 ScheduleDAGInstrs *Scheduler = TM->createPostMachineScheduler(this);
595 if (Scheduler)
596 return Scheduler;
597
598 // Default to GenericScheduler.
599 return createSchedPostRA(this);
600}
601
602bool PostMachineSchedulerImpl::run(MachineFunction &Func,
603 const TargetMachine &TM,
604 const RequiredAnalyses &Analyses) {
605 MF = &Func;
606 MLI = &Analyses.MLI;
607 this->TM = &TM;
608 AA = &Analyses.AA;
609
610 if (VerifyScheduling) {
611 const char *PostMSchedBanner = "Before post machine scheduling.";
612 if (P)
613 MF->verify(P, PostMSchedBanner, &errs());
614 else
615 MF->verify(*MFAM, PostMSchedBanner, &errs());
616 }
617
618 // Instantiate the selected scheduler for this target, function, and
619 // optimization level.
620 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
621 scheduleRegions(*Scheduler, true);
622
623 if (VerifyScheduling) {
624 const char *PostMSchedBanner = "After post machine scheduling.";
625 if (P)
626 MF->verify(P, PostMSchedBanner, &errs());
627 else
628 MF->verify(*MFAM, PostMSchedBanner, &errs());
629 }
630 return true;
631}
632
633/// Top-level MachineScheduler pass driver.
634///
635/// Visit blocks in function order. Divide each block into scheduling regions
636/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
637/// consistent with the DAG builder, which traverses the interior of the
638/// scheduling regions bottom-up.
639///
640/// This design avoids exposing scheduling boundaries to the DAG builder,
641/// simplifying the DAG builder's support for "special" target instructions.
642/// At the same time the design allows target schedulers to operate across
643/// scheduling boundaries, for example to bundle the boundary instructions
644/// without reordering them. This creates complexity, because the target
645/// scheduler must update the RegionBegin and RegionEnd positions cached by
646/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
647/// design would be to split blocks at scheduling boundaries, but LLVM has a
648/// general bias against block splitting purely for implementation simplicity.
649bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
650 if (skipFunction(MF.getFunction()))
651 return false;
652
655 return false;
656 } else if (!MF.getSubtarget().enableMachineScheduler()) {
657 return false;
658 }
659
660 LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
661
662 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
663 auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
664 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
665 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
666 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
667 Impl.setLegacyPass(this);
668 return Impl.run(MF, TM, {MLI, MDT, AA, LIS});
669}
670
672 : Impl(std::make_unique<MachineSchedulerImpl>()), TM(TM) {}
675 default;
676
678 : Impl(std::make_unique<PostMachineSchedulerImpl>()), TM(TM) {}
680 PostMachineSchedulerPass &&Other) = default;
682
688 return PreservedAnalyses::all();
689 } else if (!MF.getSubtarget().enableMachineScheduler()) {
690 return PreservedAnalyses::all();
691 }
692
693 LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
694 auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
695 auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
697 .getManager();
698 auto &AA = FAM.getResult<AAManager>(MF.getFunction());
699 auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
700 Impl->setMFAM(&MFAM);
701 bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS});
702 if (!Changed)
703 return PreservedAnalyses::all();
704
707 .preserve<SlotIndexesAnalysis>()
708 .preserve<LiveIntervalsAnalysis>();
709}
710
711bool PostMachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
712 if (skipFunction(MF.getFunction()))
713 return false;
714
717 return false;
718 } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
719 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
720 return false;
721 }
722 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
723 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
724 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
725 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
726 Impl.setLegacyPass(this);
727 return Impl.run(MF, TM, {MLI, AA});
728}
729
735 return PreservedAnalyses::all();
736 } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
737 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
738 return PreservedAnalyses::all();
739 }
740 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
741 auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
743 .getManager();
744 auto &AA = FAM.getResult<AAManager>(MF.getFunction());
745
746 Impl->setMFAM(&MFAM);
747 bool Changed = Impl->run(MF, *TM, {MLI, AA});
748 if (!Changed)
749 return PreservedAnalyses::all();
750
753 return PA;
754}
755
756/// Return true if the given instruction should not be included in a scheduling
757/// region.
758///
759/// MachineScheduler does not currently support scheduling across calls. To
760/// handle calls, the DAG builder needs to be modified to create register
761/// anti/output dependencies on the registers clobbered by the call's regmask
762/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
763/// scheduling across calls. In PostRA scheduling, we need the isCall check to
764/// enforce the boundary, but there would be no benefit to postRA scheduling
765/// across calls this late anyway.
768 MachineFunction *MF,
769 const TargetInstrInfo *TII) {
770 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
771 MI->isFakeUse();
772}
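// For example, in a block such as
//
//   %a = ADD ...
//   %b = MUL ...
//   CALL @fn
//   %c = SUB ...
//
// the CALL is a scheduling boundary, so getSchedRegions() below yields two
// regions: one containing ADD and MUL (ending at the CALL) and one containing
// SUB. The boundary instruction itself is not part of either region's DAG.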
773
775
776static void
778 MBBRegionsVector &Regions,
779 bool RegionsTopDown) {
782
784 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
785 RegionEnd != MBB->begin(); RegionEnd = I) {
786
787 // Avoid decrementing RegionEnd for blocks with no terminator.
788 if (RegionEnd != MBB->end() ||
789 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
790 --RegionEnd;
791 }
792
793 // The next region starts above the previous region. Look backward in the
794 // instruction stream until we find the nearest boundary.
795 unsigned NumRegionInstrs = 0;
796 I = RegionEnd;
797 for (;I != MBB->begin(); --I) {
798 MachineInstr &MI = *std::prev(I);
799 if (isSchedBoundary(&MI, &*MBB, MF, TII))
800 break;
801 if (!MI.isDebugOrPseudoInstr()) {
802 // MBB::size() uses instr_iterator to count. Here we need a bundle to
803 // count as a single instruction.
804 ++NumRegionInstrs;
805 }
806 }
807
808 // It's possible we found a scheduling region that only has debug
809 // instructions. Don't bother scheduling these.
810 if (NumRegionInstrs != 0)
811 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
812 }
813
814 if (RegionsTopDown)
815 std::reverse(Regions.begin(), Regions.end());
816}
817
818/// Main driver for both MachineScheduler and PostMachineScheduler.
819void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
820 bool FixKillFlags) {
821 // Visit all machine basic blocks.
822 //
823 // TODO: Visit blocks in global postorder or postorder within the bottom-up
824 // loop tree. Then we can optionally compute global RegPressure.
825 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
826 MBB != MBBEnd; ++MBB) {
827
828 Scheduler.startBlock(&*MBB);
829
830#ifndef NDEBUG
831 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
832 continue;
833 if (SchedOnlyBlock.getNumOccurrences()
834 && (int)SchedOnlyBlock != MBB->getNumber())
835 continue;
836#endif
837
838 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
839 // points to the scheduling boundary at the bottom of the region. The DAG
840 // does not include RegionEnd, but the region does (i.e. the next
841 // RegionEnd is above the previous RegionBegin). If the current block has
842 // no terminator then RegionEnd == MBB->end() for the bottom region.
843 //
844 // All the regions of MBB are first found and stored in MBBRegions, which
845    // will be processed top-down within the MBB if the scheduler requests it.
846 //
847 // The Scheduler may insert instructions during either schedule() or
848 // exitRegion(), even for empty regions. So the local iterators 'I' and
849 // 'RegionEnd' are invalid across these calls. Instructions must not be
850    // added to regions other than the current one without updating MBBRegions.
851
852 MBBRegionsVector MBBRegions;
853 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
854 bool ScheduleSingleMI = Scheduler.shouldScheduleSingleMIRegions();
855 for (const SchedRegion &R : MBBRegions) {
856 MachineBasicBlock::iterator I = R.RegionBegin;
857 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
858 unsigned NumRegionInstrs = R.NumRegionInstrs;
859
860 // Notify the scheduler of the region, even if we may skip scheduling
861 // it. Perhaps it still needs to be bundled.
862 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
863
864 // Skip empty scheduling regions and, conditionally, regions with a single
865 // MI.
866 if (I == RegionEnd || (!ScheduleSingleMI && I == std::prev(RegionEnd))) {
867 // Close the current region. Bundle the terminator if needed.
868 // This invalidates 'RegionEnd' and 'I'.
869 Scheduler.exitRegion();
870 continue;
871 }
872 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
873 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
874 << " " << MBB->getName() << "\n From: " << *I
875 << " To: ";
876 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
877 else dbgs() << "End\n";
878 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
880 errs() << MF->getName();
881 errs() << ":%bb. " << MBB->getNumber();
882 errs() << " " << MBB->getName() << " \n";
883 }
884
885 // Schedule a region: possibly reorder instructions.
886 // This invalidates the original region iterators.
887 Scheduler.schedule();
888
889 // Close the current region.
890 Scheduler.exitRegion();
891 }
892 Scheduler.finishBlock();
893 // FIXME: Ideally, no further passes should rely on kill flags. However,
894 // thumb2 size reduction is currently an exception, so the PostMIScheduler
895 // needs to do this.
896 if (FixKillFlags)
897 Scheduler.fixupKills(*MBB);
898 }
899 Scheduler.finalizeSchedule();
900}
901
902#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
904 dbgs() << "Queue " << Name << ": ";
905 for (const SUnit *SU : Queue)
906 dbgs() << SU->NodeNum << " ";
907 dbgs() << "\n";
908}
909#endif
910
911//===----------------------------------------------------------------------===//
912// ScheduleDAGMI - Basic machine instruction scheduling. This is
913// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
914// virtual registers.
915// ===----------------------------------------------------------------------===/
916
917// Provide a vtable anchor.
919
920/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
921/// NumPredsLeft reaches zero, release the successor node.
922///
923/// FIXME: Adjust SuccSU height based on MinLatency.
925 SUnit *SuccSU = SuccEdge->getSUnit();
926
927 if (SuccEdge->isWeak()) {
928 --SuccSU->WeakPredsLeft;
929 return;
930 }
931#ifndef NDEBUG
932 if (SuccSU->NumPredsLeft == 0) {
933 dbgs() << "*** Scheduling failed! ***\n";
934 dumpNode(*SuccSU);
935 dbgs() << " has been released too many times!\n";
936 llvm_unreachable(nullptr);
937 }
938#endif
939 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
940 // CurrCycle may have advanced since then.
941 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
942 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
943
944 --SuccSU->NumPredsLeft;
945 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
946 SchedImpl->releaseTopNode(SuccSU);
947}
948
949/// releaseSuccessors - Call releaseSucc on each of SU's successors.
951 for (SDep &Succ : SU->Succs)
952 releaseSucc(SU, &Succ);
953}
954
955/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
956/// NumSuccsLeft reaches zero, release the predecessor node.
957///
958/// FIXME: Adjust PredSU height based on MinLatency.
960 SUnit *PredSU = PredEdge->getSUnit();
961
962 if (PredEdge->isWeak()) {
963 --PredSU->WeakSuccsLeft;
964 return;
965 }
966#ifndef NDEBUG
967 if (PredSU->NumSuccsLeft == 0) {
968 dbgs() << "*** Scheduling failed! ***\n";
969 dumpNode(*PredSU);
970 dbgs() << " has been released too many times!\n";
971 llvm_unreachable(nullptr);
972 }
973#endif
974 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
975 // CurrCycle may have advanced since then.
976 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
977 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
978
979 --PredSU->NumSuccsLeft;
980 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
981 SchedImpl->releaseBottomNode(PredSU);
982}
983
984/// releasePredecessors - Call releasePred on each of SU's predecessors.
986 for (SDep &Pred : SU->Preds)
987 releasePred(SU, &Pred);
988}
989
992 SchedImpl->enterMBB(bb);
993}
994
996 SchedImpl->leaveMBB();
998}
999
1000/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
1001/// after crossing a scheduling boundary. [begin, end) includes all instructions
1002/// in the region, including the boundary itself and single-instruction regions
1003/// that don't get scheduled.
1007 unsigned regioninstrs)
1008{
1009 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
1010
1011 SchedImpl->initPolicy(begin, end, regioninstrs);
1012
1013 // Set dump direction after initializing sched policy.
1015 if (SchedImpl->getPolicy().OnlyTopDown)
1017 else if (SchedImpl->getPolicy().OnlyBottomUp)
1019 else
1022}
1023
1024/// This is normally called from the main scheduler loop but may also be invoked
1025/// by the scheduling strategy to perform additional code motion.
1028 // Advance RegionBegin if the first instruction moves down.
1029 if (&*RegionBegin == MI)
1030 ++RegionBegin;
1031
1032 // Update the instruction stream.
1033 BB->splice(InsertPos, BB, MI);
1034
1035 // Update LiveIntervals
1036 if (LIS)
1037 LIS->handleMove(*MI, /*UpdateFlags=*/true);
1038
1039 // Recede RegionBegin if an instruction moves above the first.
1040 if (RegionBegin == InsertPos)
1041 RegionBegin = MI;
1042}
1043
1045#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
1046 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
1048 return false;
1049 }
1050 ++NumInstrsScheduled;
1051#endif
1052 return true;
1053}
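// When the cutoff is reached, scheduling simply stops for the remainder of the
// function, which makes -misched-cutoff=N (available in asserts-enabled
// builds) convenient for bisecting a suspected scheduling bug, e.g. on a
// hypothetical input file:
//
//   llc -misched-cutoff=42 test.ll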
1054
1055/// Per-region scheduling driver, called back from
1056/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
1057/// that does not consider liveness or register pressure. It is useful for
1058/// PostRA scheduling and potentially other custom schedulers.
1060 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
1061 LLVM_DEBUG(SchedImpl->dumpPolicy());
1062
1063 // Build the DAG.
1065
1067
1068 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1069 findRootsAndBiasEdges(TopRoots, BotRoots);
1070
1071 LLVM_DEBUG(dump());
1072 if (PrintDAGs) dump();
1074
1075 // Initialize the strategy before modifying the DAG.
1076 // This may initialize a DFSResult to be used for queue priority.
1077 SchedImpl->initialize(this);
1078
1079 // Initialize ready queues now that the DAG and priority data are finalized.
1080 initQueues(TopRoots, BotRoots);
1081
1082 bool IsTopNode = false;
1083 while (true) {
1084 if (!checkSchedLimit())
1085 break;
1086
1087 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
1088 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1089 if (!SU) break;
1090
1091 assert(!SU->isScheduled && "Node already scheduled");
1092
1093 MachineInstr *MI = SU->getInstr();
1094 if (IsTopNode) {
1095 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1096 if (&*CurrentTop == MI)
1098 else
1100 } else {
1101 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1104 if (&*priorII == MI)
1105 CurrentBottom = priorII;
1106 else {
1107 if (&*CurrentTop == MI)
1108 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1110 CurrentBottom = MI;
1111 }
1112 }
1113 // Notify the scheduling strategy before updating the DAG.
1114 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
1115 // runs, it can then use the accurate ReadyCycle time to determine whether
1116 // newly released nodes can move to the readyQ.
1117 SchedImpl->schedNode(SU, IsTopNode);
1118
1119 updateQueues(SU, IsTopNode);
1120 }
1121 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1122
1124
1125 LLVM_DEBUG({
1126 dbgs() << "*** Final schedule for "
1127 << printMBBReference(*begin()->getParent()) << " ***\n";
1128 dumpSchedule();
1129 dbgs() << '\n';
1130 });
1131}
1132
1133/// Apply each ScheduleDAGMutation step in order.
1135 for (auto &m : Mutations)
1136 m->apply(this);
1137}
1138
1141 SmallVectorImpl<SUnit*> &BotRoots) {
1142 for (SUnit &SU : SUnits) {
1143 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
1144
1145 // Order predecessors so DFSResult follows the critical path.
1146 SU.biasCriticalPath();
1147
1148 // A SUnit is ready to top schedule if it has no predecessors.
1149 if (!SU.NumPredsLeft)
1150 TopRoots.push_back(&SU);
1151 // A SUnit is ready to bottom schedule if it has no successors.
1152 if (!SU.NumSuccsLeft)
1153 BotRoots.push_back(&SU);
1154 }
1156}
1157
1158/// Identify DAG roots and setup scheduler queues.
1160 ArrayRef<SUnit *> BotRoots) {
1161 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
1162 //
1163 // Nodes with unreleased weak edges can still be roots.
1164 // Release top roots in forward order.
1165 for (SUnit *SU : TopRoots)
1166 SchedImpl->releaseTopNode(SU);
1167
1168 // Release bottom roots in reverse order so the higher priority nodes appear
1169 // first. This is more natural and slightly more efficient.
1171 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
1172 SchedImpl->releaseBottomNode(*I);
1173 }
1174
1177
1178 SchedImpl->registerRoots();
1179
1180 // Advance past initial DebugValues.
1183}
1184
1185/// Update scheduler queues after scheduling an instruction.
1186void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
1187 // Release dependent instructions for scheduling.
1188 if (IsTopNode)
1190 else
1192
1193 SU->isScheduled = true;
1194}
1195
1196/// Reinsert any remaining debug_values, just like the PostRA scheduler.
1198 // If first instruction was a DBG_VALUE then put it back.
1199 if (FirstDbgValue) {
1202 }
1203
1204 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
1205 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
1206 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
1207 MachineInstr *DbgValue = P.first;
1208 MachineBasicBlock::iterator OrigPrevMI = P.second;
1209 if (&*RegionBegin == DbgValue)
1210 ++RegionBegin;
1211 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
1212 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
1214 }
1215}
1216
1217#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1218static const char *scheduleTableLegend = " i: issue\n x: resource booked";
1219
1221  // Bail out when there is no schedule model to query.
1223 return;
1224
1225  // Nothing to show if there are no instructions, or only one.
1226 if (BB->size() < 2)
1227 return;
1228
1229 dbgs() << " * Schedule table (TopDown):\n";
1230 dbgs() << scheduleTableLegend << "\n";
1231 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1232 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1233 for (MachineInstr &MI : *this) {
1234 SUnit *SU = getSUnit(&MI);
1235 if (!SU)
1236 continue;
1237 const MCSchedClassDesc *SC = getSchedClass(SU);
1240 PI != PE; ++PI) {
1241 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1242 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1243 }
1244 }
1245 // Print the header with the cycles
1246 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1247 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1248 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1249 dbgs() << "|\n";
1250
1251 for (MachineInstr &MI : *this) {
1252 SUnit *SU = getSUnit(&MI);
1253 if (!SU) {
1254 dbgs() << "Missing SUnit\n";
1255 continue;
1256 }
1257 std::string NodeName("SU(");
1258 NodeName += std::to_string(SU->NodeNum) + ")";
1259 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1260 unsigned C = FirstCycle;
1261 for (; C <= LastCycle; ++C) {
1262 if (C == SU->TopReadyCycle)
1263 dbgs() << llvm::left_justify("| i", ColWidth);
1264 else
1265 dbgs() << llvm::left_justify("|", ColWidth);
1266 }
1267 dbgs() << "|\n";
1268 const MCSchedClassDesc *SC = getSchedClass(SU);
1269
1273
1276 ResourcesIt,
1277 [](const MCWriteProcResEntry &LHS,
1278 const MCWriteProcResEntry &RHS) -> bool {
1279 return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
1280 std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
1281 });
1282 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1283 C = FirstCycle;
1284 const std::string ResName =
1285 SchedModel.getResourceName(PI.ProcResourceIdx);
1286 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1287 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1288 dbgs() << llvm::left_justify("|", ColWidth);
1289 }
1290 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1291 ++I, ++C)
1292 dbgs() << llvm::left_justify("| x", ColWidth);
1293 while (C++ <= LastCycle)
1294 dbgs() << llvm::left_justify("|", ColWidth);
1295 // Place end char
1296 dbgs() << "| \n";
1297 }
1298 }
1299}
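// The dumped trace looks roughly like the following (made-up SUnits and
// resource names, shown only to illustrate the layout):
//
//   Cycle              | 0  | 1  | 2  |
//   SU(0)              | i  |    |    |
//            MyUnitA   | x  |    |    |
//   SU(1)              |    | i  |    |
//            MyUnitB   |    | x  | x  |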
1300
1302  // Bail out when there is no schedule model to query.
1304 return;
1305
1306  // Nothing to show if there are no instructions, or only one.
1307 if (BB->size() < 2)
1308 return;
1309
1310 dbgs() << " * Schedule table (BottomUp):\n";
1311 dbgs() << scheduleTableLegend << "\n";
1312
1313 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1314 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1315 for (MachineInstr &MI : *this) {
1316 SUnit *SU = getSUnit(&MI);
1317 if (!SU)
1318 continue;
1319 const MCSchedClassDesc *SC = getSchedClass(SU);
1322 PI != PE; ++PI) {
1323 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1324 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1325 }
1326 }
1327 // Print the header with the cycles
1328 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1329 for (int C = FirstCycle; C >= LastCycle; --C)
1330 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1331 dbgs() << "|\n";
1332
1333 for (MachineInstr &MI : *this) {
1334 SUnit *SU = getSUnit(&MI);
1335 if (!SU) {
1336 dbgs() << "Missing SUnit\n";
1337 continue;
1338 }
1339 std::string NodeName("SU(");
1340 NodeName += std::to_string(SU->NodeNum) + ")";
1341 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1342 int C = FirstCycle;
1343 for (; C >= LastCycle; --C) {
1344 if (C == (int)SU->BotReadyCycle)
1345 dbgs() << llvm::left_justify("| i", ColWidth);
1346 else
1347 dbgs() << llvm::left_justify("|", ColWidth);
1348 }
1349 dbgs() << "|\n";
1350 const MCSchedClassDesc *SC = getSchedClass(SU);
1354
1357 ResourcesIt,
1358 [](const MCWriteProcResEntry &LHS,
1359 const MCWriteProcResEntry &RHS) -> bool {
1360 return std::tie(LHS.AcquireAtCycle, LHS.ReleaseAtCycle) <
1361 std::tie(RHS.AcquireAtCycle, RHS.ReleaseAtCycle);
1362 });
1363 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1364 C = FirstCycle;
1365 const std::string ResName =
1366 SchedModel.getResourceName(PI.ProcResourceIdx);
1367 dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1368 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1369 dbgs() << llvm::left_justify("|", ColWidth);
1370 }
1371 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1372 ++I, --C)
1373 dbgs() << llvm::left_justify("| x", ColWidth);
1374 while (C-- >= LastCycle)
1375 dbgs() << llvm::left_justify("|", ColWidth);
1376 // Place end char
1377 dbgs() << "| \n";
1378 }
1379 }
1380}
1381#endif
1382
1383#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1388 else if (DumpDir == DumpDirection::BottomUp)
1391 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1392 } else {
1393 dbgs() << "* Schedule table: DumpDirection not set.\n";
1394 }
1395 }
1396
1397 for (MachineInstr &MI : *this) {
1398 if (SUnit *SU = getSUnit(&MI))
1399 dumpNode(*SU);
1400 else
1401 dbgs() << "Missing SUnit\n";
1402 }
1403}
1404#endif
1405
1406//===----------------------------------------------------------------------===//
1407// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1408// preservation.
1409//===----------------------------------------------------------------------===//
1410
1412 delete DFSResult;
1413}
1414
1416 const MachineInstr &MI = *SU.getInstr();
1417 for (const MachineOperand &MO : MI.operands()) {
1418 if (!MO.isReg())
1419 continue;
1420 if (!MO.readsReg())
1421 continue;
1422 if (TrackLaneMasks && !MO.isUse())
1423 continue;
1424
1425 Register Reg = MO.getReg();
1426 if (!Reg.isVirtual())
1427 continue;
1428
1429 // Ignore re-defs.
1430 if (TrackLaneMasks) {
1431 bool FoundDef = false;
1432 for (const MachineOperand &MO2 : MI.all_defs()) {
1433 if (MO2.getReg() == Reg && !MO2.isDead()) {
1434 FoundDef = true;
1435 break;
1436 }
1437 }
1438 if (FoundDef)
1439 continue;
1440 }
1441
1442 // Record this local VReg use.
1444 for (; UI != VRegUses.end(); ++UI) {
1445 if (UI->SU == &SU)
1446 break;
1447 }
1448 if (UI == VRegUses.end())
1450 }
1451}
1452
1453/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1454/// crossing a scheduling boundary. [begin, end) includes all instructions in
1455/// the region, including the boundary itself and single-instruction regions
1456/// that don't get scheduled.
1460 unsigned regioninstrs)
1461{
1462 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1463 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1464
1465 // For convenience remember the end of the liveness region.
1466 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1467
1469
1470 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1471 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1472
1474 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1475}
1476
1477// Setup the register pressure trackers for the top scheduled and bottom
1478// scheduled regions.
1480 VRegUses.clear();
1482 for (SUnit &SU : SUnits)
1483 collectVRegUses(SU);
1484
1486 ShouldTrackLaneMasks, false);
1488 ShouldTrackLaneMasks, false);
1489
1490 // Close the RPTracker to finalize live ins.
1492
1494
1495 // Initialize the live ins and live outs.
1498
1499 // Close one end of the tracker so we can call
1500 // getMaxUpward/DownwardPressureDelta before advancing across any
1501 // instructions. This converts currently live regs into live ins/outs.
1504
1506 if (!BotRPTracker.getLiveThru().empty()) {
1508 LLVM_DEBUG(dbgs() << "Live Thru: ";
1510 };
1511
1512 // For each live out vreg reduce the pressure change associated with other
1513 // uses of the same vreg below the live-out reaching def.
1515
1516 // Account for liveness generated by the region boundary.
1517 if (LiveRegionEnd != RegionEnd) {
1519 BotRPTracker.recede(&LiveUses);
1520 updatePressureDiffs(LiveUses);
1521 }
1522
1523 LLVM_DEBUG(dbgs() << "Top Pressure: ";
1525 dbgs() << "Bottom Pressure: ";
1527
1529 (RegionEnd->isDebugInstr() &&
1531 "Can't find the region bottom");
1532
1533 // Cache the list of excess pressure sets in this region. This will also track
1534 // the max pressure in the scheduled code for these sets.
1535 RegionCriticalPSets.clear();
1536 const std::vector<unsigned> &RegionPressure =
1538 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1539 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1540 if (RegionPressure[i] > Limit) {
1541 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1542 << " Actual " << RegionPressure[i] << "\n");
1543 RegionCriticalPSets.push_back(PressureChange(i));
1544 }
1545 }
1546 LLVM_DEBUG({
1547 if (RegionCriticalPSets.size() > 0) {
1548 dbgs() << "Excess PSets: ";
1549 for (const PressureChange &RCPS : RegionCriticalPSets)
1550 dbgs() << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1551 dbgs() << "\n";
1552 }
1553 });
1554}
1555
1558 const std::vector<unsigned> &NewMaxPressure) {
1559 const PressureDiff &PDiff = getPressureDiff(SU);
1560 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1561 for (const PressureChange &PC : PDiff) {
1562 if (!PC.isValid())
1563 break;
1564 unsigned ID = PC.getPSet();
1565 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1566 ++CritIdx;
1567 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1568 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1569 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1570 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1571 }
1572 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1573 if (NewMaxPressure[ID] >= Limit - 2) {
1574 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1575 << NewMaxPressure[ID]
1576 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1577 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1578 << " livethru)\n");
1579 }
1580 }
1581}
1582
1583/// Update the PressureDiff array for liveness after scheduling this
1584/// instruction.
1586 for (const VRegMaskOrUnit &P : LiveUses) {
1587 Register Reg = P.RegUnit;
1588 /// FIXME: Currently assuming single-use physregs.
1589 if (!Reg.isVirtual())
1590 continue;
1591
1593 // If the register has just become live then other uses won't change
1594 // this fact anymore => decrement pressure.
1595 // If the register has just become dead then other uses make it come
1596 // back to life => increment pressure.
1597 bool Decrement = P.LaneMask.any();
1598
1599 for (const VReg2SUnit &V2SU
1600 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1601 SUnit &SU = *V2SU.SU;
1602 if (SU.isScheduled || &SU == &ExitSU)
1603 continue;
1604
1605 PressureDiff &PDiff = getPressureDiff(&SU);
1606 PDiff.addPressureChange(Reg, Decrement, &MRI);
1607 if (llvm::any_of(PDiff, [](const PressureChange &Change) {
1608 return Change.isValid();
1609 }))
1611 << " UpdateRegPressure: SU(" << SU.NodeNum << ") "
1612 << printReg(Reg, TRI) << ':'
1613 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1614 dbgs() << " to "; PDiff.dump(*TRI););
1615 }
1616 } else {
1617 assert(P.LaneMask.any());
1618 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1619 // This may be called before CurrentBottom has been initialized. However,
1620 // BotRPTracker must have a valid position. We want the value live into the
1621 // instruction or live out of the block, so ask for the previous
1622 // instruction's live-out.
1623 const LiveInterval &LI = LIS->getInterval(Reg);
1624 VNInfo *VNI;
1627 if (I == BB->end())
1628 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1629 else {
1631 VNI = LRQ.valueIn();
1632 }
1633 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1634 assert(VNI && "No live value at use.");
1635 for (const VReg2SUnit &V2SU
1636 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1637 SUnit *SU = V2SU.SU;
1638 // If this use comes before the reaching def, it cannot be a last use,
1639 // so decrease its pressure change.
1640 if (!SU->isScheduled && SU != &ExitSU) {
1641 LiveQueryResult LRQ =
1643 if (LRQ.valueIn() == VNI) {
1644 PressureDiff &PDiff = getPressureDiff(SU);
1645 PDiff.addPressureChange(Reg, true, &MRI);
1646 if (llvm::any_of(PDiff, [](const PressureChange &Change) {
1647 return Change.isValid();
1648 }))
1649 LLVM_DEBUG(dbgs() << " UpdateRegPressure: SU(" << SU->NodeNum
1650 << ") " << *SU->getInstr();
1651 dbgs() << " to ";
1652 PDiff.dump(*TRI););
1653 }
1654 }
1655 }
1656 }
1657 }
1658}
1659
1661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1662 if (EntrySU.getInstr() != nullptr)
1664 for (const SUnit &SU : SUnits) {
1665 dumpNodeAll(SU);
1666 if (ShouldTrackPressure) {
1667 dbgs() << " Pressure Diff : ";
1668 getPressureDiff(&SU).dump(*TRI);
1669 }
1670 dbgs() << " Single Issue : ";
1671 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1672 SchedModel.mustEndGroup(SU.getInstr()))
1673 dbgs() << "true;";
1674 else
1675 dbgs() << "false;";
1676 dbgs() << '\n';
1677 }
1678 if (ExitSU.getInstr() != nullptr)
1680#endif
1681}
1682
1683/// schedule - Called back from MachineScheduler::runOnMachineFunction
1684/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1685/// only includes instructions that have DAG nodes, not scheduling boundaries.
1686///
1687/// This is a skeletal driver, with all the functionality pushed into helpers,
1688/// so that it can be easily extended by experimental schedulers. Generally,
1689/// implementing MachineSchedStrategy should be sufficient to implement a new
1690/// scheduling algorithm. However, if a scheduler further subclasses
1691/// ScheduleDAGMILive then it will want to override this virtual method in order
1692/// to update any specialized state.
1694 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1695 LLVM_DEBUG(SchedImpl->dumpPolicy());
1697
1699
1700 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1701 findRootsAndBiasEdges(TopRoots, BotRoots);
1702
1703 // Initialize the strategy before modifying the DAG.
1704 // This may initialize a DFSResult to be used for queue priority.
1705 SchedImpl->initialize(this);
1706
1707 LLVM_DEBUG(dump());
1708 if (PrintDAGs) dump();
1710
1711 // Initialize ready queues now that the DAG and priority data are finalized.
1712 initQueues(TopRoots, BotRoots);
1713
1714 bool IsTopNode = false;
1715 while (true) {
1716 if (!checkSchedLimit())
1717 break;
1718
1719 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1720 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1721 if (!SU) break;
1722
1723 assert(!SU->isScheduled && "Node already scheduled");
1724
1725 scheduleMI(SU, IsTopNode);
1726
1727 if (DFSResult) {
1728 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1729 if (!ScheduledTrees.test(SubtreeID)) {
1730 ScheduledTrees.set(SubtreeID);
1731 DFSResult->scheduleTree(SubtreeID);
1732 SchedImpl->scheduleTree(SubtreeID);
1733 }
1734 }
1735
1736 // Notify the scheduling strategy after updating the DAG.
1737 SchedImpl->schedNode(SU, IsTopNode);
1738
1739 updateQueues(SU, IsTopNode);
1740 }
1741 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1742
1744
1745 LLVM_DEBUG({
1746 dbgs() << "*** Final schedule for "
1747 << printMBBReference(*begin()->getParent()) << " ***\n";
1748 dumpSchedule();
1749 dbgs() << '\n';
1750 });
1751}
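// As the comment above schedule() notes, most new pre-RA schedulers only need
// a MachineSchedStrategy, not a ScheduleDAGMILive subclass. A bare-bones
// strategy (hypothetical; it always schedules from the top and picks whichever
// node became ready most recently) looks roughly like:
//
//   struct MyStrategy : public MachineSchedStrategy {
//     std::vector<SUnit *> Ready;
//     void initialize(ScheduleDAGMI *DAG) override { Ready.clear(); }
//     SUnit *pickNode(bool &IsTopNode) override {
//       IsTopNode = true;
//       if (Ready.empty())
//         return nullptr;
//       SUnit *SU = Ready.back();
//       Ready.pop_back();
//       return SU;
//     }
//     void schedNode(SUnit *SU, bool IsTopNode) override {}
//     void releaseTopNode(SUnit *SU) override { Ready.push_back(SU); }
//     void releaseBottomNode(SUnit *SU) override {}
//   };
//
// Such a strategy would normally be exposed through the MachineSchedRegistry
// mechanism shown earlier in this file.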
1752
1753/// Build the DAG and setup three register pressure trackers.
1755 if (!ShouldTrackPressure) {
1756 RPTracker.reset();
1757 RegionCriticalPSets.clear();
1759 return;
1760 }
1761
1762 // Initialize the register pressure tracker used by buildSchedGraph.
1764 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1765
1766  // Account for liveness generated by the region boundary.
1767 if (LiveRegionEnd != RegionEnd)
1768 RPTracker.recede();
1769
1770 // Build the DAG, and compute current register pressure.
1772
1773 // Initialize top/bottom trackers after computing region pressure.
1775}
1776
1778 if (!DFSResult)
1779 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1780 DFSResult->clear();
1782 DFSResult->resize(SUnits.size());
1785}
1786
1787/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1788/// only provides the critical path for single block loops. To handle loops that
1789/// span blocks, we could use the vreg path latencies provided by
1790/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1791/// available for use in the scheduler.
1792///
1793/// The cyclic path estimation identifies a def-use pair that crosses the back
1794/// edge and considers the depth and height of the nodes. For example, consider
1795/// the following instruction sequence where each instruction has unit latency
1796/// and defines an eponymous virtual register:
1797///
1798/// a->b(a,c)->c(b)->d(c)->exit
1799///
1800/// The cyclic critical path is two cycles: b->c->b
1801/// The acyclic critical path is four cycles: a->b->c->d->exit
1802/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1803/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1804/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1805/// LiveInDepth = depth(b) = len(a->b) = 1
1806///
1807/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1808/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1809/// CyclicCriticalPath = min(2, 2) = 2
1810///
1811/// This could be relevant to PostRA scheduling, but is currently implemented
1812/// assuming LiveIntervals.
1814  // This only applies to single-block loops.
1815 if (!BB->isSuccessor(BB))
1816 return 0;
1817
1818 unsigned MaxCyclicLatency = 0;
1819 // Visit each live out vreg def to find def/use pairs that cross iterations.
1821 Register Reg = P.RegUnit;
1822 if (!Reg.isVirtual())
1823 continue;
1824 const LiveInterval &LI = LIS->getInterval(Reg);
1825 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1826 if (!DefVNI)
1827 continue;
1828
1830 const SUnit *DefSU = getSUnit(DefMI);
1831 if (!DefSU)
1832 continue;
1833
1834 unsigned LiveOutHeight = DefSU->getHeight();
1835 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1836 // Visit all local users of the vreg def.
1837 for (const VReg2SUnit &V2SU
1838 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1839 SUnit *SU = V2SU.SU;
1840 if (SU == &ExitSU)
1841 continue;
1842
1843 // Only consider uses of the phi.
1845 if (!LRQ.valueIn()->isPHIDef())
1846 continue;
1847
1848 // Assume that a path spanning two iterations is a cycle, which could
1849 // overestimate in strange cases. This allows cyclic latency to be
1850 // estimated as the minimum slack of the vreg's depth or height.
1851 unsigned CyclicLatency = 0;
1852 if (LiveOutDepth > SU->getDepth())
1853 CyclicLatency = LiveOutDepth - SU->getDepth();
1854
1855 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1856 if (LiveInHeight > LiveOutHeight) {
1857 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1858 CyclicLatency = LiveInHeight - LiveOutHeight;
1859 } else
1860 CyclicLatency = 0;
1861
1862 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1863 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1864 if (CyclicLatency > MaxCyclicLatency)
1865 MaxCyclicLatency = CyclicLatency;
1866 }
1867 }
1868 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1869 return MaxCyclicLatency;
1870}
1871
1872/// Release ExitSU predecessors and setup scheduler queues. Re-position
1873/// the Top RP tracker in case the region beginning has changed.
1875 ArrayRef<SUnit*> BotRoots) {
1876 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1877 if (ShouldTrackPressure) {
1878 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1880 }
1881}
1882
1883/// Move an instruction and update register pressure.
1884void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1885 // Move the instruction to its new location in the instruction stream.
1886 MachineInstr *MI = SU->getInstr();
1887
1888 if (IsTopNode) {
1889 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1890 if (&*CurrentTop == MI)
1892 else {
1895 }
1896
1897 if (ShouldTrackPressure) {
1898 // Update top scheduled pressure.
1899 RegisterOperands RegOpers;
1900 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1901 /*IgnoreDead=*/false);
1903 // Adjust liveness and add missing dead+read-undef flags.
1904 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1905 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1906 } else {
1907 // Adjust for missing dead-def flags.
1908 RegOpers.detectDeadDefs(*MI, *LIS);
1909 }
1910
1911 TopRPTracker.advance(RegOpers);
1912 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1913 LLVM_DEBUG(dbgs() << "Top Pressure: "; dumpRegSetPressure(
1915
1917 }
1918 } else {
1919 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1922 if (&*priorII == MI)
1923 CurrentBottom = priorII;
1924 else {
1925 if (&*CurrentTop == MI) {
1926 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1928 }
1930 CurrentBottom = MI;
1932 }
1933 if (ShouldTrackPressure) {
1934 RegisterOperands RegOpers;
1935 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
1936 /*IgnoreDead=*/false);
1938 // Adjust liveness and add missing dead+read-undef flags.
1939 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1940 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1941 } else {
1942 // Adjust for missing dead-def flags.
1943 RegOpers.detectDeadDefs(*MI, *LIS);
1944 }
1945
1949 BotRPTracker.recede(RegOpers, &LiveUses);
1950 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1951 LLVM_DEBUG(dbgs() << "Bottom Pressure: "; dumpRegSetPressure(
1953
1955 updatePressureDiffs(LiveUses);
1956 }
1957 }
1958}
1959
1960//===----------------------------------------------------------------------===//
1961// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1962//===----------------------------------------------------------------------===//
1963
1964namespace {
1965
1966/// Post-process the DAG to create cluster edges between neighboring
1967/// loads or between neighboring stores.
1968class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1969 struct MemOpInfo {
1970 SUnit *SU;
1972 int64_t Offset;
1973 LocationSize Width;
1974 bool OffsetIsScalable;
1975
1976 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1977 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1978 : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),
1979 OffsetIsScalable(OffsetIsScalable) {}
1980
1981 static bool Compare(const MachineOperand *const &A,
1982 const MachineOperand *const &B) {
1983 if (A->getType() != B->getType())
1984 return A->getType() < B->getType();
1985 if (A->isReg())
1986 return A->getReg() < B->getReg();
1987 if (A->isFI()) {
1988 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1990 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1992 return StackGrowsDown ? A->getIndex() > B->getIndex()
1993 : A->getIndex() < B->getIndex();
1994 }
1995
1996 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1997 "index bases.");
1998 }
1999
2000 bool operator<(const MemOpInfo &RHS) const {
2001 // FIXME: Don't compare everything twice. Maybe use C++20 three way
2002 // comparison instead when it's available.
2003 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
2004 RHS.BaseOps.begin(), RHS.BaseOps.end(),
2005 Compare))
2006 return true;
2007 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
2008 BaseOps.begin(), BaseOps.end(), Compare))
2009 return false;
2010 if (Offset != RHS.Offset)
2011 return Offset < RHS.Offset;
2012 return SU->NodeNum < RHS.SU->NodeNum;
2013 }
2014 };
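  // Illustration of the ordering above (hypothetical operands): two loads off
  // the same base register with offsets 8 and 16 compare equal on BaseOps and
  // are ordered by Offset, so the +8 access sorts first. Records with
  // different base registers are ordered by the base comparison, and full
  // ties fall back to the original SUnit numbering (NodeNum).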
2015
2016 const TargetInstrInfo *TII;
2017 const TargetRegisterInfo *TRI;
2018 bool IsLoad;
2019 bool ReorderWhileClustering;
2020
2021public:
2022 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
2023 const TargetRegisterInfo *tri, bool IsLoad,
2024 bool ReorderWhileClustering)
2025 : TII(tii), TRI(tri), IsLoad(IsLoad),
2026 ReorderWhileClustering(ReorderWhileClustering) {}
2027
2028 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2029
2030protected:
2031 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
2032 ScheduleDAGInstrs *DAG);
2033 void collectMemOpRecords(std::vector<SUnit> &SUnits,
2034 SmallVectorImpl<MemOpInfo> &MemOpRecords);
2035 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
2037};
2038
2039class StoreClusterMutation : public BaseMemOpClusterMutation {
2040public:
2041 StoreClusterMutation(const TargetInstrInfo *tii,
2042 const TargetRegisterInfo *tri,
2043 bool ReorderWhileClustering)
2044 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
2045};
2046
2047class LoadClusterMutation : public BaseMemOpClusterMutation {
2048public:
2049 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
2050 bool ReorderWhileClustering)
2051 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
2052};
2053
2054} // end anonymous namespace
2055
2056namespace llvm {
2057
2058std::unique_ptr<ScheduleDAGMutation>
2060 const TargetRegisterInfo *TRI,
2061 bool ReorderWhileClustering) {
2062 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
2063 TII, TRI, ReorderWhileClustering)
2064 : nullptr;
2065}
2066
2067std::unique_ptr<ScheduleDAGMutation>
2069 const TargetRegisterInfo *TRI,
2070 bool ReorderWhileClustering) {
2071 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
2072 TII, TRI, ReorderWhileClustering)
2073 : nullptr;
2074}
2075
2076} // end namespace llvm
2077
2078// Sort all the loads/stores first. Then, for each load/store, check the
2079// following loads/stores one by one until the first non-dependent one is
2080// reached, and call the target hook to see if they can be clustered.
2081// If FastCluster is enabled, we assume that all the loads/stores have been
2082// preprocessed and no longer have dependencies on each other.
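// For example (hypothetical records, after sorting): given (%base,+0),
// (%base,+4) and (%base,+8), the mutation first asks the target about the
// pair (+0,+4); if it is accepted, (+4) becomes the recorded cluster tail and
// the pair (+4,+8) is tried next with ClusterLength = 3.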
2083void BaseMemOpClusterMutation::clusterNeighboringMemOps(
2084 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
2085 ScheduleDAGInstrs *DAG) {
2086 // Keep track of the current cluster length and bytes for each SUnit.
2089
2090  // At this point, the `MemOpRecords` array must hold at least two mem ops.
2091  // Try to cluster the mem ops collected within the `MemOpRecords` array.
2092 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
2093    // The decision to cluster mem ops is made based on target-dependent logic.
2094 auto MemOpa = MemOpRecords[Idx];
2095
2096    // Seek the next load/store with which to form a cluster.
2097 unsigned NextIdx = Idx + 1;
2098 for (; NextIdx < End; ++NextIdx)
2099      // Skip if MemOpb has already been clustered or has a dependency on
2100      // MemOpa.
2101 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
2102 (FastCluster ||
2103 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
2104 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
2105 break;
2106 if (NextIdx == End)
2107 continue;
2108
2109 auto MemOpb = MemOpRecords[NextIdx];
2110 unsigned ClusterLength = 2;
2111 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
2112 MemOpb.Width.getValue().getKnownMinValue();
2113 auto It = SUnit2ClusterInfo.find(MemOpa.SU->NodeNum);
2114 if (It != SUnit2ClusterInfo.end()) {
2115 const auto &[Len, Bytes] = It->second;
2116 ClusterLength = Len + 1;
2117 CurrentClusterBytes = Bytes + MemOpb.Width.getValue().getKnownMinValue();
2118 }
2119
2120 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
2121 MemOpa.OffsetIsScalable, MemOpb.BaseOps,
2122 MemOpb.Offset, MemOpb.OffsetIsScalable,
2123 ClusterLength, CurrentClusterBytes))
2124 continue;
2125
2126 SUnit *SUa = MemOpa.SU;
2127 SUnit *SUb = MemOpb.SU;
2128
2129 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
2130 std::swap(SUa, SUb);
2131
2132 // FIXME: Is this check really required?
2133 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
2134 continue;
2135
2136 Clusters.unionSets(SUa, SUb);
2137 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
2138 << SUb->NodeNum << ")\n");
2139 ++NumClustered;
2140
2141 if (IsLoad) {
2142 // Copy successor edges from SUa to SUb. Interleaving computation
2143 // dependent on SUa can prevent load combining due to register reuse.
2144 // Predecessor edges do not need to be copied from SUb to SUa since
2145 // nearby loads should have effectively the same inputs.
2146 for (const SDep &Succ : SUa->Succs) {
2147 if (Succ.getSUnit() == SUb)
2148 continue;
2149 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
2150 << ")\n");
2151 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
2152 }
2153 } else {
2154      // Copy predecessor edges from SUb to SUa so that the SUnits SUb
2155      // depends on are not scheduled in between SUb and SUa. Successor edges
2156      // do not need to be copied from SUa to SUb since nothing will depend
2157      // on stores.
2158      // Note that we don't need to worry about memory dependencies here,
2159      // since mem ops are never clustered if they have any memory dependency.
2160 for (const SDep &Pred : SUb->Preds) {
2161 if (Pred.getSUnit() == SUa)
2162 continue;
2163 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
2164 << ")\n");
2165 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
2166 }
2167 }
2168
2169 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
2170 CurrentClusterBytes};
2171
2172 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
2173 << ", Curr cluster bytes: " << CurrentClusterBytes
2174 << "\n");
2175 }
2176
2177 // Add cluster group information.
2178 // Iterate over all of the equivalence sets.
2179 auto &AllClusters = DAG->getClusters();
2180 for (const EquivalenceClasses<SUnit *>::ECValue *I : Clusters) {
2181 if (!I->isLeader())
2182 continue;
2183 ClusterInfo Group;
2184 unsigned ClusterIdx = AllClusters.size();
2185 for (SUnit *MemberI : Clusters.members(*I)) {
2186 MemberI->ParentClusterIdx = ClusterIdx;
2187 Group.insert(MemberI);
2188 }
2189 AllClusters.push_back(Group);
2190 }
2191}
2192
2193void BaseMemOpClusterMutation::collectMemOpRecords(
2194 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
2195 for (auto &SU : SUnits) {
2196 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
2197 (!IsLoad && !SU.getInstr()->mayStore()))
2198 continue;
2199
2200 const MachineInstr &MI = *SU.getInstr();
2202 int64_t Offset;
2203 bool OffsetIsScalable;
2206 OffsetIsScalable, Width, TRI)) {
2207 if (!Width.hasValue())
2208 continue;
2209
2210 MemOpRecords.push_back(
2211 MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
2212
2213 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
2214 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
2215 << ", Width: " << Width << "\n");
2216 }
2217#ifndef NDEBUG
2218 for (const auto *Op : BaseOps)
2219 assert(Op);
2220#endif
2221 }
2222}
2223
2224bool BaseMemOpClusterMutation::groupMemOps(
2227 bool FastCluster =
2229 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
2230
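  // Worked example: with 200 clusterable mem ops in a region of 600 SUnits,
  // 200 * 600 / 1000 = 120; if that exceeds FastClusterThreshold, the
  // quadratic IsReachable() dependency checks are skipped and the ops are
  // instead grouped below by their ctrl-dependence predecessor.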
2231 for (const auto &MemOp : MemOps) {
2232 unsigned ChainPredID = DAG->SUnits.size();
2233 if (FastCluster) {
2234 for (const SDep &Pred : MemOp.SU->Preds) {
2235        // We only want to cluster mem ops that share the same ctrl (non-data)
2236        // pred, so that they have no ctrl dependency on each other. But for
2237        // store instrs, we can still cluster them if the pred is a load instr.
2238 if ((Pred.isCtrl() &&
2239 (IsLoad ||
2240 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
2241 !Pred.isArtificial()) {
2242 ChainPredID = Pred.getSUnit()->NodeNum;
2243 break;
2244 }
2245 }
2246 } else
2247 ChainPredID = 0;
2248
2249 Groups[ChainPredID].push_back(MemOp);
2250 }
2251 return FastCluster;
2252}
2253
2254/// Callback from DAG postProcessing to create cluster edges for loads/stores.
2255void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
2256 // Collect all the clusterable loads/stores
2257 SmallVector<MemOpInfo, 32> MemOpRecords;
2258 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2259
2260 if (MemOpRecords.size() < 2)
2261 return;
2262
2263  // If the DAG is too complex, use a heuristic to put independent
2264  // loads/stores into the same group, avoiding a compile-time blow up.
2265  // Note that some fusion pairs may be lost as a result.
2267 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2268
2269 for (auto &Group : Groups) {
2270    // Sort the loads/stores so that clustering can stop as early as
2271    // possible.
2272 llvm::sort(Group.second);
2273
2274    // Try to cluster all the neighboring loads/stores.
2275 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2276 }
2277}
2278
2279//===----------------------------------------------------------------------===//
2280// CopyConstrain - DAG post-processing to encourage copy elimination.
2281//===----------------------------------------------------------------------===//
2282
2283namespace {
2284
2285/// Post-process the DAG to create weak edges from all uses of a copy to
2286/// the one use that defines the copy's source vreg, most likely an induction
2287/// variable increment.
2288class CopyConstrain : public ScheduleDAGMutation {
2289 // Transient state.
2290 SlotIndex RegionBeginIdx;
2291
2292 // RegionEndIdx is the slot index of the last non-debug instruction in the
2293 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2294 SlotIndex RegionEndIdx;
2295
2296public:
2297 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2298
2299 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2300
2301protected:
2302 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2303};
2304
2305} // end anonymous namespace
2306
2307namespace llvm {
2308
2309std::unique_ptr<ScheduleDAGMutation>
2311 const TargetRegisterInfo *TRI) {
2312 return std::make_unique<CopyConstrain>(TII, TRI);
2313}
2314
2315} // end namespace llvm
2316
2317/// constrainLocalCopy handles two possibilities:
2318/// 1) Local src:
2319/// I0: = dst
2320/// I1: src = ...
2321/// I2: = dst
2322/// I3: dst = src (copy)
2323/// (create pred->succ edges I0->I1, I2->I1)
2324///
2325/// 2) Local copy:
2326/// I0: dst = src (copy)
2327/// I1: = dst
2328/// I2: src = ...
2329/// I3: = dst
2330/// (create pred->succ edges I1->I2, I3->I2)
2331///
2332/// Although the MachineScheduler is currently constrained to single blocks,
2333/// this algorithm should handle extended blocks. An EBB is a set of
2334/// contiguously numbered blocks such that the previous block in the EBB is
2335/// always the single predecessor.
2336void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2337 LiveIntervals *LIS = DAG->getLIS();
2338 MachineInstr *Copy = CopySU->getInstr();
2339
2340 // Check for pure vreg copies.
2341 const MachineOperand &SrcOp = Copy->getOperand(1);
2342 Register SrcReg = SrcOp.getReg();
2343 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2344 return;
2345
2346 const MachineOperand &DstOp = Copy->getOperand(0);
2347 Register DstReg = DstOp.getReg();
2348 if (!DstReg.isVirtual() || DstOp.isDead())
2349 return;
2350
2351 // Check if either the dest or source is local. If it's live across a back
2352 // edge, it's not local. Note that if both vregs are live across the back
2353  // edge, we cannot successfully constrain the copy without cyclic scheduling.
2354 // If both the copy's source and dest are local live intervals, then we
2355 // should treat the dest as the global for the purpose of adding
2356 // constraints. This adds edges from source's other uses to the copy.
2357 unsigned LocalReg = SrcReg;
2358 unsigned GlobalReg = DstReg;
2359 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2360 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2361 LocalReg = DstReg;
2362 GlobalReg = SrcReg;
2363 LocalLI = &LIS->getInterval(LocalReg);
2364 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2365 return;
2366 }
2367 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2368
2369 // Find the global segment after the start of the local LI.
2370 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2371 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2372 // local live range. We could create edges from other global uses to the local
2373 // start, but the coalescer should have already eliminated these cases, so
2374 // don't bother dealing with it.
2375 if (GlobalSegment == GlobalLI->end())
2376 return;
2377
2378 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2379 // returned the next global segment. But if GlobalSegment overlaps with
2380 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2381 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2382 if (GlobalSegment->contains(LocalLI->beginIndex()))
2383 ++GlobalSegment;
2384
2385 if (GlobalSegment == GlobalLI->end())
2386 return;
2387
2388 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2389 if (GlobalSegment != GlobalLI->begin()) {
2390 // Two address defs have no hole.
2391 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2392 GlobalSegment->start)) {
2393 return;
2394 }
2395    // If the prior global segment may be defined by the same two-address
2396    // instruction that also defines LocalLI, then we can't make a hole here.
2397 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2398 LocalLI->beginIndex())) {
2399 return;
2400 }
2401 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2402 // it would be a disconnected component in the live range.
2403 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2404 "Disconnected LRG within the scheduling region.");
2405 }
2406 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2407 if (!GlobalDef)
2408 return;
2409
2410 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2411 if (!GlobalSU)
2412 return;
2413
2414 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2415 // constraining the uses of the last local def to precede GlobalDef.
2416 SmallVector<SUnit*,8> LocalUses;
2417 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2418 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2419 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2420 for (const SDep &Succ : LastLocalSU->Succs) {
2421 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2422 continue;
2423 if (Succ.getSUnit() == GlobalSU)
2424 continue;
2425 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2426 return;
2427 LocalUses.push_back(Succ.getSUnit());
2428 }
2429 // Open the top of the GlobalLI hole by constraining any earlier global uses
2430 // to precede the start of LocalLI.
2431 SmallVector<SUnit*,8> GlobalUses;
2432 MachineInstr *FirstLocalDef =
2433 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2434 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2435 for (const SDep &Pred : GlobalSU->Preds) {
2436 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2437 continue;
2438 if (Pred.getSUnit() == FirstLocalSU)
2439 continue;
2440 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2441 return;
2442 GlobalUses.push_back(Pred.getSUnit());
2443 }
2444 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2445 // Add the weak edges.
2446 for (SUnit *LU : LocalUses) {
2447 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2448 << GlobalSU->NodeNum << ")\n");
2449 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2450 }
2451 for (SUnit *GU : GlobalUses) {
2452 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2453 << FirstLocalSU->NodeNum << ")\n");
2454 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2455 }
2456}
2457
2458/// Callback from DAG postProcessing to create weak edges to encourage
2459/// copy elimination.
2460void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2461 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2462 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2463
2464 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2465 if (FirstPos == DAG->end())
2466 return;
2467 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2468 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2469 *priorNonDebug(DAG->end(), DAG->begin()));
2470
2471 for (SUnit &SU : DAG->SUnits) {
2472 if (!SU.getInstr()->isCopy())
2473 continue;
2474
2475 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2476 }
2477}
2478
2479//===----------------------------------------------------------------------===//
2480// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2481// and possibly other custom schedulers.
2482//===----------------------------------------------------------------------===//
2483
2484static const unsigned InvalidCycle = ~0U;
2485
2487
2488/// Given a Count of resource usage and a Latency value, return true if a
2489/// SchedBoundary becomes resource limited.
2490/// If we are checking after scheduling a node, we should return true when
2491/// we just reach the resource limit.
2492static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2493 unsigned Latency, bool AfterSchedNode) {
2494 int ResCntFactor = (int)(Count - (Latency * LFactor));
2495 if (AfterSchedNode)
2496 return ResCntFactor >= (int)LFactor;
2497 else
2498 return ResCntFactor > (int)LFactor;
2499}
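// Worked example for checkResourceLimit (illustrative numbers only): with a
// latency factor LFactor = 2, an executed resource Count = 10, and a Latency
// of 4 cycles, ResCntFactor = 10 - (4 * 2) = 2. When checking after
// scheduling a node (AfterSchedNode == true) the zone counts as resource
// limited because 2 >= 2; when checking beforehand it does not, because
// 2 > 2 is false.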
2500
2502 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2503 // Destroying and reconstructing it is very expensive though. So keep
2504 // invalid, placeholder HazardRecs.
2505 if (HazardRec && HazardRec->isEnabled()) {
2506 delete HazardRec;
2507 HazardRec = nullptr;
2508 }
2509 Available.clear();
2510 Pending.clear();
2511 CheckPending = false;
2512 CurrCycle = 0;
2513 CurrMOps = 0;
2514 MinReadyCycle = std::numeric_limits<unsigned>::max();
2515 ExpectedLatency = 0;
2516 DependentLatency = 0;
2517 RetiredMOps = 0;
2518 MaxExecutedResCount = 0;
2519 ZoneCritResIdx = 0;
2520 IsResourceLimited = false;
2521 ReservedCycles.clear();
2522 ReservedResourceSegments.clear();
2523 ReservedCyclesIndex.clear();
2524 ResourceGroupSubUnitMasks.clear();
2525#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2526 // Track the maximum number of stall cycles that could arise either from the
2527 // latency of a DAG edge or the number of cycles that a processor resource is
2528 // reserved (SchedBoundary::ReservedCycles).
2529 MaxObservedStall = 0;
2530#endif
2531 // Reserve a zero-count for invalid CritResIdx.
2532 ExecutedResCounts.resize(1);
2533 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2534}
2535
2537init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2538 reset();
2539 if (!SchedModel->hasInstrSchedModel())
2540 return;
2542 for (SUnit &SU : DAG->SUnits) {
2543 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2544 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2545 * SchedModel->getMicroOpFactor();
2547 PI = SchedModel->getWriteProcResBegin(SC),
2548 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2549 unsigned PIdx = PI->ProcResourceIdx;
2550 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2551 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2552 RemainingCounts[PIdx] +=
2553 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2554 }
2555 }
2556}
2557
2559init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2560 reset();
2561 DAG = dag;
2562 SchedModel = smodel;
2563 Rem = rem;
2565 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2566 ReservedCyclesIndex.resize(ResourceCount);
2567 ExecutedResCounts.resize(ResourceCount);
2568 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2569 unsigned NumUnits = 0;
2570
2571 for (unsigned i = 0; i < ResourceCount; ++i) {
2572 ReservedCyclesIndex[i] = NumUnits;
2573 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2574 if (isUnbufferedGroup(i)) {
2575 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2576 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2577 U != UE; ++U)
2578 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2579 }
2580 }
2581
2582 ReservedCycles.resize(NumUnits, InvalidCycle);
2583 }
2584}
2585
2586/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2587/// these "soft stalls" differently than the hard stall cycles based on CPU
2588/// resources and computed by checkHazard(). A fully in-order model
2589/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2590/// available for scheduling until they are ready. However, a weaker in-order
2591/// model may use this for heuristics. For example, if a processor has in-order
2592/// behavior when reading certain resources, this may come into play.
2594 if (!SU->isUnbuffered)
2595 return 0;
2596
2597 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2598 if (ReadyCycle > CurrCycle)
2599 return ReadyCycle - CurrCycle;
2600 return 0;
2601}
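// For example, if an unbuffered SU becomes ready at cycle 7 while the zone is
// at CurrCycle 5, the heuristics see a soft stall of 7 - 5 = 2 cycles.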
2602
2603/// Compute the next cycle at which the given processor resource unit
2604/// can be scheduled.
2606 unsigned ReleaseAtCycle,
2607 unsigned AcquireAtCycle) {
2609 if (isTop())
2610 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2611 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2612
2613 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2614 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2615 }
2616
2617 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2618 // If this resource has never been used, always return cycle zero.
2619 if (NextUnreserved == InvalidCycle)
2620 return CurrCycle;
2621 // For bottom-up scheduling add the cycles needed for the current operation.
2622 if (!isTop())
2623 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2624 return NextUnreserved;
2625}
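// Example for the counter-based path above: if ReservedCycles[InstanceIdx] is
// 3 and ReleaseAtCycle is 2, a bottom-up zone currently at cycle 4 reports
// max(4, 3 + 2) = 5 as the next cycle at which this unit can be used, while a
// top-down zone simply reports the recorded cycle 3 (or CurrCycle if the unit
// has never been used).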
2626
2627/// Compute the next cycle at which the given processor resource can be
2628/// scheduled. Returns the next cycle and the index of the processor resource
2629/// instance in the reserved cycles vector.
2630std::pair<unsigned, unsigned>
2632 unsigned ReleaseAtCycle,
2633 unsigned AcquireAtCycle) {
2635 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2637 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2638 }
2639 unsigned MinNextUnreserved = InvalidCycle;
2640 unsigned InstanceIdx = 0;
2641 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2642 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2643 assert(NumberOfInstances > 0 &&
2644 "Cannot have zero instances of a ProcResource");
2645
2646 if (isUnbufferedGroup(PIdx)) {
2647 // If any subunits are used by the instruction, report that the
2648 // subunits of the resource group are available at the first cycle
2649 // in which the unit is available, effectively removing the group
2650 // record from hazarding and basing the hazarding decisions on the
2651 // subunit records. Otherwise, choose the first available instance
2652 // from among the subunits. Specifications which assign cycles to
2653 // both the subunits and the group or which use an unbuffered
2654 // group with buffered subunits will appear to schedule
2655 // strangely. In the first case, the additional cycles for the
2656 // group will be ignored. In the second, the group will be
2657 // ignored entirely.
2658 for (const MCWriteProcResEntry &PE :
2661 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2662 return std::make_pair(getNextResourceCycleByInstance(
2663 StartIndex, ReleaseAtCycle, AcquireAtCycle),
2664 StartIndex);
2665
2666 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2667 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2668 unsigned NextUnreserved, NextInstanceIdx;
2669 std::tie(NextUnreserved, NextInstanceIdx) =
2670 getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2671 if (MinNextUnreserved > NextUnreserved) {
2672 InstanceIdx = NextInstanceIdx;
2673 MinNextUnreserved = NextUnreserved;
2674 }
2675 }
2676 return std::make_pair(MinNextUnreserved, InstanceIdx);
2677 }
2678
2679 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2680 ++I) {
2681 unsigned NextUnreserved =
2682 getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2684 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2685 << NextUnreserved << "c\n");
2686 if (MinNextUnreserved > NextUnreserved) {
2687 InstanceIdx = I;
2688 MinNextUnreserved = NextUnreserved;
2689 }
2690 }
2692 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2693 << "[" << InstanceIdx - StartIndex << "]"
2694 << " available @" << MinNextUnreserved << "c"
2695 << "\n");
2696 return std::make_pair(MinNextUnreserved, InstanceIdx);
2697}
2698
2699/// Does this SU have a hazard within the current instruction group.
2700///
2701/// The scheduler supports two modes of hazard recognition. The first is the
2702/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2703/// supports highly complicated in-order reservation tables
2704/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2705///
2706/// The second is a streamlined mechanism that checks for hazards based on
2707/// simple counters that the scheduler itself maintains. It explicitly checks
2708/// for instruction dispatch limitations, including the number of micro-ops that
2709/// can dispatch per cycle.
2710///
2711/// TODO: Also check whether the SU must start a new group.
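// For example, with an issue width of 4 and CurrMOps == 3, an SU that expands
// to 2 micro-ops is reported as a hazard below because 3 + 2 > 4, so it is
// deferred until the cycle is bumped.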
2713 if (HazardRec->isEnabled()
2716 << "hazard: SU(" << SU->NodeNum << ") reported by HazardRec\n");
2717 return true;
2718 }
2719
2720 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2721 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2722 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") uops="
2723 << uops << ", CurrMOps = " << CurrMOps << ", "
2724 << "CurrMOps + uops > issue width of "
2725 << SchedModel->getIssueWidth() << "\n");
2726 return true;
2727 }
2728
2729 if (CurrMOps > 0 &&
2730 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2731 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2732 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum << ") must "
2733 << (isTop() ? "begin" : "end") << " group\n");
2734 return true;
2735 }
2736
2738 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2739 for (const MCWriteProcResEntry &PE :
2742 unsigned ResIdx = PE.ProcResourceIdx;
2743 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2744 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2745 unsigned NRCycle, InstanceIdx;
2746 std::tie(NRCycle, InstanceIdx) =
2747 getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2748 if (NRCycle > CurrCycle) {
2749#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2750 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2751#endif
2753 << "hazard: SU(" << SU->NodeNum << ") "
2754 << SchedModel->getResourceName(ResIdx) << '['
2755 << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' << "="
2756 << NRCycle << "c, is later than "
2757 << "CurrCycle = " << CurrCycle << "c\n");
2758 return true;
2759 }
2760 }
2761 }
2762 return false;
2763}
2764
2765// Find the unscheduled node in ReadySUs with the highest latency.
2768 SUnit *LateSU = nullptr;
2769 unsigned RemLatency = 0;
2770 for (SUnit *SU : ReadySUs) {
2771 unsigned L = getUnscheduledLatency(SU);
2772 if (L > RemLatency) {
2773 RemLatency = L;
2774 LateSU = SU;
2775 }
2776 }
2777 if (LateSU) {
2778 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2779 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2780 }
2781 return RemLatency;
2782}
2783
2784// Count resources in this zone and the remaining unscheduled
2785// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2786// resource index, or zero if the zone is issue limited.
2788getOtherResourceCount(unsigned &OtherCritIdx) {
2789 OtherCritIdx = 0;
2791 return 0;
2792
2793 unsigned OtherCritCount = Rem->RemIssueCount
2794 + (RetiredMOps * SchedModel->getMicroOpFactor());
2795 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2796 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2797 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2798 PIdx != PEnd; ++PIdx) {
2799 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2800 if (OtherCount > OtherCritCount) {
2801 OtherCritCount = OtherCount;
2802 OtherCritIdx = PIdx;
2803 }
2804 }
2805 if (OtherCritIdx) {
2806 LLVM_DEBUG(
2807 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2808 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2809 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2810 }
2811 return OtherCritCount;
2812}
2813
2814void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2815 unsigned Idx) {
2816 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2817
2818#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2819  // ReadyCycle has been bumped up to CurrCycle when this node was
2820  // scheduled, but CurrCycle may have been eagerly advanced immediately
2821  // after scheduling, so it may now be greater than ReadyCycle.
2822 if (ReadyCycle > CurrCycle)
2823 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2824#endif
2825
2826 if (ReadyCycle < MinReadyCycle)
2827 MinReadyCycle = ReadyCycle;
2828
2829 // Check for interlocks first. For the purpose of other heuristics, an
2830 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2831 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2832 bool HazardDetected = !IsBuffered && ReadyCycle > CurrCycle;
2833 if (HazardDetected)
2834 LLVM_DEBUG(dbgs().indent(2) << "hazard: SU(" << SU->NodeNum
2835 << ") ReadyCycle = " << ReadyCycle
2836 << " is later than CurrCycle = " << CurrCycle
2837 << " on an unbuffered resource" << "\n");
2838 else
2839 HazardDetected = checkHazard(SU);
2840
2841 if (!HazardDetected && Available.size() >= ReadyListLimit) {
2842 HazardDetected = true;
2843 LLVM_DEBUG(dbgs().indent(2) << "hazard: Available Q is full (size: "
2844 << Available.size() << ")\n");
2845 }
2846
2847 if (!HazardDetected) {
2848 Available.push(SU);
2850 << "Move SU(" << SU->NodeNum << ") into Available Q\n");
2851
2852 if (InPQueue)
2854 return;
2855 }
2856
2857 if (!InPQueue)
2858 Pending.push(SU);
2859}
2860
2861/// Move the boundary of scheduled code by one cycle.
2862void SchedBoundary::bumpCycle(unsigned NextCycle) {
2863 if (SchedModel->getMicroOpBufferSize() == 0) {
2864 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2865 "MinReadyCycle uninitialized");
2866 if (MinReadyCycle > NextCycle)
2867 NextCycle = MinReadyCycle;
2868 }
2869 // Update the current micro-ops, which will issue in the next cycle.
2870 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2871 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2872
2873 // Decrement DependentLatency based on the next cycle.
2874 if ((NextCycle - CurrCycle) > DependentLatency)
2875 DependentLatency = 0;
2876 else
2877 DependentLatency -= (NextCycle - CurrCycle);
2878
2879 if (!HazardRec->isEnabled()) {
2880 // Bypass HazardRec virtual calls.
2881 CurrCycle = NextCycle;
2882 } else {
2883 // Bypass getHazardType calls in case of long latency.
2884 for (; CurrCycle != NextCycle; ++CurrCycle) {
2885 if (isTop())
2887 else
2889 }
2890 }
2891 CheckPending = true;
2892 IsResourceLimited =
2894 getScheduledLatency(), true);
2895
2896 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2897 << '\n');
2898}
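// Worked example for bumpCycle: advancing from CurrCycle 10 to NextCycle 12
// with an issue width of 4 clears up to DecMOps = 4 * 2 = 8 in-flight
// micro-ops, and any DependentLatency of 2 cycles or less drops to zero.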
2899
2900void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2901 ExecutedResCounts[PIdx] += Count;
2902 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2903 MaxExecutedResCount = ExecutedResCounts[PIdx];
2904}
2905
2906/// Add the given processor resource to this scheduled zone.
2907///
2908/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2909/// cycles during which this resource is released.
2910///
2911/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2912/// cycles at which the resource is acquired after issue (assuming no stalls).
2913///
2914/// \return the next cycle at which the instruction may execute without
2915/// oversubscribing resources.
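/// For example, a resource with factor 3 that is held from AcquireAtCycle 0
/// to ReleaseAtCycle 2 contributes Count = 3 * (2 - 0) = 6 scaled units to
/// this zone, which may promote it to the zone's critical resource.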
2916unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2917 unsigned ReleaseAtCycle,
2918 unsigned NextCycle,
2919 unsigned AcquireAtCycle) {
2920 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2921 unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);
2922 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2923 << ReleaseAtCycle << "x" << Factor << "u\n");
2924
2925 // Update Executed resources counts.
2926 incExecutedResources(PIdx, Count);
2927 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2928 Rem->RemainingCounts[PIdx] -= Count;
2929
2930 // Check if this resource exceeds the current critical resource. If so, it
2931 // becomes the critical resource.
2932 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2933 ZoneCritResIdx = PIdx;
2934 LLVM_DEBUG(dbgs() << " *** Critical resource "
2935 << SchedModel->getResourceName(PIdx) << ": "
2937 << "c\n");
2938 }
2939 // For reserved resources, record the highest cycle using the resource.
2940 unsigned NextAvailable, InstanceIdx;
2941 std::tie(NextAvailable, InstanceIdx) =
2942 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2943 if (NextAvailable > CurrCycle) {
2944 LLVM_DEBUG(dbgs() << " Resource conflict: "
2945 << SchedModel->getResourceName(PIdx)
2946 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2947 << " reserved until @" << NextAvailable << "\n");
2948 }
2949 return NextAvailable;
2950}
2951
2952/// Move the boundary of scheduled code by one SUnit.
2954 // Update the reservation table.
2955 if (HazardRec->isEnabled()) {
2956 if (!isTop() && SU->isCall) {
2957 // Calls are scheduled with their preceding instructions. For bottom-up
2958 // scheduling, clear the pipeline state before emitting.
2959 HazardRec->Reset();
2960 }
2962 // Scheduling an instruction may have made pending instructions available.
2963 CheckPending = true;
2964 }
2965 // checkHazard should prevent scheduling multiple instructions per cycle that
2966 // exceed the issue width.
2967 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2968 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2969 assert(
2970 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2971 "Cannot schedule this instruction's MicroOps in the current cycle.");
2972
2973 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2974 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2975
2976 unsigned NextCycle = CurrCycle;
2977 switch (SchedModel->getMicroOpBufferSize()) {
2978 case 0:
2979 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2980 break;
2981 case 1:
2982 if (ReadyCycle > NextCycle) {
2983 NextCycle = ReadyCycle;
2984 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2985 }
2986 break;
2987 default:
2988 // We don't currently model the OOO reorder buffer, so consider all
2989 // scheduled MOps to be "retired". We do loosely model in-order resource
2990 // latency. If this instruction uses an in-order resource, account for any
2991 // likely stall cycles.
2992 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2993 NextCycle = ReadyCycle;
2994 break;
2995 }
2996 RetiredMOps += IncMOps;
2997
2998 // Update resource counts and critical resource.
3000 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
3001 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
3002 Rem->RemIssueCount -= DecRemIssue;
3003 if (ZoneCritResIdx) {
3004 // Scale scheduled micro-ops for comparing with the critical resource.
3005 unsigned ScaledMOps =
3006 RetiredMOps * SchedModel->getMicroOpFactor();
3007
3008 // If scaled micro-ops are now more than the previous critical resource by
3009 // a full cycle, then micro-ops issue becomes critical.
3010 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
3011 >= (int)SchedModel->getLatencyFactor()) {
3012 ZoneCritResIdx = 0;
3013 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
3014 << ScaledMOps / SchedModel->getLatencyFactor()
3015 << "c\n");
3016 }
3017 }
3020 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3021 unsigned RCycle =
3022 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
3023 PI->AcquireAtCycle);
3024 if (RCycle > NextCycle)
3025 NextCycle = RCycle;
3026 }
3027 if (SU->hasReservedResource) {
3028 // For reserved resources, record the highest cycle using the resource.
3029 // For top-down scheduling, this is the cycle in which we schedule this
3030    // instruction plus the number of cycles the operation reserves the
3031    // resource. For bottom-up, it is simply the instruction's cycle.
3034 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3035 unsigned PIdx = PI->ProcResourceIdx;
3036 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
3037
3039 unsigned ReservedUntil, InstanceIdx;
3040 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
3041 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3042 if (isTop()) {
3043 ReservedResourceSegments[InstanceIdx].add(
3045 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
3047 } else {
3048 ReservedResourceSegments[InstanceIdx].add(
3050 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
3052 }
3053 } else {
3054
3055 unsigned ReservedUntil, InstanceIdx;
3056 std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
3057 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
3058 if (isTop()) {
3059 ReservedCycles[InstanceIdx] =
3060 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
3061 } else
3062 ReservedCycles[InstanceIdx] = NextCycle;
3063 }
3064 }
3065 }
3066 }
3067 }
3068 // Update ExpectedLatency and DependentLatency.
3069 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
3070 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
3071 if (SU->getDepth() > TopLatency) {
3072 TopLatency = SU->getDepth();
3073 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
3074 << SU->NodeNum << ") " << TopLatency << "c\n");
3075 }
3076 if (SU->getHeight() > BotLatency) {
3077 BotLatency = SU->getHeight();
3078 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
3079 << SU->NodeNum << ") " << BotLatency << "c\n");
3080 }
3081 // If we stall for any reason, bump the cycle.
3082 if (NextCycle > CurrCycle)
3083 bumpCycle(NextCycle);
3084 else
3085 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
3086 // resource limited. If a stall occurred, bumpCycle does this.
3087 IsResourceLimited =
3089 getScheduledLatency(), true);
3090
3091 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
3092 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
3093 // one cycle. Since we commonly reach the max MOps here, opportunistically
3094 // bump the cycle to avoid uselessly checking everything in the readyQ.
3095 CurrMOps += IncMOps;
3096
3097 // Bump the cycle count for issue group constraints.
3098  // This must be done after NextCycle has been adjusted for all other stalls.
3099  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
3100  // CurrCycle to X.
3101 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
3102 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
3103 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
3104 << " group\n");
3105 bumpCycle(++NextCycle);
3106 }
3107
3108 while (CurrMOps >= SchedModel->getIssueWidth()) {
3109 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
3110 << CurrCycle << '\n');
3111 bumpCycle(++NextCycle);
3112 }
3114}
3115
3116/// Release pending ready nodes into the available queue. This makes them
3117/// visible to heuristics.
3119 // If the available queue is empty, it is safe to reset MinReadyCycle.
3120 if (Available.empty())
3121 MinReadyCycle = std::numeric_limits<unsigned>::max();
3122
3123 // Check to see if any of the pending instructions are ready to issue. If
3124 // so, add them to the available queue.
3125 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
3126 SUnit *SU = *(Pending.begin() + I);
3127 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
3128
3129 LLVM_DEBUG(dbgs() << "Checking pending node SU(" << SU->NodeNum << ")\n");
3130
3131 if (ReadyCycle < MinReadyCycle)
3132 MinReadyCycle = ReadyCycle;
3133
3134 if (Available.size() >= ReadyListLimit)
3135 break;
3136
3137 releaseNode(SU, ReadyCycle, true, I);
3138 if (E != Pending.size()) {
3139 --I;
3140 --E;
3141 }
3142 }
3143 CheckPending = false;
3144}
3145
3146/// Remove SU from the ready set for this boundary.
3148 if (Available.isInQueue(SU))
3150 else {
3151 assert(Pending.isInQueue(SU) && "bad ready count");
3153 }
3154}
3155
3156/// If this queue only has one ready candidate, return it. As a side effect,
3157/// defer any nodes that now hit a hazard, and advance the cycle until at least
3158/// one node is ready. If multiple instructions are ready, return NULL.
3160 if (CheckPending)
3162
3163 // Defer any ready instrs that now have a hazard.
3165 if (checkHazard(*I)) {
3166 Pending.push(*I);
3167 I = Available.remove(I);
3168 continue;
3169 }
3170 ++I;
3171 }
3172 for (unsigned i = 0; Available.empty(); ++i) {
3173// FIXME: Re-enable assert once PR20057 is resolved.
3174// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
3175// "permanent hazard");
3176 (void)i;
3177 bumpCycle(CurrCycle + 1);
3179 }
3180
3183
3184 if (Available.size() == 1)
3185 return *Available.begin();
3186 return nullptr;
3187}
3188
3189#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3190
3191/// Dump the content of the \ref ReservedCycles vector for the
3192/// resources that are used in the basic block.
3193///
3196 return;
3197
3198 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
3199 unsigned StartIdx = 0;
3200
3201 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
3202 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
3203 std::string ResName = SchedModel->getResourceName(ResIdx);
3204 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
3205 dbgs() << ResName << "(" << UnitIdx << ") = ";
3207 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
3208 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
3209 else
3210 dbgs() << "{ }\n";
3211 } else
3212 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
3213 }
3214 StartIdx += NumUnits;
3215 }
3216}
3217
3218// This is useful information to dump after bumpNode.
3219// Note that the Queue contents are more useful before pickNodeFromQueue.
3221 unsigned ResFactor;
3222 unsigned ResCount;
3223 if (ZoneCritResIdx) {
3224 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
3225 ResCount = getResourceCount(ZoneCritResIdx);
3226 } else {
3227 ResFactor = SchedModel->getMicroOpFactor();
3228 ResCount = RetiredMOps * ResFactor;
3229 }
3230 unsigned LFactor = SchedModel->getLatencyFactor();
3231 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
3232 << " Retired: " << RetiredMOps;
3233 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
3234 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
3235 << ResCount / ResFactor << " "
3236 << SchedModel->getResourceName(ZoneCritResIdx)
3237 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
3238 << (IsResourceLimited ? " - Resource" : " - Latency")
3239 << " limited.\n";
3242}
3243#endif
3244
3245//===----------------------------------------------------------------------===//
3246// GenericScheduler - Generic implementation of MachineSchedStrategy.
3247//===----------------------------------------------------------------------===//
3248
3251 const TargetSchedModel *SchedModel) {
3253 return;
3254
3255 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
3258 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3259 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
3260 ResDelta.CritResources += PI->ReleaseAtCycle;
3261 if (PI->ProcResourceIdx == Policy.DemandResIdx)
3262 ResDelta.DemandedResources += PI->ReleaseAtCycle;
3263 }
3264}
3265
3266/// Compute remaining latency. We need this both to determine whether the
3267/// overall schedule has become latency-limited and whether the instructions
3268/// outside this zone are resource or latency limited.
3269///
3270/// The "dependent" latency is updated incrementally during scheduling as the
3271/// max height/depth of scheduled nodes minus the cycles since it was
3272/// scheduled:
3273///   DLat = max(N.depth - (CurrCycle - N.ReadyCycle) for N in Zone)
3274///
3275/// The "independent" latency is the max ready queue depth:
3276/// ILat = max N.depth for N in Available|Pending
3277///
3278/// RemainingLatency is the greater of independent and dependent latency.
3279///
3280/// These computations are expensive, especially in DAGs with many edges, so
3281/// only do them if necessary.
3282static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3283 unsigned RemLatency = CurrZone.getDependentLatency();
3284 RemLatency = std::max(RemLatency,
3285 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3286 RemLatency = std::max(RemLatency,
3287 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3288 return RemLatency;
3289}
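// For example, if the deepest node still in Available or Pending has an
// unscheduled latency of 12 cycles while the zone's DependentLatency has
// decayed to 9, RemainingLatency is 12.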
3290
3291/// Returns true if the current cycle plus remaining latency is greater than
3292/// the critical path in the scheduling region.
3293bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3294 SchedBoundary &CurrZone,
3295 bool ComputeRemLatency,
3296 unsigned &RemLatency) const {
3297 // The current cycle is already greater than the critical path, so we are
3298 // already latency limited and don't need to compute the remaining latency.
3299 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3300 return true;
3301
3302 // If we haven't scheduled anything yet, then we aren't latency limited.
3303 if (CurrZone.getCurrCycle() == 0)
3304 return false;
3305
3306 if (ComputeRemLatency)
3307 RemLatency = computeRemLatency(CurrZone);
3308
3309 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3310}
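// Example: with Rem.CriticalPath = 20, a zone at cycle 8 whose remaining
// latency is 15 is latency limited because 15 + 8 > 20, so setPolicy() may
// set ReduceLatency below.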
3311
3312/// Set the CandPolicy for a scheduling zone, given the current resources and
3313/// latencies inside and outside the zone.
3315 SchedBoundary &CurrZone,
3316 SchedBoundary *OtherZone) {
3317 // Apply preemptive heuristics based on the total latency and resources
3318 // inside and outside this zone. Potential stalls should be considered before
3319 // following this policy.
3320
3321 // Compute the critical resource outside the zone.
3322 unsigned OtherCritIdx = 0;
3323 unsigned OtherCount =
3324 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3325
3326 bool OtherResLimited = false;
3327 unsigned RemLatency = 0;
3328 bool RemLatencyComputed = false;
3329 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3330 RemLatency = computeRemLatency(CurrZone);
3331 RemLatencyComputed = true;
3332 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3333 OtherCount, RemLatency, false);
3334 }
3335
3336 // Schedule aggressively for latency in PostRA mode. We don't check for
3337 // acyclic latency during PostRA, and highly out-of-order processors will
3338 // skip PostRA scheduling.
3339 if (!OtherResLimited &&
3340 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3341 RemLatency))) {
3342 Policy.ReduceLatency |= true;
3343 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3344 << " RemainingLatency " << RemLatency << " + "
3345 << CurrZone.getCurrCycle() << "c > CritPath "
3346 << Rem.CriticalPath << "\n");
3347 }
3348 // If the same resource is limiting inside and outside the zone, do nothing.
3349 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3350 return;
3351
3352 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3353 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3354 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3355 } if (OtherResLimited) dbgs()
3356 << " RemainingLimit: "
3357 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3358 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3359 << " Latency limited both directions.\n");
3360
3361 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3362 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3363
3364 if (OtherResLimited)
3365 Policy.DemandResIdx = OtherCritIdx;
3366}
3367
3368#ifndef NDEBUG
3371 // clang-format off
3372 switch (Reason) {
3373 case NoCand: return "NOCAND ";
3374 case Only1: return "ONLY1 ";
3375 case PhysReg: return "PHYS-REG ";
3376 case RegExcess: return "REG-EXCESS";
3377 case RegCritical: return "REG-CRIT ";
3378 case Stall: return "STALL ";
3379 case Cluster: return "CLUSTER ";
3380 case Weak: return "WEAK ";
3381 case RegMax: return "REG-MAX ";
3382 case ResourceReduce: return "RES-REDUCE";
3383 case ResourceDemand: return "RES-DEMAND";
3384 case TopDepthReduce: return "TOP-DEPTH ";
3385 case TopPathReduce: return "TOP-PATH ";
3386 case BotHeightReduce:return "BOT-HEIGHT";
3387 case BotPathReduce: return "BOT-PATH ";
3388 case NodeOrder: return "ORDER ";
3389 case FirstValid: return "FIRST ";
3390 };
3391 // clang-format on
3392 llvm_unreachable("Unknown reason!");
3393}
3394
3397 unsigned ResIdx = 0;
3398 unsigned Latency = 0;
3399 switch (Cand.Reason) {
3400 default:
3401 break;
3402 case RegExcess:
3403 P = Cand.RPDelta.Excess;
3404 break;
3405 case RegCritical:
3406 P = Cand.RPDelta.CriticalMax;
3407 break;
3408 case RegMax:
3409 P = Cand.RPDelta.CurrentMax;
3410 break;
3411 case ResourceReduce:
3412 ResIdx = Cand.Policy.ReduceResIdx;
3413 break;
3414 case ResourceDemand:
3415 ResIdx = Cand.Policy.DemandResIdx;
3416 break;
3417 case TopDepthReduce:
3418 Latency = Cand.SU->getDepth();
3419 break;
3420 case TopPathReduce:
3421 Latency = Cand.SU->getHeight();
3422 break;
3423 case BotHeightReduce:
3424 Latency = Cand.SU->getHeight();
3425 break;
3426 case BotPathReduce:
3427 Latency = Cand.SU->getDepth();
3428 break;
3429 }
3430 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3431 if (P.isValid())
3432 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3433 << ":" << P.getUnitInc() << " ";
3434 else
3435 dbgs() << " ";
3436 if (ResIdx)
3437 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3438 else
3439 dbgs() << " ";
3440 if (Latency)
3441 dbgs() << " " << Latency << " cycles ";
3442 else
3443 dbgs() << " ";
3444 dbgs() << '\n';
3445}
3446#endif
3447
3448namespace llvm {
3449/// Return true if this heuristic determines order.
3450/// TODO: Consider refactoring the return type of these functions to an integer
3451/// or enum, as we may need to differentiate whether TryCand is better than Cand.
3452bool tryLess(int TryVal, int CandVal,
3456 if (TryVal < CandVal) {
3457 TryCand.Reason = Reason;
3458 return true;
3459 }
3460 if (TryVal > CandVal) {
3461 if (Cand.Reason > Reason)
3462 Cand.Reason = Reason;
3463 return true;
3464 }
3465 return false;
3466}
3467
3468bool tryGreater(int TryVal, int CandVal,
3472 if (TryVal > CandVal) {
3473 TryCand.Reason = Reason;
3474 return true;
3475 }
3476 if (TryVal < CandVal) {
3477 if (Cand.Reason > Reason)
3478 Cand.Reason = Reason;
3479 return true;
3480 }
3481 return false;
3482}
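// Usage sketch (hypothetical caller): these helpers are probed in decreasing
// priority order. A strict difference in either direction decides the
// comparison and they return true; equality returns false so the caller can
// fall through to the next, lower-priority heuristic.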
3483
3486 SchedBoundary &Zone) {
3487 if (Zone.isTop()) {
3488 // Prefer the candidate with the lesser depth, but only if one of them has
3489 // depth greater than the total latency scheduled so far, otherwise either
3490 // of them could be scheduled now with no stall.
3491 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3492 Zone.getScheduledLatency()) {
3493 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3495 return true;
3496 }
3497 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3499 return true;
3500 } else {
3501 // Prefer the candidate with the lesser height, but only if one of them has
3502 // height greater than the total latency scheduled so far, otherwise either
3503 // of them could be scheduled now with no stall.
3504 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3505 Zone.getScheduledLatency()) {
3506 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3508 return true;
3509 }
3510 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3512 return true;
3513 }
3514 return false;
3515}
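
// A worked example of the top-zone branch, with made-up numbers: assume
// Zone.isTop(), Zone.getScheduledLatency() == 10, TryCand.SU has depth 12 and
// height 3, and Cand.SU has depth 8 and height 5. Because max(12, 8) > 10 the
// depth check is live, and tryLess(12, 8, ...) prefers the shallower Cand
// under TopDepthReduce: it can issue without pushing out the scheduled
// latency. If both depths were <= 10 neither node would stall, so the height
// comparison runs instead and tryGreater(3, 5, ...) prefers Cand under
// TopPathReduce, i.e. the node with the longer remaining path to the exit.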
3516} // end namespace llvm
3517
3518static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop,
3519 bool IsPostRA = false) {
3520 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3521 << GenericSchedulerBase::getReasonStr(Reason) << " ["
3522 << (IsPostRA ? "post-RA" : "pre-RA") << "]\n");
3523
3524 if (IsPostRA) {
3525 if (IsTop)
3526 NumTopPostRA++;
3527 else
3528 NumBotPostRA++;
3529
3530 switch (Reason) {
3532 NumNoCandPostRA++;
3533 return;
3535 NumOnly1PostRA++;
3536 return;
3538 NumPhysRegPostRA++;
3539 return;
3541 NumRegExcessPostRA++;
3542 return;
3544 NumRegCriticalPostRA++;
3545 return;
3547 NumStallPostRA++;
3548 return;
3550 NumClusterPostRA++;
3551 return;
3553 NumWeakPostRA++;
3554 return;
3556 NumRegMaxPostRA++;
3557 return;
3559 NumResourceReducePostRA++;
3560 return;
3562 NumResourceDemandPostRA++;
3563 return;
3565 NumTopDepthReducePostRA++;
3566 return;
3568 NumTopPathReducePostRA++;
3569 return;
3571 NumBotHeightReducePostRA++;
3572 return;
3574 NumBotPathReducePostRA++;
3575 return;
3577 NumNodeOrderPostRA++;
3578 return;
3580 NumFirstValidPostRA++;
3581 return;
3582 };
3583 } else {
3584 if (IsTop)
3585 NumTopPreRA++;
3586 else
3587 NumBotPreRA++;
3588
3589 switch (Reason) {
3591 NumNoCandPreRA++;
3592 return;
3594 NumOnly1PreRA++;
3595 return;
3597 NumPhysRegPreRA++;
3598 return;
3600 NumRegExcessPreRA++;
3601 return;
3603 NumRegCriticalPreRA++;
3604 return;
3606 NumStallPreRA++;
3607 return;
3609 NumClusterPreRA++;
3610 return;
3612 NumWeakPreRA++;
3613 return;
3615 NumRegMaxPreRA++;
3616 return;
3618 NumResourceReducePreRA++;
3619 return;
3621 NumResourceDemandPreRA++;
3622 return;
3624 NumTopDepthReducePreRA++;
3625 return;
3627 NumTopPathReducePreRA++;
3628 return;
3630 NumBotHeightReducePreRA++;
3631 return;
3633 NumBotPathReducePreRA++;
3634 return;
3636 NumNodeOrderPreRA++;
3637 return;
3639 NumFirstValidPreRA++;
3640 return;
3641 };
3642 }
3643 llvm_unreachable("Unknown reason!");
3644}
3645
3647 bool IsPostRA = false) {
3648 tracePick(Cand.Reason, Cand.AtTop, IsPostRA);
3649}
3650
3652 assert(dag->hasVRegLiveness() &&
3653 "(PreRA)GenericScheduler needs vreg liveness");
3654 DAG = static_cast<ScheduleDAGMILive*>(dag);
3655 SchedModel = DAG->getSchedModel();
3656 TRI = DAG->TRI;
3657
3659 DAG->computeDFSResult();
3660
3661 Rem.init(DAG, SchedModel);
3662 Top.init(DAG, SchedModel, &Rem);
3663 Bot.init(DAG, SchedModel, &Rem);
3664
3665 // Initialize resource counts.
3666
3667 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3668 // are disabled, then these HazardRecs will be disabled.
3670 if (!Top.HazardRec) {
3671 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3672 }
3673 if (!Bot.HazardRec) {
3674 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3675 }
3676 TopCand.SU = nullptr;
3677 BotCand.SU = nullptr;
3678
3679 TopClusterID = InvalidClusterId;
3680 BotClusterID = InvalidClusterId;
3681}
3682
3683/// Initialize the per-region scheduling policy.
3686 unsigned NumRegionInstrs) {
3687 const MachineFunction &MF = *Begin->getMF();
3688 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3689
3690 // Avoid setting up the register pressure tracker for small regions to save
3691 // compile time. As a rough heuristic, only track pressure when the number of
3692 // schedulable instructions exceeds half the allocatable integer register file
3693 // for the largest legal integer register type.
3695 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3697 if (TLI->isTypeLegal(LegalIntVT)) {
3698 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3699 TLI->getRegClassFor(LegalIntVT));
3701 break;
3702 }
3703 }
3704
3705 // For generic targets, we default to bottom-up, because it's simpler and more
3706 // compile-time optimizations have been implemented in that direction.
3708
3709 // Allow the subtarget to override default policy.
3712
3713 // After subtarget overrides, apply command line options.
3714 if (!EnableRegPressure) {
3717 }
3718
3721 RegionPolicy.OnlyBottomUp = false;
3722 } else if (PreRADirection == MISched::BottomUp) {
3723 RegionPolicy.OnlyTopDown = false;
3725 } else if (PreRADirection == MISched::Bidirectional) {
3726 RegionPolicy.OnlyBottomUp = false;
3727 RegionPolicy.OnlyTopDown = false;
3728 }
3729
3730 BotIdx = NumRegionInstrs - 1;
3731 this->NumRegionInstrs = NumRegionInstrs;
3732}
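
// As a rough example of the sizing heuristic above (numbers are hypothetical):
// on a target whose widest legal integer type is i64 with 32 allocatable
// registers in that class, pressure tracking is enabled only for regions of
// more than roughly NIntRegs / 2 = 16 instructions; smaller regions skip the
// RegPressureTracker entirely to save compile time (the subtarget hook and
// -misched-regpressure=false can still override this default).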
3733
3735 // Cannot completely remove virtual function even in release mode.
3736#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3737 dbgs() << "GenericScheduler RegionPolicy: "
3738 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3739 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3740 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3741 << "\n";
3742#endif
3743}
3744
3745/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3746/// critical path by more cycles than it takes to drain the instruction buffer.
3747/// We estimate an upper bound on in-flight instructions as:
3748///
3749/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3750/// InFlightIterations = AcyclicPath / CyclesPerIteration
3751/// InFlightResources = InFlightIterations * LoopResources
3752///
3753/// TODO: Check execution resources in addition to IssueCount.
3756 return;
3757
3758 // Scaled number of cycles per loop iteration.
3759 unsigned IterCount =
3762 // Scaled acyclic critical path.
3763 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3764 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3765 unsigned InFlightCount =
3766 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3767 unsigned BufferLimit =
3769
3770 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3771
3772 LLVM_DEBUG(
3773 dbgs() << "IssueCycles="
3775 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3776 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3777 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3778 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3779 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3780}
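
// Worked example with invented model numbers (LatencyFactor = 1,
// MicroOpFactor = 1, MicroOpBufferSize = 16): for a loop body with
// CyclicCritPath = 4 cycles, acyclic CriticalPath = 32 cycles, and
// RemIssueCount = 8 micro-ops,
//   IterCount     = max(4 * 1, 8)     = 8
//   AcyclicCount  = 32 * 1            = 32
//   InFlightCount = (32 * 8 + 7) / 8  = 32   (about 4 iterations in flight)
//   BufferLimit   = 16 * 1            = 16
// InFlightCount > BufferLimit, so IsAcyclicLatencyLimited is set and the
// scheduler will chase latency aggressively for this region.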
3781
3784
3785 // Some roots may not feed into ExitSU. Check all of them in case.
3786 for (const SUnit *SU : Bot.Available) {
3787 if (SU->getDepth() > Rem.CriticalPath)
3788 Rem.CriticalPath = SU->getDepth();
3789 }
3790 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3792 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3793 }
3794
3796 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3797 checkAcyclicLatency();
3798 }
3799}
3800
3801namespace llvm {
3803 const PressureChange &CandP,
3807 const TargetRegisterInfo *TRI,
3808 const MachineFunction &MF) {
3809 // If one candidate decreases and the other increases, go with it.
3810 // Invalid candidates have UnitInc==0.
3811 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3812 Reason)) {
3813 return true;
3814 }
3815 // Do not compare the magnitude of pressure changes between top and bottom
3816 // boundary.
3817 if (Cand.AtTop != TryCand.AtTop)
3818 return false;
3819
3820 // If both candidates affect the same set in the same boundary, go with the
3821 // smallest increase.
3822 unsigned TryPSet = TryP.getPSetOrMax();
3823 unsigned CandPSet = CandP.getPSetOrMax();
3824 if (TryPSet == CandPSet) {
3825 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3826 Reason);
3827 }
3828
3829 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3830 std::numeric_limits<int>::max();
3831
3832 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3833 std::numeric_limits<int>::max();
3834
3835 // If the candidates are decreasing pressure, reverse priority.
3836 if (TryP.getUnitInc() < 0)
3837 std::swap(TryRank, CandRank);
3838 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3839}
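
// For illustration with hypothetical pressure sets: if TryCand lowers some set
// (UnitInc = -1) while Cand raises one (UnitInc = +2), the first tryGreater
// call decides for TryCand immediately. If both raise the same set, say +1
// versus +3, the smaller increase wins through tryLess. Only when they touch
// different sets does the per-set score from TRI->getRegPressureSetScore()
// break the tie, and that comparison is reversed when the changes are
// decreases rather than increases.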
3840
3841unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3842 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3843}
3844
3845/// Minimize physical register live ranges. Regalloc wants them adjacent to
3846/// their physreg def/use.
3847///
3848/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3849/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3850/// with the operation that produces or consumes the physreg. We'll do this when
3851/// regalloc has support for parallel copies.
3852int biasPhysReg(const SUnit *SU, bool isTop) {
3853 const MachineInstr *MI = SU->getInstr();
3854
3855 if (MI->isCopy()) {
3856 unsigned ScheduledOper = isTop ? 1 : 0;
3857 unsigned UnscheduledOper = isTop ? 0 : 1;
3858 // If we have already scheduled the physreg producer/consumer, immediately
3859 // schedule the copy.
3860 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3861 return 1;
3862 // If the physreg is at the boundary, defer it. Otherwise schedule it
3863 // immediately to free the dependent. We can hoist the copy later.
3864 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3865 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3866 return AtBoundary ? -1 : 1;
3867 }
3868
3869 if (MI->isMoveImmediate()) {
3870 // If we have a move immediate and all successors have been assigned, bias
3871 // towards scheduling this later. Make sure all register defs are to
3872 // physical registers.
3873 bool DoBias = true;
3874 for (const MachineOperand &Op : MI->defs()) {
3875 if (Op.isReg() && !Op.getReg().isPhysical()) {
3876 DoBias = false;
3877 break;
3878 }
3879 }
3880
3881 if (DoBias)
3882 return isTop ? -1 : 1;
3883 }
3884
3885 return 0;
3886}
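
// Two hedged examples of the copy handling above, assuming a bottom-up zone
// (isTop == false): for "$physreg = COPY %vreg", operand 0 is the side whose
// users are already scheduled and it is physical, so the function returns 1
// and the copy is scheduled right away, next to its physreg consumer. For
// "%vreg = COPY $physreg" with no unscheduled predecessors left (AtBoundary),
// the physical register sits on the unscheduled side, so the copy is deferred
// (-1), keeping the physreg live range short near the top of the region.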
3887} // end namespace llvm
3888
3890 bool AtTop,
3891 const RegPressureTracker &RPTracker,
3892 RegPressureTracker &TempTracker) {
3893 Cand.SU = SU;
3894 Cand.AtTop = AtTop;
3895 if (DAG->isTrackingPressure()) {
3896 if (AtTop) {
3897 TempTracker.getMaxDownwardPressureDelta(
3898 Cand.SU->getInstr(),
3899 Cand.RPDelta,
3900 DAG->getRegionCriticalPSets(),
3901 DAG->getRegPressure().MaxSetPressure);
3902 } else {
3903 if (VerifyScheduling) {
3904 TempTracker.getMaxUpwardPressureDelta(
3905 Cand.SU->getInstr(),
3906 &DAG->getPressureDiff(Cand.SU),
3907 Cand.RPDelta,
3908 DAG->getRegionCriticalPSets(),
3909 DAG->getRegPressure().MaxSetPressure);
3910 } else {
3911 RPTracker.getUpwardPressureDelta(
3912 Cand.SU->getInstr(),
3913 DAG->getPressureDiff(Cand.SU),
3914 Cand.RPDelta,
3915 DAG->getRegionCriticalPSets(),
3916 DAG->getRegPressure().MaxSetPressure);
3917 }
3918 }
3919 }
3920 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3921 << " Try SU(" << Cand.SU->NodeNum << ") "
3923 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3924}
3925
3926/// Apply a set of heuristics to a new candidate. Heuristics are currently
3927/// hierarchical. This may be more efficient than a graduated cost model because
3928/// we don't need to evaluate all aspects of the model for each node in the
3929/// queue. But it's really done to make the heuristics easier to debug and
3930/// statistically analyze.
3931///
3932/// \param Cand provides the policy and current best candidate.
3933/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3934/// \param Zone describes the scheduled zone that we are extending, or nullptr
3935/// if Cand is from a different zone than TryCand.
3936/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3938 SchedCandidate &TryCand,
3939 SchedBoundary *Zone) const {
3940 // Initialize the candidate if needed.
3941 if (!Cand.isValid()) {
3942 TryCand.Reason = FirstValid;
3943 return true;
3944 }
3945
3946 // Bias PhysReg defs and copies toward their uses and defs, respectively.
3947 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3948 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3949 return TryCand.Reason != NoCand;
3950
3951 // Avoid exceeding the target's limit.
3952 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3953 Cand.RPDelta.Excess,
3954 TryCand, Cand, RegExcess, TRI,
3955 DAG->MF))
3956 return TryCand.Reason != NoCand;
3957
3958 // Avoid increasing the max critical pressure in the scheduled region.
3959 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3960 Cand.RPDelta.CriticalMax,
3961 TryCand, Cand, RegCritical, TRI,
3962 DAG->MF))
3963 return TryCand.Reason != NoCand;
3964
3965 // We only compare a subset of features when comparing nodes between
3966 // Top and Bottom boundary. Some properties are simply incomparable; in many
3967 // other instances we should only override the other boundary if something
3968 // is a clear good pick on one boundary. Skip heuristics that are more
3969 // "tie-breaking" in nature.
3970 bool SameBoundary = Zone != nullptr;
3971 if (SameBoundary) {
3972 // For loops that are acyclic path limited, aggressively schedule for
3973 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3974 // heuristics to take precedence.
3975 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3976 tryLatency(TryCand, Cand, *Zone))
3977 return TryCand.Reason != NoCand;
3978
3979 // Prioritize instructions that read unbuffered resources by stall cycles.
3980 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3981 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3982 return TryCand.Reason != NoCand;
3983 }
3984
3985 // Keep clustered nodes together to encourage downstream peephole
3986 // optimizations which may reduce resource requirements.
3987 //
3988 // This is a best effort to set things up for a post-RA pass. Optimizations
3989 // like generating loads of multiple registers should ideally be done within
3990 // the scheduler pass by combining the loads during DAG postprocessing.
3991 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
3992 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
3993 bool CandIsClusterSucc =
3994 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
3995 bool TryCandIsClusterSucc =
3996 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
3997
3998 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
3999 Cluster))
4000 return TryCand.Reason != NoCand;
4001
4002 if (SameBoundary) {
4003 // Weak edges are for clustering and other constraints.
4004 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
4005 getWeakLeft(Cand.SU, Cand.AtTop),
4006 TryCand, Cand, Weak))
4007 return TryCand.Reason != NoCand;
4008 }
4009
4010 // Avoid increasing the max pressure of the entire region.
4011 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
4012 Cand.RPDelta.CurrentMax,
4013 TryCand, Cand, RegMax, TRI,
4014 DAG->MF))
4015 return TryCand.Reason != NoCand;
4016
4017 if (SameBoundary) {
4018 // Avoid critical resource consumption and balance the schedule.
4019 TryCand.initResourceDelta(DAG, SchedModel);
4021 TryCand, Cand, ResourceReduce))
4022 return TryCand.Reason != NoCand;
4025 TryCand, Cand, ResourceDemand))
4026 return TryCand.Reason != NoCand;
4027
4028 // Avoid serializing long latency dependence chains.
4029 // For acyclic path limited loops, latency was already checked above.
4031 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
4032 return TryCand.Reason != NoCand;
4033
4034 // Fall through to original instruction order.
4035 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
4036 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
4037 TryCand.Reason = NodeOrder;
4038 return true;
4039 }
4040 }
4041
4042 return false;
4043}
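
// The order of the checks above is itself the policy: earlier heuristics are
// strictly stronger than later ones. As a hypothetical example, a TryCand that
// would shorten the critical path by several cycles but pushes a pressure set
// over the target's limit loses to Cand at the RegExcess test; latency is only
// consulted after the pressure checks (aggressively when the region is
// acyclic-latency limited, otherwise as a late tie-breaker). Likewise RegMax
// can never override an earlier RegExcess or RegCritical decision.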
4044
4045/// Pick the best candidate from the queue.
4046///
4047/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
4048/// DAG building. To adjust for the current scheduling location we need to
4049/// maintain the number of vreg uses remaining to be top-scheduled.
4051 const CandPolicy &ZonePolicy,
4052 const RegPressureTracker &RPTracker,
4053 SchedCandidate &Cand) {
4054 // getMaxPressureDelta temporarily modifies the tracker.
4055 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
4056
4057 ReadyQueue &Q = Zone.Available;
4058 for (SUnit *SU : Q) {
4059
4060 SchedCandidate TryCand(ZonePolicy);
4061 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
4062 // Pass SchedBoundary only when comparing nodes from the same boundary.
4063 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
4064 if (tryCandidate(Cand, TryCand, ZoneArg)) {
4065 // Initialize resource delta if needed in case future heuristics query it.
4066 if (TryCand.ResDelta == SchedResourceDelta())
4067 TryCand.initResourceDelta(DAG, SchedModel);
4068 Cand.setBest(TryCand);
4070 }
4071 }
4072}
4073
4074/// Pick the best candidate node from either the top or bottom queue.
4076 // Schedule as far as possible in the direction of no choice. This is most
4077 // efficient, but also provides the best heuristics for CriticalPSets.
4078 if (SUnit *SU = Bot.pickOnlyChoice()) {
4079 IsTopNode = false;
4080 tracePick(Only1, /*IsTopNode=*/false);
4081 return SU;
4082 }
4083 if (SUnit *SU = Top.pickOnlyChoice()) {
4084 IsTopNode = true;
4085 tracePick(Only1, /*IsTopNode=*/true);
4086 return SU;
4087 }
4088 // Set the bottom-up policy based on the state of the current bottom zone and
4089 // the instructions outside the zone, including the top zone.
4090 CandPolicy BotPolicy;
4091 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
4092 // Set the top-down policy based on the state of the current top zone and
4093 // the instructions outside the zone, including the bottom zone.
4094 CandPolicy TopPolicy;
4095 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
4096
4097 // See if BotCand is still valid (because we previously scheduled from Top).
4098 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4099 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4100 BotCand.Policy != BotPolicy) {
4101 BotCand.reset(CandPolicy());
4102 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
4103 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4104 } else {
4105 LLVM_DEBUG(traceCandidate(BotCand));
4106#ifndef NDEBUG
4107 if (VerifyScheduling) {
4108 SchedCandidate TCand;
4109 TCand.reset(CandPolicy());
4110 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
4111 assert(TCand.SU == BotCand.SU &&
4112 "Last pick result should correspond to re-picking right now");
4113 }
4114#endif
4115 }
4116
4117 // Check if the top Q has a better candidate.
4118 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4119 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4120 TopCand.Policy != TopPolicy) {
4121 TopCand.reset(CandPolicy());
4122 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
4123 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4124 } else {
4125 LLVM_DEBUG(traceCandidate(TopCand));
4126#ifndef NDEBUG
4127 if (VerifyScheduling) {
4128 SchedCandidate TCand;
4129 TCand.reset(CandPolicy());
4130 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
4131 assert(TCand.SU == TopCand.SU &&
4132 "Last pick result should correspond to re-picking right now");
4133 }
4134#endif
4135 }
4136
4137 // Pick best from BotCand and TopCand.
4138 assert(BotCand.isValid());
4139 assert(TopCand.isValid());
4140 SchedCandidate Cand = BotCand;
4141 TopCand.Reason = NoCand;
4142 if (tryCandidate(Cand, TopCand, nullptr)) {
4143 Cand.setBest(TopCand);
4145 }
4146
4147 IsTopNode = Cand.AtTop;
4148 tracePick(Cand);
4149 return Cand.SU;
4150}
4151
4152/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
4154 if (DAG->top() == DAG->bottom()) {
4155 assert(Top.Available.empty() && Top.Pending.empty() &&
4156 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4157 return nullptr;
4158 }
4159 SUnit *SU;
4160 do {
4162 SU = Top.pickOnlyChoice();
4163 if (!SU) {
4164 CandPolicy NoPolicy;
4165 TopCand.reset(NoPolicy);
4166 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
4167 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4168 tracePick(TopCand);
4169 SU = TopCand.SU;
4170 }
4171 IsTopNode = true;
4172 } else if (RegionPolicy.OnlyBottomUp) {
4173 SU = Bot.pickOnlyChoice();
4174 if (!SU) {
4175 CandPolicy NoPolicy;
4176 BotCand.reset(NoPolicy);
4177 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
4178 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4179 tracePick(BotCand);
4180 SU = BotCand.SU;
4181 }
4182 IsTopNode = false;
4183 } else {
4184 SU = pickNodeBidirectional(IsTopNode);
4185 }
4186 } while (SU->isScheduled);
4187
4188 // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
4189 // if isTopReady(), then SU is in either Top.Available or Top.Pending.
4190 // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
4191 // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
4192 //
4193 // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
4194 // isBottomReady. That is, it didn't factor into the decision to choose SU
4195 // because it isTopReady or isBottomReady, respectively. In fact, if the
4196 // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
4197 // queues respectively contain the original roots and don't get updated when
4198 // picking a node. So if SU isTopReady on an OnlyBottomUp pick, then it was
4199 // because we scheduled everything but the top roots. Conversely, if SU
4200 // isBottomReady on OnlyTopDown, then it was because we scheduled everything
4201 // but the bottom roots. If it's in a queue even coincidentally, it should be
4202 // removed so it does not get re-picked in a subsequent pickNode call.
4203 if (SU->isTopReady())
4204 Top.removeReady(SU);
4205 if (SU->isBottomReady())
4206 Bot.removeReady(SU);
4207
4208 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4209 << *SU->getInstr());
4210
4211 if (IsTopNode) {
4212 if (SU->NodeNum == TopIdx++)
4213 ++NumInstrsInSourceOrderPreRA;
4214 } else {
4215 assert(BotIdx < NumRegionInstrs && "out of bounds");
4216 if (SU->NodeNum == BotIdx--)
4217 ++NumInstrsInSourceOrderPreRA;
4218 }
4219
4220 NumInstrsScheduledPreRA += 1;
4221
4222 return SU;
4223}
4224
4226 MachineBasicBlock::iterator InsertPos = SU->getInstr();
4227 if (!isTop)
4228 ++InsertPos;
4229 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
4230
4231 // Find already scheduled copies with a single physreg dependence and move
4232 // them just above the scheduled instruction.
4233 for (SDep &Dep : Deps) {
4234 if (Dep.getKind() != SDep::Data || !Dep.getReg().isPhysical())
4235 continue;
4236 SUnit *DepSU = Dep.getSUnit();
4237 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
4238 continue;
4239 MachineInstr *Copy = DepSU->getInstr();
4240 if (!Copy->isCopy() && !Copy->isMoveImmediate())
4241 continue;
4242 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
4243 DAG->dumpNode(*Dep.getSUnit()));
4244 DAG->moveInstruction(Copy, InsertPos);
4245 }
4246}
4247
4248/// Update the scheduler's state after scheduling a node. This is the same node
4249/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
4250/// update its state based on the current cycle before MachineSchedStrategy
4251/// does.
4252///
4253/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
4254/// them here. See comments in biasPhysReg.
4255void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4256 if (IsTopNode) {
4257 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4258 TopClusterID = SU->ParentClusterIdx;
4259 LLVM_DEBUG({
4260 if (TopClusterID != InvalidClusterId) {
4261 ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
4262 dbgs() << " Top Cluster: ";
4263 for (auto *N : *TopCluster)
4264 dbgs() << N->NodeNum << '\t';
4265 dbgs() << '\n';
4266 }
4267 });
4268 Top.bumpNode(SU);
4269 if (SU->hasPhysRegUses)
4270 reschedulePhysReg(SU, true);
4271 } else {
4272 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4273 BotClusterID = SU->ParentClusterIdx;
4274 LLVM_DEBUG({
4275 if (BotClusterID != InvalidClusterId) {
4276 ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
4277 dbgs() << " Bot Cluster: ";
4278 for (auto *N : *BotCluster)
4279 dbgs() << N->NodeNum << '\t';
4280 dbgs() << '\n';
4281 }
4282 });
4283 Bot.bumpNode(SU);
4284 if (SU->hasPhysRegDefs)
4285 reschedulePhysReg(SU, false);
4286 }
4287}
4288
4290 return createSchedLive(C);
4291}
4292
4294GenericSchedRegistry("converge", "Standard converging scheduler.",
4296
4297//===----------------------------------------------------------------------===//
4298// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
4299//===----------------------------------------------------------------------===//
4300
4302 DAG = Dag;
4303 SchedModel = DAG->getSchedModel();
4304 TRI = DAG->TRI;
4305
4306 Rem.init(DAG, SchedModel);
4307 Top.init(DAG, SchedModel, &Rem);
4308 Bot.init(DAG, SchedModel, &Rem);
4309
4310 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
4311 // or are disabled, then these HazardRecs will be disabled.
4313 if (!Top.HazardRec) {
4314 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4315 }
4316 if (!Bot.HazardRec) {
4317 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4318 }
4319 TopClusterID = InvalidClusterId;
4320 BotClusterID = InvalidClusterId;
4321}
4322
4325 unsigned NumRegionInstrs) {
4326 const MachineFunction &MF = *Begin->getMF();
4327
4328 // Default to top-down because it was implemented first and existing targets
4329 // expect that behavior by default.
4331 RegionPolicy.OnlyBottomUp = false;
4332
4333 // Allow the subtarget to override default policy.
4336
4337 // After subtarget overrides, apply command line options.
4340 RegionPolicy.OnlyBottomUp = false;
4341 } else if (PostRADirection == MISched::BottomUp) {
4342 RegionPolicy.OnlyTopDown = false;
4345 RegionPolicy.OnlyBottomUp = false;
4346 RegionPolicy.OnlyTopDown = false;
4347 }
4348
4349 BotIdx = NumRegionInstrs - 1;
4350 this->NumRegionInstrs = NumRegionInstrs;
4351}
4352
4355
4356 // Some roots may not feed into ExitSU. Check all of them in case.
4357 for (const SUnit *SU : Bot.Available) {
4358 if (SU->getDepth() > Rem.CriticalPath)
4359 Rem.CriticalPath = SU->getDepth();
4360 }
4361 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
4363 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
4364 }
4365}
4366
4367/// Apply a set of heuristics to a new candidate for PostRA scheduling.
4368///
4369/// \param Cand provides the policy and current best candidate.
4370/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
4371/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
4373 SchedCandidate &TryCand) {
4374 // Initialize the candidate if needed.
4375 if (!Cand.isValid()) {
4376 TryCand.Reason = FirstValid;
4377 return true;
4378 }
4379
4380 // Prioritize instructions that read unbuffered resources by stall cycles.
4381 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
4382 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
4383 return TryCand.Reason != NoCand;
4384
4385 // Keep clustered nodes together.
4386 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
4387 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
4388 bool CandIsClusterSucc =
4389 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
4390 bool TryCandIsClusterSucc =
4391 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
4392
4393 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
4394 Cluster))
4395 return TryCand.Reason != NoCand;
4396 // Avoid critical resource consumption and balance the schedule.
4398 TryCand, Cand, ResourceReduce))
4399 return TryCand.Reason != NoCand;
4402 TryCand, Cand, ResourceDemand))
4403 return TryCand.Reason != NoCand;
4404
4405 // We only compare a subset of features when comparing nodes between
4406 // Top and Bottom boundary.
4407 if (Cand.AtTop == TryCand.AtTop) {
4408 // Avoid serializing long latency dependence chains.
4409 if (Cand.Policy.ReduceLatency &&
4410 tryLatency(TryCand, Cand, Cand.AtTop ? Top : Bot))
4411 return TryCand.Reason != NoCand;
4412 }
4413
4414 // Fall through to original instruction order.
4415 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
4416 TryCand.Reason = NodeOrder;
4417 return true;
4418 }
4419
4420 return false;
4421}
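
// Relative to the pre-RA hook above, this post-RA variant deliberately skips
// all register-pressure heuristics, since registers are already assigned. For
// two ready candidates that neither stall on an unbuffered resource nor extend
// the current cluster, the decision falls through to resource balance, then to
// latency (when both are in the same boundary), and finally to the lower
// NodeNum, i.e. original source order.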
4422
4424 SchedCandidate &Cand) {
4425 ReadyQueue &Q = Zone.Available;
4426 for (SUnit *SU : Q) {
4427 SchedCandidate TryCand(Cand.Policy);
4428 TryCand.SU = SU;
4429 TryCand.AtTop = Zone.isTop();
4430 TryCand.initResourceDelta(DAG, SchedModel);
4431 if (tryCandidate(Cand, TryCand)) {
4432 Cand.setBest(TryCand);
4434 }
4435 }
4436}
4437
4438/// Pick the best candidate node from either the top or bottom queue.
4440 // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
4441 // out common parts.
4442
4443 // Schedule as far as possible in the direction of no choice. This is most
4444 // efficient, but also provides the best heuristics for CriticalPSets.
4445 if (SUnit *SU = Bot.pickOnlyChoice()) {
4446 IsTopNode = false;
4447 tracePick(Only1, /*IsTopNode=*/false, /*IsPostRA=*/true);
4448 return SU;
4449 }
4450 if (SUnit *SU = Top.pickOnlyChoice()) {
4451 IsTopNode = true;
4452 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4453 return SU;
4454 }
4455 // Set the bottom-up policy based on the state of the current bottom zone and
4456 // the instructions outside the zone, including the top zone.
4457 CandPolicy BotPolicy;
4458 setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4459 // Set the top-down policy based on the state of the current top zone and
4460 // the instructions outside the zone, including the bottom zone.
4461 CandPolicy TopPolicy;
4462 setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4463
4464 // See if BotCand is still valid (because we previously scheduled from Top).
4465 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4466 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4467 BotCand.Policy != BotPolicy) {
4468 BotCand.reset(CandPolicy());
4469 pickNodeFromQueue(Bot, BotCand);
4470 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4471 } else {
4472 LLVM_DEBUG(traceCandidate(BotCand));
4473#ifndef NDEBUG
4474 if (VerifyScheduling) {
4475 SchedCandidate TCand;
4476 TCand.reset(CandPolicy());
4477 pickNodeFromQueue(Bot, BotCand);
4478 assert(TCand.SU == BotCand.SU &&
4479 "Last pick result should correspond to re-picking right now");
4480 }
4481#endif
4482 }
4483
4484 // Check if the top Q has a better candidate.
4485 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4486 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4487 TopCand.Policy != TopPolicy) {
4488 TopCand.reset(CandPolicy());
4489 pickNodeFromQueue(Top, TopCand);
4490 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4491 } else {
4492 LLVM_DEBUG(traceCandidate(TopCand));
4493#ifndef NDEBUG
4494 if (VerifyScheduling) {
4495 SchedCandidate TCand;
4496 TCand.reset(CandPolicy());
4497 pickNodeFromQueue(Top, TopCand);
4498 assert(TCand.SU == TopCand.SU &&
4499 "Last pick result should correspond to re-picking right now");
4500 }
4501#endif
4502 }
4503
4504 // Pick best from BotCand and TopCand.
4505 assert(BotCand.isValid());
4506 assert(TopCand.isValid());
4507 SchedCandidate Cand = BotCand;
4508 TopCand.Reason = NoCand;
4509 if (tryCandidate(Cand, TopCand)) {
4510 Cand.setBest(TopCand);
4512 }
4513
4514 IsTopNode = Cand.AtTop;
4515 tracePick(Cand, /*IsPostRA=*/true);
4516 return Cand.SU;
4517}
4518
4519/// Pick the next node to schedule.
4521 if (DAG->top() == DAG->bottom()) {
4522 assert(Top.Available.empty() && Top.Pending.empty() &&
4523 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4524 return nullptr;
4525 }
4526 SUnit *SU;
4527 do {
4529 SU = Bot.pickOnlyChoice();
4530 if (SU) {
4531 tracePick(Only1, /*IsTopNode=*/false, /*IsPostRA=*/true);
4532 } else {
4533 CandPolicy NoPolicy;
4534 BotCand.reset(NoPolicy);
4535 // Set the bottom-up policy based on the state of the current bottom
4536 // zone and the instructions outside the zone, including the top zone.
4537 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
4538 pickNodeFromQueue(Bot, BotCand);
4539 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4540 tracePick(BotCand, /*IsPostRA=*/true);
4541 SU = BotCand.SU;
4542 }
4543 IsTopNode = false;
4544 } else if (RegionPolicy.OnlyTopDown) {
4545 SU = Top.pickOnlyChoice();
4546 if (SU) {
4547 tracePick(Only1, /*IsTopNode=*/true, /*IsPostRA=*/true);
4548 } else {
4549 CandPolicy NoPolicy;
4550 TopCand.reset(NoPolicy);
4551 // Set the top-down policy based on the state of the current top zone
4552 // and the instructions outside the zone, including the bottom zone.
4553 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4554 pickNodeFromQueue(Top, TopCand);
4555 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4556 tracePick(TopCand, /*IsPostRA=*/true);
4557 SU = TopCand.SU;
4558 }
4559 IsTopNode = true;
4560 } else {
4561 SU = pickNodeBidirectional(IsTopNode);
4562 }
4563 } while (SU->isScheduled);
4564
4565 if (SU->isTopReady())
4566 Top.removeReady(SU);
4567 if (SU->isBottomReady())
4568 Bot.removeReady(SU);
4569
4570 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4571 << *SU->getInstr());
4572
4573 if (IsTopNode) {
4574 if (SU->NodeNum == TopIdx++)
4575 ++NumInstrsInSourceOrderPostRA;
4576 } else {
4577 assert(BotIdx < NumRegionInstrs && "out of bounds");
4578 if (SU->NodeNum == BotIdx--)
4579 ++NumInstrsInSourceOrderPostRA;
4580 }
4581
4582 NumInstrsScheduledPostRA += 1;
4583
4584 return SU;
4585}
4586
4587/// Called after ScheduleDAGMI has scheduled an instruction and updated
4588/// scheduled/remaining flags in the DAG nodes.
4589void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4590 if (IsTopNode) {
4591 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4592 TopClusterID = SU->ParentClusterIdx;
4593 Top.bumpNode(SU);
4594 } else {
4595 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4596 BotClusterID = SU->ParentClusterIdx;
4597 Bot.bumpNode(SU);
4598 }
4599}
4600
4601//===----------------------------------------------------------------------===//
4602// ILP Scheduler. Currently for experimental analysis of heuristics.
4603//===----------------------------------------------------------------------===//
4604
4605namespace {
4606
4607/// Order nodes by the ILP metric.
4608struct ILPOrder {
4609 const SchedDFSResult *DFSResult = nullptr;
4610 const BitVector *ScheduledTrees = nullptr;
4611 bool MaximizeILP;
4612
4613 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4614
4615 /// Apply a less-than relation on node priority.
4616 ///
4617 /// (Return true if A comes after B in the Q.)
4618 bool operator()(const SUnit *A, const SUnit *B) const {
4619 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4620 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4621 if (SchedTreeA != SchedTreeB) {
4622 // Unscheduled trees have lower priority.
4623 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4624 return ScheduledTrees->test(SchedTreeB);
4625
4626 // Trees with shallower connections have lower priority.
4627 if (DFSResult->getSubtreeLevel(SchedTreeA)
4628 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4629 return DFSResult->getSubtreeLevel(SchedTreeA)
4630 < DFSResult->getSubtreeLevel(SchedTreeB);
4631 }
4632 }
4633 if (MaximizeILP)
4634 return DFSResult->getILP(A) < DFSResult->getILP(B);
4635 else
4636 return DFSResult->getILP(A) > DFSResult->getILP(B);
4637 }
4638};
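
// Example with invented DFS data: let A sit in subtree 2 (not yet scheduled,
// connection level 1, ILP 4) and B in subtree 5 (already scheduled, level 3,
// ILP 9). operator()(A, B) returns true because only B's tree is scheduled,
// so A sorts after B in the priority queue regardless of ILP. The ILP metric
// itself only decides when the subtree tests tie, in the direction chosen by
// MaximizeILP.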
4639
4640/// Schedule based on the ILP metric.
4641class ILPScheduler : public MachineSchedStrategy {
4642 ScheduleDAGMILive *DAG = nullptr;
4643 ILPOrder Cmp;
4644
4645 std::vector<SUnit*> ReadyQ;
4646
4647public:
4648 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4649
4650 void initialize(ScheduleDAGMI *dag) override {
4651 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4652 DAG = static_cast<ScheduleDAGMILive*>(dag);
4653 DAG->computeDFSResult();
4654 Cmp.DFSResult = DAG->getDFSResult();
4655 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4656 ReadyQ.clear();
4657 }
4658
4659 void registerRoots() override {
4660 // Restore the heap in ReadyQ with the updated DFS results.
4661 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4662 }
4663
4664 /// Implement MachineSchedStrategy interface.
4665 /// -----------------------------------------
4666
4667 /// Callback to select the highest priority node from the ready Q.
4668 SUnit *pickNode(bool &IsTopNode) override {
4669 if (ReadyQ.empty()) return nullptr;
4670 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4671 SUnit *SU = ReadyQ.back();
4672 ReadyQ.pop_back();
4673 IsTopNode = false;
4674 LLVM_DEBUG(dbgs() << "Pick node "
4675 << "SU(" << SU->NodeNum << ") "
4676 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4677 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4678 << " @"
4679 << DAG->getDFSResult()->getSubtreeLevel(
4680 DAG->getDFSResult()->getSubtreeID(SU))
4681 << '\n'
4682 << "Scheduling " << *SU->getInstr());
4683 return SU;
4684 }
4685
4686 /// Scheduler callback to notify that a new subtree is scheduled.
4687 void scheduleTree(unsigned SubtreeID) override {
4688 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4689 }
4690
4691 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4692 /// DFSResults, and resort the priority Q.
4693 void schedNode(SUnit *SU, bool IsTopNode) override {
4694 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4695 }
4696
4697 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4698
4699 void releaseBottomNode(SUnit *SU) override {
4700 ReadyQ.push_back(SU);
4701 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4702 }
4703};
4704
4705} // end anonymous namespace
4706
4708 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4709}
4711 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4712}
4713
4715 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4717 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4718
4719//===----------------------------------------------------------------------===//
4720// Machine Instruction Shuffler for Correctness Testing
4721//===----------------------------------------------------------------------===//
4722
4723#ifndef NDEBUG
4724namespace {
4725
4726/// Apply a less-than relation on the node order, which corresponds to the
4727/// instruction order prior to scheduling. IsReverse implements greater-than.
4728template<bool IsReverse>
4729struct SUnitOrder {
4730 bool operator()(SUnit *A, SUnit *B) const {
4731 if (IsReverse)
4732 return A->NodeNum > B->NodeNum;
4733 else
4734 return A->NodeNum < B->NodeNum;
4735 }
4736};
4737
4738/// Reorder instructions as much as possible.
4739class InstructionShuffler : public MachineSchedStrategy {
4740 bool IsAlternating;
4741 bool IsTopDown;
4742
4743 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4744 // gives nodes with a higher number higher priority causing the latest
4745 // instructions to be scheduled first.
4747 TopQ;
4748
4749 // When scheduling bottom-up, use greater-than as the queue priority.
4751 BottomQ;
4752
4753public:
4754 InstructionShuffler(bool alternate, bool topdown)
4755 : IsAlternating(alternate), IsTopDown(topdown) {}
4756
4757 void initialize(ScheduleDAGMI*) override {
4758 TopQ.clear();
4759 BottomQ.clear();
4760 }
4761
4762 /// Implement MachineSchedStrategy interface.
4763 /// -----------------------------------------
4764
4765 SUnit *pickNode(bool &IsTopNode) override {
4766 SUnit *SU;
4767 if (IsTopDown) {
4768 do {
4769 if (TopQ.empty()) return nullptr;
4770 SU = TopQ.top();
4771 TopQ.pop();
4772 } while (SU->isScheduled);
4773 IsTopNode = true;
4774 } else {
4775 do {
4776 if (BottomQ.empty()) return nullptr;
4777 SU = BottomQ.top();
4778 BottomQ.pop();
4779 } while (SU->isScheduled);
4780 IsTopNode = false;
4781 }
4782 if (IsAlternating)
4783 IsTopDown = !IsTopDown;
4784 return SU;
4785 }
4786
4787 void schedNode(SUnit *SU, bool IsTopNode) override {}
4788
4789 void releaseTopNode(SUnit *SU) override {
4790 TopQ.push(SU);
4791 }
4792 void releaseBottomNode(SUnit *SU) override {
4793 BottomQ.push(SU);
4794 }
4795};
4796
4797} // end anonymous namespace
4798
4800 bool Alternate =
4802 bool TopDown = PreRADirection != MISched::BottomUp;
4803 return new ScheduleDAGMILive(
4804 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4805}
4806
4808 "shuffle", "Shuffle machine instructions alternating directions",
4810#endif // !NDEBUG
4811
4812//===----------------------------------------------------------------------===//
4813// GraphWriter support for ScheduleDAGMILive.
4814//===----------------------------------------------------------------------===//
4815
4816#ifndef NDEBUG
4817namespace llvm {
4818
4819template<> struct GraphTraits<
4821
4822template<>
4825
4826 static std::string getGraphName(const ScheduleDAG *G) {
4827 return std::string(G->MF.getName());
4828 }
4829
4831 return true;
4832 }
4833
4834 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4835 if (ViewMISchedCutoff == 0)
4836 return false;
4837 return (Node->Preds.size() > ViewMISchedCutoff
4838 || Node->Succs.size() > ViewMISchedCutoff);
4839 }
4840
4841 /// If you want to override the dot attributes printed for a particular
4842 /// edge, override this method.
4843 static std::string getEdgeAttributes(const SUnit *Node,
4844 SUnitIterator EI,
4845 const ScheduleDAG *Graph) {
4846 if (EI.isArtificialDep())
4847 return "color=cyan,style=dashed";
4848 if (EI.isCtrlDep())
4849 return "color=blue,style=dashed";
4850 return "";
4851 }
4852
4853 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4854 std::string Str;
4855 raw_string_ostream SS(Str);
4856 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4857 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4858 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4859 SS << "SU:" << SU->NodeNum;
4860 if (DFS)
4861 SS << " I:" << DFS->getNumInstrs(SU);
4862 return Str;
4863 }
4864
4865 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4866 return G->getGraphNodeLabel(SU);
4867 }
4868
4869 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4870 std::string Str("shape=Mrecord");
4871 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4872 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4873 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4874 if (DFS) {
4875 Str += ",style=filled,fillcolor=\"#";
4876 Str += DOT::getColorString(DFS->getSubtreeID(N));
4877 Str += '"';
4878 }
4879 return Str;
4880 }
4881};
4882
4883} // end namespace llvm
4884#endif // NDEBUG
4885
4886/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4887/// rendered using 'dot'.
4888void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4889#ifndef NDEBUG
4890 ViewGraph(this, Name, false, Title);
4891#else
4892 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4893 << "systems with Graphviz or gv!\n";
4894#endif // NDEBUG
4895}
4896
4897/// Out-of-line implementation with no arguments is handy for gdb.
4899 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4900}
4901
4902/// Sort predicate for the intervals stored in an instance of
4903/// ResourceSegments. Intervals are always disjoint (no intersection
4904/// for any pairs of intervals), therefore we can sort the totality of
4905/// the intervals by looking only at the left boundary.
4908 return A.first < B.first;
4909}
4910
4911unsigned ResourceSegments::getFirstAvailableAt(
4912 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4913 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4914 IntervalBuilder) const {
4915 assert(llvm::is_sorted(_Intervals, sortIntervals) &&
4916 "Cannot execute on an un-sorted set of intervals.");
4917
4918 // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4919 // a ResourceSegment interval for that situation.
4920 if (AcquireAtCycle == ReleaseAtCycle)
4921 return CurrCycle;
4922
4923 unsigned RetCycle = CurrCycle;
4924 ResourceSegments::IntervalTy NewInterval =
4925 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4926 for (auto &Interval : _Intervals) {
4927 if (!intersects(NewInterval, Interval))
4928 continue;
4929
4930 // Move the interval right next to the top of the one it
4931 // intersects.
4932 assert(Interval.second > NewInterval.first &&
4933 "Invalid intervals configuration.");
4934 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4935 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4936 }
4937 return RetCycle;
4938}
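
// Worked example, assuming the usual top-down interval builder that maps a
// start cycle C to [C + AcquireAtCycle, C + ReleaseAtCycle): with
// _Intervals = { [2, 5) }, CurrCycle = 1, AcquireAtCycle = 0 and
// ReleaseAtCycle = 2, the first candidate interval [1, 3) overlaps [2, 5), so
// RetCycle is bumped by 5 - 1 = 4 up to cycle 5; the rebuilt interval [5, 7)
// is conflict-free and 5 is returned as the earliest cycle where the resource
// can be booked.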
4939
4941 const unsigned CutOff) {
4942 assert(A.first <= A.second && "Cannot add negative resource usage");
4943 assert(CutOff > 0 && "0-size interval history has no use.");
4944 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4945 // instruction needed the resource to be available but does not use it.
4946 // However, ResourceSegment represents an interval that is closed on the left
4947 // and open on the right. It is impossible to represent an empty interval when
4948 // the left is closed. Do not add it to Intervals.
4949 if (A.first == A.second)
4950 return;
4951
4952 assert(all_of(_Intervals,
4953 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4954 return !intersects(A, Interval);
4955 }) &&
4956 "A resource is being overwritten");
4957 _Intervals.push_back(A);
4958
4959 sortAndMerge();
4960
4961 // Do not keep the full history of the intervals, just the
4962 // latest #CutOff.
4963 while (_Intervals.size() > CutOff)
4964 _Intervals.pop_front();
4965}
4966
4969 assert(A.first <= A.second && "Invalid interval");
4970 assert(B.first <= B.second && "Invalid interval");
4971
4972 // Share one boundary.
4973 if ((A.first == B.first) || (A.second == B.second))
4974 return true;
4975
4976 // full intersect:    [ *** ) B
4977 // [***) A
4978 if ((A.first > B.first) && (A.second < B.second))
4979 return true;
4980
4981 // right intersect: [ ***) B
4982 // [*** ) A
4983 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4984 return true;
4985
4986 // left intersect: [*** ) B
4987 // [ ***) A
4988 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4989 return true;
4990
4991 return false;
4992}
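
// A few concrete cases of the half-open convention: [2, 5) and [4, 6)
// intersect (partial overlap), [2, 5) and [2, 9) intersect because they share
// the left boundary, while [2, 5) and [5, 7) do not intersect at all, since
// the cycle that releases one segment may also be the cycle that acquires the
// next.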
4993
4994void ResourceSegments::sortAndMerge() {
4995 if (_Intervals.size() <= 1)
4996 return;
4997
4998 // First sort the collection.
4999 _Intervals.sort(sortIntervals);
5000
5001 // We can use std::next because the list has at least 2 elements.
5002 auto next = std::next(std::begin(_Intervals));
5003 auto E = std::end(_Intervals);
5004 for (; next != E; ++next) {
5005 if (std::prev(next)->second >= next->first) {
5006 next->first = std::prev(next)->first;
5007 _Intervals.erase(std::prev(next));
5008 continue;
5009 }
5010 }
5011}
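
// For instance, starting from { [4, 6), [1, 3), [3, 4) } the sort yields
// { [1, 3), [3, 4), [4, 6) }, and the merge pass then collapses the touching
// neighbours (prev->second >= next->first) into the single segment [1, 6).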
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static const Function * getParent(const Value *V)
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::string Name
bool End
Definition: ELF_riscv.cpp:480
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
expand large div rem
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop, bool IsPostRA=false)
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
Register const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
#define P(N)
FunctionAnalysisManager FAM
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file defines the PriorityQueue class.
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class recording the (high level) value of a variable.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A private abstract base class describing the concept of an individual alias analysis implementation.
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass iff it does not:
Definition: Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:139
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:142
reverse_iterator rbegin() const
Definition: ArrayRef.h:138
bool test(unsigned Idx) const
Definition: BitVector.h:461
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:335
BitVector & set()
Definition: BitVector.h:351
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:173
iterator end()
Definition: DenseMap.h:87
Register getReg() const
ECValue - The EquivalenceClasses data structure is just a set of these.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
iterator_range< member_iterator > members(const ECValue &ECV) const
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
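tryCandidate is the usual extension point for target-specific pre-RA strategies. A hedged sketch of a subclass that applies one extra heuristic ahead of the generic ones; MyStrategy and the preference for calls are illustrative only, and an existing CandReason (PhysReg) is reused purely for bookkeeping because the enum cannot be extended from outside:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

class MyStrategy : public GenericScheduler {
public:
  MyStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}

  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override {
    // Keep GenericScheduler's handling of the very first candidate.
    if (!Cand.isValid()) {
      TryCand.Reason = NodeOrder;
      return true;
    }
    // Hypothetical high-priority heuristic: when extending the top zone,
    // prefer to issue calls as early as possible.
    if (Zone && Zone->isTop() &&
        tryGreater(TryCand.SU->isCall, Cand.SU->isCall, TryCand, Cand, PhysReg))
      return TryCand.Reason != NoCand;
    // Everything else falls through to the generic heuristics.
    return GenericScheduler::tryCandidate(Cand, TryCand, Zone);
  }
};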
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:690
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
LLVM_ABI void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
Definition: LiveInterval.h:91
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:106
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:545
iterator end()
Definition: LiveInterval.h:217
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
Definition: LiveInterval.h:431
iterator begin()
Definition: LiveInterval.h:216
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:387
SlotIndex endIndex() const
endIndex - Return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:394
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:521
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
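handleMove above is what keeps the pre-RA scheduler's LiveIntervals consistent when an instruction changes position. A minimal sketch of the update sequence ScheduleDAGMI::moveInstruction performs; moveAndRepair is a hypothetical wrapper around the two real calls:

#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

static void moveAndRepair(MachineBasicBlock *MBB, MachineInstr *MI,
                          MachineBasicBlock::iterator InsertPos,
                          LiveIntervals *LIS) {
  // Move the instruction first, then let LiveIntervals repair the affected
  // live ranges around its old and new slots.
  MBB->splice(InsertPos, MBB, MI);
  if (LIS)
    LIS->handleMove(*MI, /*UpdateFlags=*/true);
}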
bool hasValue() const
static LocationSize precise(uint64_t Value)
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
nonconst_iterator getNonConstIterator() const
Representation of each machine instruction.
Definition: MachineInstr.h:72
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static LLVM_ABI MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI MachineSchedulerPass(const TargetMachine *TM)
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI PostMachineSchedulerPass(const TargetMachine *TM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:151
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
LLVM_ABI void dump(const TargetRegisterInfo &TRI) const
LLVM_ABI void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
Helpers for implementing custom MachineSchedStrategy classes.
void push(SUnit *SU)
iterator find(SUnit *SU)
ArrayRef< SUnit * > elements()
LLVM_ABI void dump() const
bool isInQueue(SUnit *SU) const
std::vector< SUnit * >::iterator iterator
bool empty() const
StringRef getName() const
unsigned size() const
iterator remove(iterator I)
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void closeRegion()
Finalize the region boundaries and record live ins and live outs.
void setPos(MachineBasicBlock::const_iterator Pos)
ArrayRef< unsigned > getLiveThru() const
LLVM_ABI void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
LLVM_ABI void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
LLVM_ABI void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
LLVM_ABI void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
LLVM_ABI void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
LLVM_ABI void dump() const
LLVM_ABI void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
LLVM_ABI void closeTop()
Set the boundary for the top of the region and summarize live ins.
LLVM_ABI void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
LLVM_ABI void advance()
Advance across the current instruction.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
LLVM_ABI void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
LLVM_ABI void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These function return the interval used by a resource in bottom and top scheduling.
static LLVM_ABI bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
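ResourceSegments books processor-resource usage as half-open [a, b) intervals over cycles. A small illustration of the interval semantics documented above; the concrete numbers are made up:

#include "llvm/CodeGen/MachineScheduler.h"
#include <cassert>
using namespace llvm;

void intervalExample() {
  // IntervalTy is std::pair<int64_t, int64_t>, closed on the left and open
  // on the right.
  ResourceSegments::IntervalTy A{2, 5}, B{4, 7}, C{5, 8};
  assert(ResourceSegments::intersects(A, B));   // cycle 4 is booked by both
  assert(!ResourceSegments::intersects(A, C));  // [2,5) and [5,8) only touch
}

add() inserts such an interval into a segment collection, merging overlapping bookings; its CutOff argument bounds how many intervals are tracked (cf. the misched-resource-cutoff option above).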
Scheduling dependency.
Definition: ScheduleDAG.h:51
SUnit * getSUnit() const
Definition: ScheduleDAG.h:507
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:513
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:56
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:55
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:76
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:74
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:75
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
Register getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:216
bool isArtificialDep() const
Definition: ScheduleDAG.h:695
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:692
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
bool isCall
Is a function call.
Definition: ScheduleDAG.h:296
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:285
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:277
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:282
LLVM_ABI void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:309
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:433
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:312
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:425
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:305
unsigned ParentClusterIdx
The parent cluster id.
Definition: ScheduleDAG.h:288
unsigned NumPredsLeft
Definition: ScheduleDAG.h:281
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:301
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:286
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:270
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:310
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:283
bool isBottomReady() const
Definition: ScheduleDAG.h:476
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:300
bool isTopReady() const
Definition: ScheduleDAG.h:473
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:269
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:284
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:399
Each Scheduling boundary is associated with ready queues.
LLVM_ABI unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
LLVM_ABI void releasePending()
Release pending ready nodes into the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
LLVM_ABI void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
ScheduleDAGMI * DAG
LLVM_ABI void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
LLVM_ABI unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
LLVM_ABI void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
LLVM_ABI void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
LLVM_ABI unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
LLVM_ABI void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
LLVM_ABI void dumpScheduledState() const
LLVM_ABI void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
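SchedBoundary tracks one end (top or bottom) of the in-flight schedule. A hedged sketch of how a strategy typically drives the top boundary when picking and committing a node; pickFromTop is a hypothetical helper and Top is assumed to be a strategy's top-of-region SchedBoundary, as in GenericScheduler:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

// Hypothetical helper: consult the top boundary before and after a pick.
static SUnit *pickFromTop(SchedBoundary &Top, SUnit *SU) {
  // If hazards leave only one legal instruction, take it immediately.
  if (SUnit *OnlyChoice = Top.pickOnlyChoice())
    return OnlyChoice;
  // A candidate that is not ready yet stalls the zone; heuristics can use
  // this to prefer something else.
  unsigned StallCycles = Top.getLatencyStallCycles(SU);
  (void)StallCycles;
  // Once the DAG commits to SU, advance the boundary past it.
  Top.bumpNode(SU);
  return SU;
}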
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
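SchedDFSResult is what the ilpmax/ilpmin schedulers registered above consult. A sketch of how a strategy can request and read it from a live DAG; dumpPerNodeILP is a hypothetical helper a strategy could call from registerRoots():

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

static void dumpPerNodeILP(ScheduleDAGMILive *DAG) {
  DAG->computeDFSResult();                       // build subtree data once
  const SchedDFSResult *DFS = DAG->getDFSResult();
  for (const SUnit &SU : DAG->SUnits)
    dbgs() << "SU(" << SU.NodeNum << ") ILP: " << DFS->getILP(&SU) << '\n';
}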
A ScheduleDAG for scheduling lists of MachineInstr.
SmallVector< ClusterInfo > & getClusters()
Returns the array of the clusters.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolve and cache the scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
ClusterInfo * getCluster(unsigned Idx)
Get the specific cluster, return nullptr for InvalidClusterId.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void setDumpDirection(DumpDirection D)
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void updatePressureDiffs(ArrayRef< VRegMaskOrUnit > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
BitVector & getScheduledTrees()
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator bottom() const
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
LiveIntervals * LIS
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:587
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:584
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:588
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:585
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:589
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:586
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:590
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:66
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:177
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:238
size_type size() const
Definition: SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:255
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
iterator find(const KeyT &Key)
Find an element by its key.
void clear()
Clears the set.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
Provide an instruction scheduling machine model to CodeGen passes.
const char * getResourceName(unsigned PIdx) const
LLVM_ABI bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
LLVM_ABI bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
LLVM_ABI unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const InstrItineraryData * getInstrItineraries() const
LLVM_ABI bool enableIntervals() const
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const
Override generic post-ra scheduling policy within a region.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const
Override generic scheduling policy within a region.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
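overrideSchedPolicy and overridePostRASchedPolicy let a subtarget adjust the per-region MachineSchedPolicy before scheduling starts. A hedged sketch for a hypothetical MySubtarget; the specific choices (bottom-up for small regions, pressure tracking on) are illustrative, not a recommendation:

void MySubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                      const SchedRegion &Region) const {
  // Illustrative choices only.
  if (Region.NumRegionInstrs < 16)
    Policy.OnlyBottomUp = true;   // small regions: single-direction scheduling
  Policy.ShouldTrackPressure = true;
}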
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
VNInfo - Value Number Information.
Definition: LiveInterval.h:54
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:62
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:79
int getNumOccurrences() const
Definition: CommandLine.h:400
Base class for the machine scheduler classes.
void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags)
Main driver for both MachineScheduler and PostMachineScheduler.
Impl class for MachineScheduler.
void setMFAM(MachineFunctionAnalysisManager *MFAM)
void setLegacyPass(MachineFunctionPass *P)
bool run(MachineFunction &MF, const TargetMachine &TM, const RequiredAnalyses &Analyses)
ScheduleDAGInstrs * createMachineScheduler()
Instantiate a ScheduleDAGInstrs that will be owned by the caller.
Impl class for PostMachineScheduler.
bool run(MachineFunction &Func, const TargetMachine &TM, const RequiredAnalyses &Analyses)
void setMFAM(MachineFunctionAnalysisManager *MFAM)
ScheduleDAGInstrs * createPostMachineScheduler()
Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by the caller.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:91
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1315
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
ScheduleDAGMILive * createSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
@ Offset
Definition: DWP.cpp:477
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
cl::opt< bool > PrintDAGs
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition: Format.h:154
static cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:92
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
LLVM_ABI cl::opt< bool > VerifyScheduling
LLVM_ABI char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
LLVM_ABI char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
cl::opt< bool > ViewMISchedDAGs
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
ScheduleDAGMI * createSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
static cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1939
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr unsigned InvalidClusterId
Definition: ScheduleDAG.h:241
@ Other
Any other memory.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:147
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster IDs are the same and valid.
Definition: ScheduleDAG.h:244
LLVM_ABI cl::opt< MISched::Direction > PreRADirection
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
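createLoadClusterDAGMutation and createStoreClusterDAGMutation are the mutations behind the NumClustered statistic. A sketch of how a target's TargetPassConfig hook commonly wires them into the pre-RA scheduler; MyTargetPassConfig is a hypothetical class and the rest are standard hooks:

ScheduleDAGInstrs *
MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
  ScheduleDAGMILive *DAG = createSchedLive(C);  // standard converging scheduler
  const TargetSubtargetInfo &ST = C->MF->getSubtarget();
  DAG->addMutation(createLoadClusterDAGMutation(ST.getInstrInfo(),
                                                ST.getRegisterInfo()));
  DAG->addMutation(createStoreClusterDAGMutation(ST.getInstrInfo(),
                                                 ST.getRegisterInfo()));
  return DAG;
}

A post-RA createPostMachineScheduler override can wire mutations into createSchedPostRA(C) in the same way.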
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:443
LLVM_ABI void initializeMachineSchedulerLegacyPass(PassRegistry &)
LLVM_ABI void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
LLVM_ABI void initializePostMachineSchedulerLegacyPass(PassRegistry &)
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResourceCycle."))
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define N
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:58
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:123
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:68
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
RegisterClassInfo * RegClassInfo
const TargetMachine * TM
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg.
PressureChange CriticalMax
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
SmallVector< VRegMaskOrUnit, 8 > LiveOutRegs
SmallVector< VRegMaskOrUnit, 8 > LiveInRegs
List of live in virtual registers or physical register units.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
A region of an MBB for scheduling.
Summarize the unscheduled region.
LLVM_ABI void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.