LLVM 21.0.0git
MachineScheduler.cpp
Go to the documentation of this file.
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
51#include "llvm/Config/llvm-config.h"
53#include "llvm/MC/LaneBitmask.h"
54#include "llvm/Pass.h"
57#include "llvm/Support/Debug.h"
62#include <algorithm>
63#include <cassert>
64#include <cstdint>
65#include <iterator>
66#include <limits>
67#include <memory>
68#include <string>
69#include <tuple>
70#include <utility>
71#include <vector>
72
73using namespace llvm;
74
75#define DEBUG_TYPE "machine-scheduler"
76
77STATISTIC(NumClustered, "Number of load/store pairs clustered");
78
79namespace llvm {
80
82 "misched-prera-direction", cl::Hidden,
83 cl::desc("Pre reg-alloc list scheduling direction"),
87 "Force top-down pre reg-alloc list scheduling"),
88 clEnumValN(MISched::BottomUp, "bottomup",
89 "Force bottom-up pre reg-alloc list scheduling"),
90 clEnumValN(MISched::Bidirectional, "bidirectional",
91 "Force bidirectional pre reg-alloc list scheduling")));
92
94 "misched-postra-direction", cl::Hidden,
95 cl::desc("Post reg-alloc list scheduling direction"),
99 "Force top-down post reg-alloc list scheduling"),
100 clEnumValN(MISched::BottomUp, "bottomup",
101 "Force bottom-up post reg-alloc list scheduling"),
102 clEnumValN(MISched::Bidirectional, "bidirectional",
103 "Force bidirectional post reg-alloc list scheduling")));
104
107 cl::desc("Print critical path length to stdout"));
108
110 "verify-misched", cl::Hidden,
111 cl::desc("Verify machine instrs before and after machine scheduling"));
112
113#ifndef NDEBUG
115 "view-misched-dags", cl::Hidden,
116 cl::desc("Pop up a window to show MISched dags after they are processed"));
117cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
118 cl::desc("Print schedule DAGs"));
120 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
121 cl::desc("Dump resource usage at schedule boundary."));
123 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
124 cl::desc("Show details of invoking getNextResoufceCycle."));
125#else
126const bool ViewMISchedDAGs = false;
127const bool PrintDAGs = false;
128const bool MischedDetailResourceBooking = false;
129#ifdef LLVM_ENABLE_DUMP
130const bool MISchedDumpReservedCycles = false;
131#endif // LLVM_ENABLE_DUMP
132#endif // NDEBUG
133
134} // end namespace llvm
135
136#ifndef NDEBUG
137/// In some situations a few uninteresting nodes depend on nearly all other
138/// nodes in the graph, provide a cutoff to hide them.
139static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
140 cl::desc("Hide nodes with more predecessor/successor than cutoff"));
141
143 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
144
146 cl::desc("Only schedule this function"));
147static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
148 cl::desc("Only schedule this MBB#"));
149#endif // NDEBUG
150
151/// Avoid quadratic complexity in unusually large basic blocks by limiting the
152/// size of the ready lists.
154 cl::desc("Limit ready list to N instructions"), cl::init(256));
155
156static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
157 cl::desc("Enable register pressure scheduling."), cl::init(true));
158
159static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
160 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
161
163 cl::desc("Enable memop clustering."),
164 cl::init(true));
165static cl::opt<bool>
166 ForceFastCluster("force-fast-cluster", cl::Hidden,
167 cl::desc("Switch to fast cluster algorithm with the lost "
168 "of some fusion opportunities"),
169 cl::init(false));
171 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
172 cl::desc("The threshold for fast cluster"),
173 cl::init(1000));
174
175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
177 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
178 cl::desc("Dump resource usage at schedule boundary."));
180 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
181 cl::desc("Set width of the columns with "
182 "the resources and schedule units"),
183 cl::init(19));
185 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
186 cl::desc("Set width of the columns showing resource booking."),
187 cl::init(5));
189 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
190 cl::desc("Sort the resources printed in the dump trace"));
191#endif
192
194 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
195 cl::desc("Number of intervals to track"), cl::init(10));
196
197// DAG subtrees must have at least this many nodes.
198static const unsigned MinSubtreeSize = 8;
199
200// Pin the vtables to this file.
// Out-of-line virtual method definition: anchors MachineSchedStrategy's
// vtable emission to this translation unit.
void MachineSchedStrategy::anchor() {}
202
// Out-of-line virtual method definition: anchors ScheduleDAGMutation's
// vtable emission to this translation unit.
void ScheduleDAGMutation::anchor() {}
204
205//===----------------------------------------------------------------------===//
206// Machine Instruction Scheduling Pass and Registry
207//===----------------------------------------------------------------------===//
208
211}
212
214 delete RegClassInfo;
215}
216
217namespace llvm {
218namespace impl_detail {
219
220/// Base class for the machine scheduler classes.
222protected:
223 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
224};
225
226/// Impl class for MachineScheduler.
228 // These are only for using MF.verify()
229 // remove when verify supports passing in all analyses
230 MachineFunctionPass *P = nullptr;
231 MachineFunctionAnalysisManager *MFAM = nullptr;
232
233public:
239 };
240
242 // Migration only
243 void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
244 void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
245
246 bool run(MachineFunction &MF, const TargetMachine &TM,
247 const RequiredAnalyses &Analyses);
248
249protected:
251};
252
253/// Impl class for PostMachineScheduler.
255 // These are only for using MF.verify()
256 // remove when verify supports passing in all analyses
257 MachineFunctionPass *P = nullptr;
258 MachineFunctionAnalysisManager *MFAM = nullptr;
259
260public:
264 };
266 // Migration only
267 void setLegacyPass(MachineFunctionPass *P) { this->P = P; }
268 void setMFAM(MachineFunctionAnalysisManager *MFAM) { this->MFAM = MFAM; }
269
270 bool run(MachineFunction &Func, const TargetMachine &TM,
271 const RequiredAnalyses &Analyses);
272
273protected:
275};
276
277} // namespace impl_detail
278} // namespace llvm
279
283
284namespace {
285/// MachineScheduler runs after coalescing and before register allocation.
286class MachineSchedulerLegacy : public MachineFunctionPass {
288
289public:
290 MachineSchedulerLegacy();
291 void getAnalysisUsage(AnalysisUsage &AU) const override;
292 bool runOnMachineFunction(MachineFunction&) override;
293
294 static char ID; // Class identification, replacement for typeinfo
295};
296
297/// PostMachineScheduler runs after shortly before code emission.
298class PostMachineSchedulerLegacy : public MachineFunctionPass {
300
301public:
302 PostMachineSchedulerLegacy();
303 void getAnalysisUsage(AnalysisUsage &AU) const override;
304 bool runOnMachineFunction(MachineFunction &) override;
305
306 static char ID; // Class identification, replacement for typeinfo
307};
308
309} // end anonymous namespace
310
311char MachineSchedulerLegacy::ID = 0;
312
313char &llvm::MachineSchedulerID = MachineSchedulerLegacy::ID;
314
315INITIALIZE_PASS_BEGIN(MachineSchedulerLegacy, DEBUG_TYPE,
316 "Machine Instruction Scheduler", false, false)
322INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE,
324
325MachineSchedulerLegacy::MachineSchedulerLegacy() : MachineFunctionPass(ID) {
327}
328
329void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
330 AU.setPreservesCFG();
340}
341
342char PostMachineSchedulerLegacy::ID = 0;
343
344char &llvm::PostMachineSchedulerID = PostMachineSchedulerLegacy::ID;
345
346INITIALIZE_PASS_BEGIN(PostMachineSchedulerLegacy, "postmisched",
347 "PostRA Machine Instruction Scheduler", false, false)
351INITIALIZE_PASS_END(PostMachineSchedulerLegacy, "postmisched",
353
354PostMachineSchedulerLegacy::PostMachineSchedulerLegacy()
357}
358
359void PostMachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
360 AU.setPreservesCFG();
366}
367
370
371/// A dummy default scheduler factory indicates whether the scheduler
372/// is overridden on the command line.
374 return nullptr;
375}
376
377/// MachineSchedOpt allows command line selection of the scheduler.
382 cl::desc("Machine instruction scheduler to use"));
383
385DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
387
389 "enable-misched",
390 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
391 cl::Hidden);
392
394 "enable-post-misched",
395 cl::desc("Enable the post-ra machine instruction scheduling pass."),
396 cl::init(true), cl::Hidden);
397
398/// Decrement this iterator until reaching the top or a non-debug instr.
402 assert(I != Beg && "reached the top of the region, cannot decrement");
403 while (--I != Beg) {
404 if (!I->isDebugOrPseudoInstr())
405 break;
406 }
407 return I;
408}
409
410/// Non-const version.
416}
417
418/// If this iterator is a debug value, increment until reaching the End or a
419/// non-debug instruction.
423 for(; I != End; ++I) {
424 if (!I->isDebugOrPseudoInstr())
425 break;
426 }
427 return I;
428}
429
430/// Non-const version.
436}
437
438/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
439ScheduleDAGInstrs *MachineSchedulerImpl::createMachineScheduler() {
440 // Select the scheduler, or set the default.
442 if (Ctor != useDefaultMachineSched)
443 return Ctor(this);
444
445 // Get the default scheduler set by the target for this function.
446 ScheduleDAGInstrs *Scheduler = TM->createMachineScheduler(this);
447 if (Scheduler)
448 return Scheduler;
449
450 // Default to GenericScheduler.
451 return createGenericSchedLive(this);
452}
453
454bool MachineSchedulerImpl::run(MachineFunction &Func, const TargetMachine &TM,
455 const RequiredAnalyses &Analyses) {
456 MF = &Func;
457 MLI = &Analyses.MLI;
458 MDT = &Analyses.MDT;
459 this->TM = &TM;
460 AA = &Analyses.AA;
461 LIS = &Analyses.LIS;
462
463 if (VerifyScheduling) {
464 LLVM_DEBUG(LIS->dump());
465 const char *MSchedBanner = "Before machine scheduling.";
466 if (P)
467 MF->verify(P, MSchedBanner, &errs());
468 else
469 MF->verify(*MFAM, MSchedBanner, &errs());
470 }
471 RegClassInfo->runOnMachineFunction(*MF);
472
473 // Instantiate the selected scheduler for this target, function, and
474 // optimization level.
475 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
476 scheduleRegions(*Scheduler, false);
477
478 LLVM_DEBUG(LIS->dump());
479 if (VerifyScheduling) {
480 const char *MSchedBanner = "After machine scheduling.";
481 if (P)
482 MF->verify(P, MSchedBanner, &errs());
483 else
484 MF->verify(*MFAM, MSchedBanner, &errs());
485 }
486 return true;
487}
488
489/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
490/// the caller. We don't have a command line option to override the postRA
491/// scheduler. The Target must configure it.
492ScheduleDAGInstrs *PostMachineSchedulerImpl::createPostMachineScheduler() {
493 // Get the postRA scheduler set by the target for this function.
494 ScheduleDAGInstrs *Scheduler = TM->createPostMachineScheduler(this);
495 if (Scheduler)
496 return Scheduler;
497
498 // Default to GenericScheduler.
499 return createGenericSchedPostRA(this);
500}
501
502bool PostMachineSchedulerImpl::run(MachineFunction &Func,
503 const TargetMachine &TM,
504 const RequiredAnalyses &Analyses) {
505 MF = &Func;
506 MLI = &Analyses.MLI;
507 this->TM = &TM;
508 AA = &Analyses.AA;
509
510 if (VerifyScheduling) {
511 const char *PostMSchedBanner = "Before post machine scheduling.";
512 if (P)
513 MF->verify(P, PostMSchedBanner, &errs());
514 else
515 MF->verify(*MFAM, PostMSchedBanner, &errs());
516 }
517
518 // Instantiate the selected scheduler for this target, function, and
519 // optimization level.
520 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
521 scheduleRegions(*Scheduler, true);
522
523 if (VerifyScheduling) {
524 const char *PostMSchedBanner = "After post machine scheduling.";
525 if (P)
526 MF->verify(P, PostMSchedBanner, &errs());
527 else
528 MF->verify(*MFAM, PostMSchedBanner, &errs());
529 }
530 return true;
531}
532
533/// Top-level MachineScheduler pass driver.
534///
535/// Visit blocks in function order. Divide each block into scheduling regions
536/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
537/// consistent with the DAG builder, which traverses the interior of the
538/// scheduling regions bottom-up.
539///
540/// This design avoids exposing scheduling boundaries to the DAG builder,
541/// simplifying the DAG builder's support for "special" target instructions.
542/// At the same time the design allows target schedulers to operate across
543/// scheduling boundaries, for example to bundle the boundary instructions
544/// without reordering them. This creates complexity, because the target
545/// scheduler must update the RegionBegin and RegionEnd positions cached by
546/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
547/// design would be to split blocks at scheduling boundaries, but LLVM has a
548/// general bias against block splitting purely for implementation simplicity.
549bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
550 if (skipFunction(MF.getFunction()))
551 return false;
552
555 return false;
556 } else if (!MF.getSubtarget().enableMachineScheduler()) {
557 return false;
558 }
559
560 LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
561
562 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
563 auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
564 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
565 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
566 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
567 Impl.setLegacyPass(this);
568 return Impl.run(MF, TM, {MLI, MDT, AA, LIS});
569}
570
572 : Impl(std::make_unique<MachineSchedulerImpl>()), TM(TM) {}
575 default;
576
578 : Impl(std::make_unique<PostMachineSchedulerImpl>()), TM(TM) {}
580 PostMachineSchedulerPass &&Other) = default;
582
588 return PreservedAnalyses::all();
589 } else if (!MF.getSubtarget().enableMachineScheduler()) {
590 return PreservedAnalyses::all();
591 }
592
593 LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
594 auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
595 auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
597 .getManager();
598 auto &AA = FAM.getResult<AAManager>(MF.getFunction());
599 auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
600 Impl->setMFAM(&MFAM);
601 bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS});
602 if (!Changed)
603 return PreservedAnalyses::all();
604
609 return PA;
610}
611
612bool PostMachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
613 if (skipFunction(MF.getFunction()))
614 return false;
615
618 return false;
619 } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
620 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
621 return false;
622 }
623 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
624 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
625 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
626 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
627 Impl.setLegacyPass(this);
628 return Impl.run(MF, TM, {MLI, AA});
629}
630
636 return PreservedAnalyses::all();
637 } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
638 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
639 return PreservedAnalyses::all();
640 }
641 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
642 auto &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
644 .getManager();
645 auto &AA = FAM.getResult<AAManager>(MF.getFunction());
646
647 Impl->setMFAM(&MFAM);
648 bool Changed = Impl->run(MF, *TM, {MLI, AA});
649 if (!Changed)
650 return PreservedAnalyses::all();
651
654 return PA;
655}
656
657/// Return true of the given instruction should not be included in a scheduling
658/// region.
659///
660/// MachineScheduler does not currently support scheduling across calls. To
661/// handle calls, the DAG builder needs to be modified to create register
662/// anti/output dependencies on the registers clobbered by the call's regmask
663/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
664/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
665/// the boundary, but there would be no benefit to postRA scheduling across
666/// calls this late anyway.
669 MachineFunction *MF,
670 const TargetInstrInfo *TII) {
671 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF) ||
672 MI->isFakeUse();
673}
674
675/// A region of an MBB for scheduling.
676namespace {
677struct SchedRegion {
678 /// RegionBegin is the first instruction in the scheduling region, and
679 /// RegionEnd is either MBB->end() or the scheduling boundary after the
680 /// last instruction in the scheduling region. These iterators cannot refer
681 /// to instructions outside of the identified scheduling region because
682 /// those may be reordered before scheduling this region.
683 MachineBasicBlock::iterator RegionBegin;
685 unsigned NumRegionInstrs;
686
688 unsigned N) :
689 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
690};
691} // end anonymous namespace
692
694
695static void
697 MBBRegionsVector &Regions,
698 bool RegionsTopDown) {
701
703 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
704 RegionEnd != MBB->begin(); RegionEnd = I) {
705
706 // Avoid decrementing RegionEnd for blocks with no terminator.
707 if (RegionEnd != MBB->end() ||
708 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
709 --RegionEnd;
710 }
711
712 // The next region starts above the previous region. Look backward in the
713 // instruction stream until we find the nearest boundary.
714 unsigned NumRegionInstrs = 0;
715 I = RegionEnd;
716 for (;I != MBB->begin(); --I) {
717 MachineInstr &MI = *std::prev(I);
718 if (isSchedBoundary(&MI, &*MBB, MF, TII))
719 break;
720 if (!MI.isDebugOrPseudoInstr()) {
721 // MBB::size() uses instr_iterator to count. Here we need a bundle to
722 // count as a single instruction.
723 ++NumRegionInstrs;
724 }
725 }
726
727 // It's possible we found a scheduling region that only has debug
728 // instructions. Don't bother scheduling these.
729 if (NumRegionInstrs != 0)
730 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
731 }
732
733 if (RegionsTopDown)
734 std::reverse(Regions.begin(), Regions.end());
735}
736
737/// Main driver for both MachineScheduler and PostMachineScheduler.
738void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
739 bool FixKillFlags) {
740 // Visit all machine basic blocks.
741 //
742 // TODO: Visit blocks in global postorder or postorder within the bottom-up
743 // loop tree. Then we can optionally compute global RegPressure.
744 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
745 MBB != MBBEnd; ++MBB) {
746
747 Scheduler.startBlock(&*MBB);
748
749#ifndef NDEBUG
750 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
751 continue;
752 if (SchedOnlyBlock.getNumOccurrences()
753 && (int)SchedOnlyBlock != MBB->getNumber())
754 continue;
755#endif
756
757 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
758 // points to the scheduling boundary at the bottom of the region. The DAG
759 // does not include RegionEnd, but the region does (i.e. the next
760 // RegionEnd is above the previous RegionBegin). If the current block has
761 // no terminator then RegionEnd == MBB->end() for the bottom region.
762 //
763 // All the regions of MBB are first found and stored in MBBRegions, which
764 // will be processed (MBB) top-down if initialized with true.
765 //
766 // The Scheduler may insert instructions during either schedule() or
767 // exitRegion(), even for empty regions. So the local iterators 'I' and
768 // 'RegionEnd' are invalid across these calls. Instructions must not be
769 // added to other regions than the current one without updating MBBRegions.
770
771 MBBRegionsVector MBBRegions;
772 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
773 for (const SchedRegion &R : MBBRegions) {
774 MachineBasicBlock::iterator I = R.RegionBegin;
775 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
776 unsigned NumRegionInstrs = R.NumRegionInstrs;
777
778 // Notify the scheduler of the region, even if we may skip scheduling
779 // it. Perhaps it still needs to be bundled.
780 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
781
782 // Skip empty scheduling regions (0 or 1 schedulable instructions).
783 if (I == RegionEnd || I == std::prev(RegionEnd)) {
784 // Close the current region. Bundle the terminator if needed.
785 // This invalidates 'RegionEnd' and 'I'.
786 Scheduler.exitRegion();
787 continue;
788 }
789 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
790 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
791 << " " << MBB->getName() << "\n From: " << *I
792 << " To: ";
793 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
794 else dbgs() << "End\n";
795 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
797 errs() << MF->getName();
798 errs() << ":%bb. " << MBB->getNumber();
799 errs() << " " << MBB->getName() << " \n";
800 }
801
802 // Schedule a region: possibly reorder instructions.
803 // This invalidates the original region iterators.
804 Scheduler.schedule();
805
806 // Close the current region.
807 Scheduler.exitRegion();
808 }
809 Scheduler.finishBlock();
810 // FIXME: Ideally, no further passes should rely on kill flags. However,
811 // thumb2 size reduction is currently an exception, so the PostMIScheduler
812 // needs to do this.
813 if (FixKillFlags)
814 Scheduler.fixupKills(*MBB);
815 }
816 Scheduler.finalizeSchedule();
817}
818
819#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
821 dbgs() << "Queue " << Name << ": ";
822 for (const SUnit *SU : Queue)
823 dbgs() << SU->NodeNum << " ";
824 dbgs() << "\n";
825}
826#endif
827
828//===----------------------------------------------------------------------===//
829// ScheduleDAGMI - Basic machine instruction scheduling. This is
830// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
831// virtual registers.
832// ===----------------------------------------------------------------------===/
833
834// Provide a vtable anchor.
836
837/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
838/// NumPredsLeft reaches zero, release the successor node.
839///
840/// FIXME: Adjust SuccSU height based on MinLatency.
842 SUnit *SuccSU = SuccEdge->getSUnit();
843
844 if (SuccEdge->isWeak()) {
845 --SuccSU->WeakPredsLeft;
846 if (SuccEdge->isCluster())
847 NextClusterSucc = SuccSU;
848 return;
849 }
850#ifndef NDEBUG
851 if (SuccSU->NumPredsLeft == 0) {
852 dbgs() << "*** Scheduling failed! ***\n";
853 dumpNode(*SuccSU);
854 dbgs() << " has been released too many times!\n";
855 llvm_unreachable(nullptr);
856 }
857#endif
858 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
859 // CurrCycle may have advanced since then.
860 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
861 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
862
863 --SuccSU->NumPredsLeft;
864 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
865 SchedImpl->releaseTopNode(SuccSU);
866}
867
868/// releaseSuccessors - Call releaseSucc on each of SU's successors.
870 for (SDep &Succ : SU->Succs)
871 releaseSucc(SU, &Succ);
872}
873
874/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
875/// NumSuccsLeft reaches zero, release the predecessor node.
876///
877/// FIXME: Adjust PredSU height based on MinLatency.
879 SUnit *PredSU = PredEdge->getSUnit();
880
881 if (PredEdge->isWeak()) {
882 --PredSU->WeakSuccsLeft;
883 if (PredEdge->isCluster())
884 NextClusterPred = PredSU;
885 return;
886 }
887#ifndef NDEBUG
888 if (PredSU->NumSuccsLeft == 0) {
889 dbgs() << "*** Scheduling failed! ***\n";
890 dumpNode(*PredSU);
891 dbgs() << " has been released too many times!\n";
892 llvm_unreachable(nullptr);
893 }
894#endif
895 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
896 // CurrCycle may have advanced since then.
897 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
898 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
899
900 --PredSU->NumSuccsLeft;
901 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
902 SchedImpl->releaseBottomNode(PredSU);
903}
904
905/// releasePredecessors - Call releasePred on each of SU's predecessors.
907 for (SDep &Pred : SU->Preds)
908 releasePred(SU, &Pred);
909}
910
913 SchedImpl->enterMBB(bb);
914}
915
917 SchedImpl->leaveMBB();
919}
920
921/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
922/// after crossing a scheduling boundary. [begin, end) includes all instructions
923/// in the region, including the boundary itself and single-instruction regions
924/// that don't get scheduled.
928 unsigned regioninstrs)
929{
930 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
931
932 SchedImpl->initPolicy(begin, end, regioninstrs);
933
934 // Set dump direction after initializing sched policy.
936 if (SchedImpl->getPolicy().OnlyTopDown)
938 else if (SchedImpl->getPolicy().OnlyBottomUp)
940 else
943}
944
945/// This is normally called from the main scheduler loop but may also be invoked
946/// by the scheduling strategy to perform additional code motion.
949 // Advance RegionBegin if the first instruction moves down.
950 if (&*RegionBegin == MI)
951 ++RegionBegin;
952
953 // Update the instruction stream.
954 BB->splice(InsertPos, BB, MI);
955
956 // Update LiveIntervals
957 if (LIS)
958 LIS->handleMove(*MI, /*UpdateFlags=*/true);
959
960 // Recede RegionBegin if an instruction moves above the first.
961 if (RegionBegin == InsertPos)
962 RegionBegin = MI;
963}
964
966#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
967 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
969 return false;
970 }
971 ++NumInstrsScheduled;
972#endif
973 return true;
974}
975
976/// Per-region scheduling driver, called back from
977/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
978/// that does not consider liveness or register pressure. It is useful for
979/// PostRA scheduling and potentially other custom schedulers.
981 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
982 LLVM_DEBUG(SchedImpl->dumpPolicy());
983
984 // Build the DAG.
986
988
989 SmallVector<SUnit*, 8> TopRoots, BotRoots;
990 findRootsAndBiasEdges(TopRoots, BotRoots);
991
992 LLVM_DEBUG(dump());
993 if (PrintDAGs) dump();
995
996 // Initialize the strategy before modifying the DAG.
997 // This may initialize a DFSResult to be used for queue priority.
998 SchedImpl->initialize(this);
999
1000 // Initialize ready queues now that the DAG and priority data are finalized.
1001 initQueues(TopRoots, BotRoots);
1002
1003 bool IsTopNode = false;
1004 while (true) {
1005 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
1006 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1007 if (!SU) break;
1008
1009 assert(!SU->isScheduled && "Node already scheduled");
1010 if (!checkSchedLimit())
1011 break;
1012
1013 MachineInstr *MI = SU->getInstr();
1014 if (IsTopNode) {
1015 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1016 if (&*CurrentTop == MI)
1018 else
1020 } else {
1021 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1024 if (&*priorII == MI)
1025 CurrentBottom = priorII;
1026 else {
1027 if (&*CurrentTop == MI)
1028 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1030 CurrentBottom = MI;
1031 }
1032 }
1033 // Notify the scheduling strategy before updating the DAG.
1034 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
1035 // runs, it can then use the accurate ReadyCycle time to determine whether
1036 // newly released nodes can move to the readyQ.
1037 SchedImpl->schedNode(SU, IsTopNode);
1038
1039 updateQueues(SU, IsTopNode);
1040 }
1041 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1042
1044
1045 LLVM_DEBUG({
1046 dbgs() << "*** Final schedule for "
1047 << printMBBReference(*begin()->getParent()) << " ***\n";
1048 dumpSchedule();
1049 dbgs() << '\n';
1050 });
1051}
1052
1053/// Apply each ScheduleDAGMutation step in order.
1055 for (auto &m : Mutations)
1056 m->apply(this);
1057}
1058
1061 SmallVectorImpl<SUnit*> &BotRoots) {
1062 for (SUnit &SU : SUnits) {
1063 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
1064
1065 // Order predecessors so DFSResult follows the critical path.
1066 SU.biasCriticalPath();
1067
1068 // A SUnit is ready to top schedule if it has no predecessors.
1069 if (!SU.NumPredsLeft)
1070 TopRoots.push_back(&SU);
1071 // A SUnit is ready to bottom schedule if it has no successors.
1072 if (!SU.NumSuccsLeft)
1073 BotRoots.push_back(&SU);
1074 }
1076}
1077
1078/// Identify DAG roots and setup scheduler queues.
1080 ArrayRef<SUnit*> BotRoots) {
1081 NextClusterSucc = nullptr;
1082 NextClusterPred = nullptr;
1083
1084 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
1085 //
1086 // Nodes with unreleased weak edges can still be roots.
1087 // Release top roots in forward order.
1088 for (SUnit *SU : TopRoots)
1089 SchedImpl->releaseTopNode(SU);
1090
1091 // Release bottom roots in reverse order so the higher priority nodes appear
1092 // first. This is more natural and slightly more efficient.
1094 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
1095 SchedImpl->releaseBottomNode(*I);
1096 }
1097
1100
1101 SchedImpl->registerRoots();
1102
1103 // Advance past initial DebugValues.
1106}
1107
1108/// Update scheduler queues after scheduling an instruction.
1109void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
1110 // Release dependent instructions for scheduling.
1111 if (IsTopNode)
1113 else
1115
1116 SU->isScheduled = true;
1117}
1118
1119/// Reinsert any remaining debug_values, just like the PostRA scheduler.
// NOTE(review): lossy extraction. The function signature (line 1120), the
// splice of FirstDbgValue (lines 1123-1124), and the RegionEnd update
// (line 1136) were dropped — confirm against upstream before editing.
 1121  // If first instruction was a DBG_VALUE then put it back.
 1122  if (FirstDbgValue) {
 1125  }
 1126
// Walk the saved (DbgValue, preceding-MI) pairs in reverse so each DBG_VALUE
// is re-inserted immediately after the instruction it originally followed.
 1127  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
 1128         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
 1129    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
 1130    MachineInstr *DbgValue = P.first;
 1131    MachineBasicBlock::iterator OrigPrevMI = P.second;
 1132    if (&*RegionBegin == DbgValue)
 1133      ++RegionBegin;
 1134    BB->splice(std::next(OrigPrevMI), BB, DbgValue);
 1135    if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
 1137  }
 1138}
1139
1140#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Legend shared by the TopDown and BottomUp schedule-table dumpers below.
1141static const char *scheduleTableLegend = "  i: issue\n  x: resource booked";
// NOTE(review): lossy extraction. Line 1143 below carried the
// dumpScheduleTraceTopDown signature; several interior lines (1145, 1161-1162,
// 1193-1197) were also dropped — confirm against upstream before editing.
1143
 1144  // Bail off when there is no schedule model to query.
 1146    return;
 1147
 1148  // Nothing to show if there is no or just one instruction.
 1149  if (BB->size() < 2)
 1150    return;
 1151
 1152  dbgs() << " * Schedule table (TopDown):\n";
 1153  dbgs() << scheduleTableLegend << "\n";
// First pass: find the cycle range [FirstCycle, LastCycle] spanned by the
// region, extending LastCycle to cover the latest resource release.
 1154  const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
 1155  unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
 1156  for (MachineInstr &MI : *this) {
 1157    SUnit *SU = getSUnit(&MI);
 1158    if (!SU)
 1159      continue;
 1160    const MCSchedClassDesc *SC = getSchedClass(SU);
 1163         PI != PE; ++PI) {
 1164      if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
 1165        LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
 1166    }
 1167  }
 1168  // Print the header with the cycles
 1169  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
 1170  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
 1171    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
 1172  dbgs() << "|\n";
 1173
// Second pass: one row per SUnit marking its issue cycle, followed by one row
// per consumed processor resource marking the cycles it is booked.
 1174  for (MachineInstr &MI : *this) {
 1175    SUnit *SU = getSUnit(&MI);
 1176    if (!SU) {
 1177      dbgs() << "Missing SUnit\n";
 1178      continue;
 1179    }
 1180    std::string NodeName("SU(");
 1181    NodeName += std::to_string(SU->NodeNum) + ")";
 1182    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
 1183    unsigned C = FirstCycle;
 1184    for (; C <= LastCycle; ++C) {
 1185      if (C == SU->TopReadyCycle)
 1186        dbgs() << llvm::left_justify("| i", ColWidth);
 1187      else
 1188        dbgs() << llvm::left_justify("|", ColWidth);
 1189    }
 1190    dbgs() << "|\n";
 1191    const MCSchedClassDesc *SC = getSchedClass(SU);
 1192
 1196
// Sort resources by (AcquireAtCycle, ReleaseAtCycle) so rows print in the
// order the resources are taken.
 1198    llvm::stable_sort(ResourcesIt,
 1199                      [](const MCWriteProcResEntry &LHS,
 1200                         const MCWriteProcResEntry &RHS) -> bool {
 1201                        return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
 1202                               (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
 1203                                LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
 1204                      });
 1205    for (const MCWriteProcResEntry &PI : ResourcesIt) {
 1206      C = FirstCycle;
 1207      const std::string ResName =
 1208          SchedModel.getResourceName(PI.ProcResourceIdx);
 1209      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
 1210      for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
 1211        dbgs() << llvm::left_justify("|", ColWidth);
 1212      }
 1213      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
 1214           ++I, ++C)
 1215        dbgs() << llvm::left_justify("| x", ColWidth);
 1216      while (C++ <= LastCycle)
 1217        dbgs() << llvm::left_justify("|", ColWidth);
 1218      // Place end char
 1219      dbgs() << "| \n";
 1220    }
 1221  }
 1222}
1223
// NOTE(review): lossy extraction. The dumpScheduleTraceBottomUp signature
// (line 1224) and interior lines (1226, 1243-1244, 1274-1278) were dropped —
// confirm against upstream before editing. Mirrors the TopDown dumper above
// but counts cycles downward from BotReadyCycle.
 1225  // Bail off when there is no schedule model to query.
 1227    return;
 1228
 1229  // Nothing to show if there is no or just one instruction.
 1230  if (BB->size() < 2)
 1231    return;
 1232
 1233  dbgs() << " * Schedule table (BottomUp):\n";
 1234  dbgs() << scheduleTableLegend << "\n";
 1235
// Cycles are signed here: bottom-up cycles decrease and may pass zero.
 1236  const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
 1237  int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
 1238  for (MachineInstr &MI : *this) {
 1239    SUnit *SU = getSUnit(&MI);
 1240    if (!SU)
 1241      continue;
 1242    const MCSchedClassDesc *SC = getSchedClass(SU);
 1245         PI != PE; ++PI) {
 1246      if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
 1247        LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
 1248    }
 1249  }
 1250  // Print the header with the cycles
 1251  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
 1252  for (int C = FirstCycle; C >= LastCycle; --C)
 1253    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
 1254  dbgs() << "|\n";
 1255
 1256  for (MachineInstr &MI : *this) {
 1257    SUnit *SU = getSUnit(&MI);
 1258    if (!SU) {
 1259      dbgs() << "Missing SUnit\n";
 1260      continue;
 1261    }
 1262    std::string NodeName("SU(");
 1263    NodeName += std::to_string(SU->NodeNum) + ")";
 1264    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
 1265    int C = FirstCycle;
 1266    for (; C >= LastCycle; --C) {
 1267      if (C == (int)SU->BotReadyCycle)
 1268        dbgs() << llvm::left_justify("| i", ColWidth);
 1269      else
 1270        dbgs() << llvm::left_justify("|", ColWidth);
 1271    }
 1272    dbgs() << "|\n";
 1273    const MCSchedClassDesc *SC = getSchedClass(SU);
 1277
 1279    llvm::stable_sort(ResourcesIt,
 1280                      [](const MCWriteProcResEntry &LHS,
 1281                         const MCWriteProcResEntry &RHS) -> bool {
 1282                        return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
 1283                               (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
 1284                                LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
 1285                      });
 1286    for (const MCWriteProcResEntry &PI : ResourcesIt) {
 1287      C = FirstCycle;
 1288      const std::string ResName =
 1289          SchedModel.getResourceName(PI.ProcResourceIdx);
 1290      dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
 1291      for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
 1292        dbgs() << llvm::left_justify("|", ColWidth);
 1293      }
 1294      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
 1295           ++I, --C)
 1296        dbgs() << llvm::left_justify("| x", ColWidth);
 1297      while (C-- >= LastCycle)
 1298        dbgs() << llvm::left_justify("|", ColWidth);
 1299      // Place end char
 1300      dbgs() << "| \n";
 1301    }
 1302  }
 1303}
1304#endif
1305
1306#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): lossy extraction. The function signature (presumably
// ScheduleDAGMI::dumpSchedule, lines 1307-1310) and the then-branches of the
// DumpDirection dispatch (lines 1312-1313) were dropped — confirm against
// upstream. What remains dispatches a schedule-table dump by direction, then
// dumps every node in instruction order.
 1311    else if (DumpDir == DumpDirection::BottomUp)
 1314      dbgs() << "* Schedule table (Bidirectional): not implemented\n";
 1315    } else {
 1316      dbgs() << "* Schedule table: DumpDirection not set.\n";
 1317    }
 1318  }
 1319
 1320  for (MachineInstr &MI : *this) {
 1321    if (SUnit *SU = getSUnit(&MI))
 1322      dumpNode(*SU);
 1323    else
 1324      dbgs() << "Missing SUnit\n";
 1325  }
 1326}
1327#endif
1328
1329//===----------------------------------------------------------------------===//
1330// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1331// preservation.
1332//===----------------------------------------------------------------------===//
1333
1335 delete DFSResult;
1336}
1337
// Record every virtual-register use in SU's instruction into VRegUses,
// skipping re-defs when lane masks are tracked and avoiding duplicates.
// NOTE(review): lossy extraction — the function signature (line 1338), the
// iterator declaration (line 1366), and the VRegUses.insert call (line 1372)
// were dropped; confirm against upstream before editing.
 1339  const MachineInstr &MI = *SU.getInstr();
 1340  for (const MachineOperand &MO : MI.operands()) {
 1341    if (!MO.isReg())
 1342      continue;
 1343    if (!MO.readsReg())
 1344      continue;
 1345    if (TrackLaneMasks && !MO.isUse())
 1346      continue;
 1347
 1348    Register Reg = MO.getReg();
 1349    if (!Reg.isVirtual())
 1350      continue;
 1351
 1352    // Ignore re-defs.
 1353    if (TrackLaneMasks) {
 1354      bool FoundDef = false;
 1355      for (const MachineOperand &MO2 : MI.all_defs()) {
 1356        if (MO2.getReg() == Reg && !MO2.isDead()) {
 1357          FoundDef = true;
 1358          break;
 1359        }
 1360      }
 1361      if (FoundDef)
 1362        continue;
 1363    }
 1364
 1365    // Record this local VReg use.
// Linear scan for an existing (Reg, SU) entry so each pair is inserted once.
 1367    for (; UI != VRegUses.end(); ++UI) {
 1368      if (UI->SU == &SU)
 1369        break;
 1370    }
 1371    if (UI == VRegUses.end())
 1373  }
 1374}
1375
1376/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1377/// crossing a scheduling boundary. [begin, end) includes all instructions in
1378/// the region, including the boundary itself and single-instruction regions
1379/// that don't get scheduled.
// NOTE(review): lossy extraction — the first signature lines (1380-1382) and
// lines 1391/1396 (presumably the SUPressureDiffs clear and the assert's
// condition) were dropped; confirm against upstream before editing.
 1383                                unsigned regioninstrs)
 1384{
 1385  // ScheduleDAGMI initializes SchedImpl's per-region policy.
 1386  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
 1387
 1388  // For convenience remember the end of the liveness region.
 1389  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
 1390
 1392
// Query the strategy's per-region policy for pressure / lane-mask tracking.
 1393  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
 1394  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
 1395
 1397         "ShouldTrackLaneMasks requires ShouldTrackPressure");
 1398}
1399
1400// Setup the register pressure trackers for the top scheduled and bottom
1401// scheduled regions.
// NOTE(review): lossy extraction — the function signature (line 1402) and many
// tracker-init statements (1404, 1408-1411, 1414-1421, 1425-1432, 1437, 1441,
// 1447-1453, 1460) were dropped; confirm against upstream before editing.
 1403  VRegUses.clear();
 1405  for (SUnit &SU : SUnits)
 1406    collectVRegUses(SU);
 1407
 1409                    ShouldTrackLaneMasks, false);
 1411                    ShouldTrackLaneMasks, false);
 1412
 1413  // Close the RPTracker to finalize live ins.
 1415
 1417
 1418  // Initialize the live ins and live outs.
 1421
 1422  // Close one end of the tracker so we can call
 1423  // getMaxUpward/DownwardPressureDelta before advancing across any
 1424  // instructions. This converts currently live regs into live ins/outs.
 1427
 1429  if (!BotRPTracker.getLiveThru().empty()) {
 1431    LLVM_DEBUG(dbgs() << "Live Thru: ";
 1433  };
 1434
 1435  // For each live out vreg reduce the pressure change associated with other
 1436  // uses of the same vreg below the live-out reaching def.
 1438
 1439  // Account for liveness generated by the region boundary.
 1440  if (LiveRegionEnd != RegionEnd) {
 1442    BotRPTracker.recede(&LiveUses);
 1443    updatePressureDiffs(LiveUses);
 1444  }
 1445
 1446  LLVM_DEBUG(dbgs() << "Top Pressure: ";
 1448             dbgs() << "Bottom Pressure: ";
 1450
 1452          (RegionEnd->isDebugInstr() &&
 1454         "Can't find the region bottom");
 1455
 1456  // Cache the list of excess pressure sets in this region. This will also track
 1457  // the max pressure in the scheduled code for these sets.
 1458  RegionCriticalPSets.clear();
 1459  const std::vector<unsigned> &RegionPressure =
 1461  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
 1462    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
// A pressure set whose region pressure already exceeds its limit is
// "critical": track its max pressure while scheduling.
 1463    if (RegionPressure[i] > Limit) {
 1464      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
 1465                        << " Actual " << RegionPressure[i] << "\n");
 1466      RegionCriticalPSets.push_back(PressureChange(i));
 1467    }
 1468  }
 1469  LLVM_DEBUG({
 1470    if (RegionCriticalPSets.size() > 0) {
 1471      dbgs() << "Excess PSets: ";
 1472      for (const PressureChange &RCPS : RegionCriticalPSets)
 1473        dbgs() << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
 1474      dbgs() << "\n";
 1475    }
 1476  });
 1477}
1478
// Record the max pressure reached for each region-critical pressure set and
// log sets that are at or near their limit.
// NOTE(review): lossy extraction — the first signature lines (1479-1480,
// presumably ScheduleDAGMILive::updateScheduledPressure(const SUnit *SU, ...))
// were dropped; confirm against upstream before editing.
 1481                                          const std::vector<unsigned> &NewMaxPressure) {
 1482  const PressureDiff &PDiff = getPressureDiff(SU);
 1483  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
 1484  for (const PressureChange &PC : PDiff) {
 1485    if (!PC.isValid())
 1486      break;
 1487    unsigned ID = PC.getPSet();
// Both lists are sorted by pressure-set ID; advance the critical-set cursor
// to the current set.
 1488    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
 1489      ++CritIdx;
 1490    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
// Guard against int16_t overflow in PressureChange's unit-increment field.
 1491      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
 1492          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
 1493        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
 1494    }
 1495    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
 1496    if (NewMaxPressure[ID] >= Limit - 2) {
 1497      LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
 1498                        << NewMaxPressure[ID]
 1499                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
 1500                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
 1501                        << " livethru)\n");
 1502    }
 1503  }
 1504}
1505
1506/// Update the PressureDiff array for liveness after scheduling this
1507/// instruction.
// NOTE(review): lossy extraction — the function signature (line 1508) and
// lines 1515, 1533, 1548-1549, 1553, 1565 were dropped; confirm against
// upstream before editing.
 1509  for (const VRegMaskOrUnit &P : LiveUses) {
 1510    Register Reg = P.RegUnit;
 1511    /// FIXME: Currently assuming single-use physregs.
 1512    if (!Reg.isVirtual())
 1513      continue;
 1514
// The branch structure around the dropped line 1515 presumably tests
// ShouldTrackLaneMasks (the else-branch at line 1539 asserts on LaneMask).
 1516      // If the register has just become live then other uses won't change
 1517      // this fact anymore => decrement pressure.
 1518      // If the register has just become dead then other uses make it come
 1519      // back to life => increment pressure.
 1520      bool Decrement = P.LaneMask.any();
 1521
 1522      for (const VReg2SUnit &V2SU
 1523           : make_range(VRegUses.find(Reg), VRegUses.end())) {
 1524        SUnit &SU = *V2SU.SU;
 1525        if (SU.isScheduled || &SU == &ExitSU)
 1526          continue;
 1527
 1528        PressureDiff &PDiff = getPressureDiff(&SU);
 1529        PDiff.addPressureChange(Reg, Decrement, &MRI);
 1530        if (llvm::any_of(PDiff, [](const PressureChange &Change) {
 1531              return Change.isValid();
 1532            }))
 1534                     << "  UpdateRegPressure: SU(" << SU.NodeNum << ") "
 1535                     << printReg(Reg, TRI) << ':'
 1536                     << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
 1537                 dbgs() << "              to "; PDiff.dump(*TRI););
 1538      }
 1539    } else {
 1540      assert(P.LaneMask.any());
 1541      LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
 1542      // This may be called before CurrentBottom has been initialized. However,
 1543      // BotRPTracker must have a valid position. We want the value live into the
 1544      // instruction or live out of the block, so ask for the previous
 1545      // instruction's live-out.
 1546      const LiveInterval &LI = LIS->getInterval(Reg);
 1547      VNInfo *VNI;
 1550      if (I == BB->end())
 1551        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
 1552      else {
 1554        VNI = LRQ.valueIn();
 1555      }
 1556      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
 1557      assert(VNI && "No live value at use.");
 1558      for (const VReg2SUnit &V2SU
 1559           : make_range(VRegUses.find(Reg), VRegUses.end())) {
 1560        SUnit *SU = V2SU.SU;
 1561        // If this use comes before the reaching def, it cannot be a last use,
 1562        // so decrease its pressure change.
 1563        if (!SU->isScheduled && SU != &ExitSU) {
 1564          LiveQueryResult LRQ =
 1566          if (LRQ.valueIn() == VNI) {
 1567            PressureDiff &PDiff = getPressureDiff(SU);
 1568            PDiff.addPressureChange(Reg, true, &MRI);
 1569            if (llvm::any_of(PDiff, [](const PressureChange &Change) {
 1570                  return Change.isValid();
 1571                }))
 1572              LLVM_DEBUG(dbgs() << "  UpdateRegPressure: SU(" << SU->NodeNum
 1573                                << ") " << *SU->getInstr();
 1574                         dbgs() << "              to ";
 1575                         PDiff.dump(*TRI););
 1576          }
 1577        }
 1578      }
 1579    }
 1580  }
 1581}
1582
// Dump every node plus, when pressure tracking is on, its PressureDiff, and
// whether it must issue alone ("Single Issue").
// NOTE(review): lossy extraction — the function signature (line 1583) and the
// dumpNodeAll calls for EntrySU/ExitSU (lines 1586/1602) were dropped;
// confirm against upstream before editing.
1584#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 1585  if (EntrySU.getInstr() != nullptr)
 1587  for (const SUnit &SU : SUnits) {
 1588    dumpNodeAll(SU);
 1589    if (ShouldTrackPressure) {
 1590      dbgs() << "  Pressure Diff      : ";
 1591      getPressureDiff(&SU).dump(*TRI);
 1592    }
 1593    dbgs() << "  Single Issue       : ";
// "Single issue" means the instruction must both begin and end an issue group.
 1594    if (SchedModel.mustBeginGroup(SU.getInstr()) &&
 1595        SchedModel.mustEndGroup(SU.getInstr()))
 1596      dbgs() << "true;";
 1597    else
 1598      dbgs() << "false;";
 1599    dbgs() << '\n';
 1600  }
 1601  if (ExitSU.getInstr() != nullptr)
 1603#endif
 1604}
1605
1606/// schedule - Called back from MachineScheduler::runOnMachineFunction
1607/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1608/// only includes instructions that have DAG nodes, not scheduling boundaries.
1609///
1610/// This is a skeletal driver, with all the functionality pushed into helpers,
1611/// so that it can be easily extended by experimental schedulers. Generally,
1612/// implementing MachineSchedStrategy should be sufficient to implement a new
1613/// scheduling algorithm. However, if a scheduler further subclasses
1614/// ScheduleDAGMILive then it will want to override this virtual method in order
1615/// to update any specialized state.
// NOTE(review): lossy extraction — the function signature (line 1616) and
// lines 1619/1621/1632/1665 (presumably buildDAGWithRegPressure, the
// post-process-DAG call, viewGraph, and placeDebugValues) were dropped;
// confirm against upstream before editing.
 1617  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
 1618  LLVM_DEBUG(SchedImpl->dumpPolicy());
 1620
 1622
 1623  SmallVector<SUnit*, 8> TopRoots, BotRoots;
 1624  findRootsAndBiasEdges(TopRoots, BotRoots);
 1625
 1626  // Initialize the strategy before modifying the DAG.
 1627  // This may initialize a DFSResult to be used for queue priority.
 1628  SchedImpl->initialize(this);
 1629
 1630  LLVM_DEBUG(dump());
 1631  if (PrintDAGs) dump();
 1633
 1634  // Initialize ready queues now that the DAG and priority data are finalized.
 1635  initQueues(TopRoots, BotRoots);
 1636
// Main scheduling loop: the strategy picks nodes until none remain.
 1637  bool IsTopNode = false;
 1638  while (true) {
 1639    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
 1640    SUnit *SU = SchedImpl->pickNode(IsTopNode);
 1641    if (!SU) break;
 1642
 1643    assert(!SU->isScheduled && "Node already scheduled");
 1644    if (!checkSchedLimit())
 1645      break;
 1646
 1647    scheduleMI(SU, IsTopNode);
 1648
// Notify strategy/DFS bookkeeping the first time each subtree is entered.
 1649    if (DFSResult) {
 1650      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
 1651      if (!ScheduledTrees.test(SubtreeID)) {
 1652        ScheduledTrees.set(SubtreeID);
 1653        DFSResult->scheduleTree(SubtreeID);
 1654        SchedImpl->scheduleTree(SubtreeID);
 1655      }
 1656    }
 1657
 1658    // Notify the scheduling strategy after updating the DAG.
 1659    SchedImpl->schedNode(SU, IsTopNode);
 1660
 1661    updateQueues(SU, IsTopNode);
 1662  }
 1663  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 1664
 1666
 1667  LLVM_DEBUG({
 1668    dbgs() << "*** Final schedule for "
 1669           << printMBBReference(*begin()->getParent()) << " ***\n";
 1670    dumpSchedule();
 1671    dbgs() << '\n';
 1672  });
 1673}
1674
1675/// Build the DAG and setup three register pressure trackers.
// NOTE(review): lossy extraction — the function signature (line 1676) and
// lines 1680/1685/1693/1696 (presumably clearing SUPressureDiffs, the
// RPTracker.init call, buildSchedGraph, and initRegPressure) were dropped;
// confirm against upstream before editing.
 1677  if (!ShouldTrackPressure) {
// Fast path: no pressure tracking requested for this region.
 1678    RPTracker.reset();
 1679    RegionCriticalPSets.clear();
 1681    return;
 1682  }
 1683
 1684  // Initialize the register pressure tracker used by buildSchedGraph.
 1686                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
 1687
 1688  // Account for liveness generate by the region boundary.
 1689  if (LiveRegionEnd != RegionEnd)
 1690    RPTracker.recede();
 1691
 1692  // Build the DAG, and compute current register pressure.
 1694
 1695  // Initialize top/bottom trackers after computing region pressure.
 1697}
1698
// Lazily allocate and (re)compute the DFS result used for subtree-based
// scheduling priorities; the object is freed in the destructor.
// NOTE(review): lossy extraction — the function signature (line 1699) and
// lines 1703/1705-1706 (presumably ScheduledTrees clear/resize and the
// DFSResult->compute call) were dropped; confirm against upstream.
 1700  if (!DFSResult)
 1701    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
 1702  DFSResult->clear();
 1704  DFSResult->resize(SUnits.size());
 1707}
1708
1709/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1710/// only provides the critical path for single block loops. To handle loops that
1711/// span blocks, we could use the vreg path latencies provided by
1712/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1713/// available for use in the scheduler.
1714///
1715/// The cyclic path estimation identifies a def-use pair that crosses the back
1716/// edge and considers the depth and height of the nodes. For example, consider
1717/// the following instruction sequence where each instruction has unit latency
1718/// and defines an eponymous virtual register:
1719///
1720/// a->b(a,c)->c(b)->d(c)->exit
1721///
1722/// The cyclic critical path is a two cycles: b->c->b
1723/// The acyclic critical path is four cycles: a->b->c->d->exit
1724/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1725/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1726/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1727/// LiveInDepth = depth(b) = len(a->b) = 1
1728///
1729/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1730/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1731/// CyclicCriticalPath = min(2, 2) = 2
1732///
1733/// This could be relevant to PostRA scheduling, but is currently implemented
1734/// assuming LiveIntervals.
// NOTE(review): lossy extraction — the function signature (line 1735) and
// lines 1742/1751/1766 (the live-out loop header, the def-MI lookup, and the
// LiveQueryResult for the use) were dropped; confirm against upstream.
 1736  // This only applies to single block loop.
 1737  if (!BB->isSuccessor(BB))
 1738    return 0;
 1739
 1740  unsigned MaxCyclicLatency = 0;
 1741  // Visit each live out vreg def to find def/use pairs that cross iterations.
 1743    Register Reg = P.RegUnit;
 1744    if (!Reg.isVirtual())
 1745      continue;
 1746    const LiveInterval &LI = LIS->getInterval(Reg);
 1747    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
 1748    if (!DefVNI)
 1749      continue;
 1750
 1752    const SUnit *DefSU = getSUnit(DefMI);
 1753    if (!DefSU)
 1754      continue;
 1755
 1756    unsigned LiveOutHeight = DefSU->getHeight();
 1757    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
 1758    // Visit all local users of the vreg def.
 1759    for (const VReg2SUnit &V2SU
 1760         : make_range(VRegUses.find(Reg), VRegUses.end())) {
 1761      SUnit *SU = V2SU.SU;
 1762      if (SU == &ExitSU)
 1763        continue;
 1764
 1765      // Only consider uses of the phi.
 1767      if (!LRQ.valueIn()->isPHIDef())
 1768        continue;
 1769
 1770      // Assume that a path spanning two iterations is a cycle, which could
 1771      // overestimate in strange cases. This allows cyclic latency to be
 1772      // estimated as the minimum slack of the vreg's depth or height.
 1773      unsigned CyclicLatency = 0;
 1774      if (LiveOutDepth > SU->getDepth())
 1775        CyclicLatency = LiveOutDepth - SU->getDepth();
 1776
 1777      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
 1778      if (LiveInHeight > LiveOutHeight) {
 1779        if (LiveInHeight - LiveOutHeight < CyclicLatency)
 1780          CyclicLatency = LiveInHeight - LiveOutHeight;
 1781      } else
 1782        CyclicLatency = 0;
 1783
 1784      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
 1785                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
 1786      if (CyclicLatency > MaxCyclicLatency)
 1787        MaxCyclicLatency = CyclicLatency;
 1788    }
 1789  }
 1790  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
 1791  return MaxCyclicLatency;
 1792}
1793
1794/// Release ExitSU predecessors and setup scheduler queues. Re-position
1795/// the Top RP tracker in case the region beginning has changed.
// NOTE(review): lossy extraction — the first signature line (1796) and line
// 1801 (presumably TopRPTracker.setPos(CurrentTop)) were dropped; confirm
// against upstream before editing.
 1797                                   ArrayRef<SUnit*> BotRoots) {
 1798  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
 1799  if (ShouldTrackPressure) {
 1800    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
 1802  }
 1803}
1804
1805/// Move an instruction and update register pressure.
// NOTE(review): lossy extraction — several statements were dropped (lines
// 1813/1815-1816, 1824, 1836-1838, 1842-1843, 1849/1851/1853, 1859,
// 1868-1870, 1874/1876), chiefly the moveInstruction calls and tracker
// setPos/advance bookkeeping; confirm against upstream before editing.
1806void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
 1807  // Move the instruction to its new location in the instruction stream.
 1808  MachineInstr *MI = SU->getInstr();
 1809
 1810  if (IsTopNode) {
 1811    assert(SU->isTopReady() && "node still has unscheduled dependencies");
 1812    if (&*CurrentTop == MI)
 1814    else {
 1817    }
 1818
 1819    if (ShouldTrackPressure) {
 1820      // Update top scheduled pressure.
 1821      RegisterOperands RegOpers;
 1822      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
 1823                       /*IgnoreDead=*/false);
 1825        // Adjust liveness and add missing dead+read-undef flags.
 1826        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
 1827        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
 1828      } else {
 1829        // Adjust for missing dead-def flags.
 1830        RegOpers.detectDeadDefs(*MI, *LIS);
 1831      }
 1832
 1833      TopRPTracker.advance(RegOpers);
 1834      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
 1835      LLVM_DEBUG(dbgs() << "Top Pressure: "; dumpRegSetPressure(
 1837
 1839    }
 1840  } else {
 1841    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
 1844    if (&*priorII == MI)
 1845      CurrentBottom = priorII;
 1846    else {
 1847      if (&*CurrentTop == MI) {
 1848        CurrentTop = nextIfDebug(++CurrentTop, priorII);
 1850      }
 1852      CurrentBottom = MI;
 1854    }
 1855    if (ShouldTrackPressure) {
 1856      RegisterOperands RegOpers;
 1857      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
 1858                       /*IgnoreDead=*/false);
 1860        // Adjust liveness and add missing dead+read-undef flags.
 1861        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
 1862        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
 1863      } else {
 1864        // Adjust for missing dead-def flags.
 1865        RegOpers.detectDeadDefs(*MI, *LIS);
 1866      }
 1867
 1871      BotRPTracker.recede(RegOpers, &LiveUses);
 1872      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
 1873      LLVM_DEBUG(dbgs() << "Bottom Pressure: "; dumpRegSetPressure(
 1875
 1877      updatePressureDiffs(LiveUses);
 1878    }
 1879  }
 1880}
1881
1882//===----------------------------------------------------------------------===//
1883// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1884//===----------------------------------------------------------------------===//
1885
1886namespace {
1887
1888/// Post-process the DAG to create cluster edges between neighboring
1889/// loads or between neighboring stores.
1888/// Post-process the DAG to create cluster edges between neighboring
1889/// loads or between neighboring stores.
// NOTE(review): lossy extraction — the BaseOps member declaration (line 1893),
// the TargetFrameLowering lookup (lines 1911/1913), and the last groupMemOps
// parameter (line 1958) were dropped; confirm against upstream before editing.
1890class BaseMemOpClusterMutation : public ScheduleDAGMutation {
// Per-memop record: the DAG node plus its decomposed address (base operands,
// offset, access width) used for sorting and target clustering queries.
 1891  struct MemOpInfo {
 1892    SUnit *SU;
 1894    int64_t Offset;
 1895    LocationSize Width;
 1896    bool OffsetIsScalable;
 1897
 1898    MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
 1899              int64_t Offset, bool OffsetIsScalable, LocationSize Width)
 1900        : SU(SU), BaseOps(BaseOps), Offset(Offset), Width(Width),
 1901          OffsetIsScalable(OffsetIsScalable) {}
 1902
// Total order on base operands: by operand kind, then register number, then
// frame index (ordered by stack growth direction so "lower" addresses sort
// first).
 1903    static bool Compare(const MachineOperand *const &A,
 1904                        const MachineOperand *const &B) {
 1905      if (A->getType() != B->getType())
 1906        return A->getType() < B->getType();
 1907      if (A->isReg())
 1908        return A->getReg() < B->getReg();
 1909      if (A->isFI()) {
 1910        const MachineFunction &MF = *A->getParent()->getParent()->getParent();
 1912        bool StackGrowsDown = TFI.getStackGrowthDirection() ==
 1914        return StackGrowsDown ? A->getIndex() > B->getIndex()
 1915                              : A->getIndex() < B->getIndex();
 1916      }
 1917
 1918      llvm_unreachable("MemOpClusterMutation only supports register or frame "
 1919                       "index bases.");
 1920    }
 1921
 1922    bool operator<(const MemOpInfo &RHS) const {
 1923      // FIXME: Don't compare everything twice. Maybe use C++20 three way
 1924      // comparison instead when it's available.
 1925      if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
 1926                                       RHS.BaseOps.begin(), RHS.BaseOps.end(),
 1927                                       Compare))
 1928        return true;
 1929      if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
 1930                                       BaseOps.begin(), BaseOps.end(), Compare))
 1931        return false;
 1932      if (Offset != RHS.Offset)
 1933        return Offset < RHS.Offset;
// Tie-break on node number to make the sort deterministic.
 1934      return SU->NodeNum < RHS.SU->NodeNum;
 1935    }
 1936  };
 1937
 1938  const TargetInstrInfo *TII;
 1939  const TargetRegisterInfo *TRI;
 1940  bool IsLoad;
 1941  bool ReorderWhileClustering;
 1942
 1943public:
 1944  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
 1945                           const TargetRegisterInfo *tri, bool IsLoad,
 1946                           bool ReorderWhileClustering)
 1947      : TII(tii), TRI(tri), IsLoad(IsLoad),
 1948        ReorderWhileClustering(ReorderWhileClustering) {}
 1949
 1950  void apply(ScheduleDAGInstrs *DAGInstrs) override;
 1951
 1952protected:
 1953  void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
 1954                                ScheduleDAGInstrs *DAG);
 1955  void collectMemOpRecords(std::vector<SUnit> &SUnits,
 1956                           SmallVectorImpl<MemOpInfo> &MemOpRecords);
 1957  bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
 1959};
1960
// Clusters neighboring stores (IsLoad = false in the base mutation).
1961class StoreClusterMutation : public BaseMemOpClusterMutation {
1962public:
 1963  StoreClusterMutation(const TargetInstrInfo *tii,
 1964                       const TargetRegisterInfo *tri,
 1965                       bool ReorderWhileClustering)
 1966      : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1967};
1968
// Clusters neighboring loads (IsLoad = true in the base mutation).
1969class LoadClusterMutation : public BaseMemOpClusterMutation {
1970public:
 1971  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
 1972                      bool ReorderWhileClustering)
 1973      : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1974};
1975
1976} // end anonymous namespace
1977
1978namespace llvm {
1979
1980std::unique_ptr<ScheduleDAGMutation>
1982 const TargetRegisterInfo *TRI,
1983 bool ReorderWhileClustering) {
1984 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1985 TII, TRI, ReorderWhileClustering)
1986 : nullptr;
1987}
1988
1989std::unique_ptr<ScheduleDAGMutation>
1991 const TargetRegisterInfo *TRI,
1992 bool ReorderWhileClustering) {
1993 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1994 TII, TRI, ReorderWhileClustering)
1995 : nullptr;
1996}
1997
1998} // end namespace llvm
1999
2000// Sorting all the loads/stores first, then for each load/store, checking the
2001// following load/store one by one, until reach the first non-dependent one and
2002// call target hook to see if they can cluster.
2003// If FastCluster is enabled, we assume that, all the loads/stores have been
2004// preprocessed and now, they didn't have dependencies on each other.
// NOTE(review): lossy extraction — line 2009 (the SUnit2ClusterInfo map
// declaration) was dropped; confirm against upstream before editing.
2005void BaseMemOpClusterMutation::clusterNeighboringMemOps(
2006    ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
2007    ScheduleDAGInstrs *DAG) {
 2008  // Keep track of the current cluster length and bytes for each SUnit.
 2010
 2011  // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
 2012  // cluster mem ops collected within `MemOpRecords` array.
 2013  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
 2014    // Decision to cluster mem ops is taken based on target dependent logic
 2015    auto MemOpa = MemOpRecords[Idx];
 2016
 2017    // Seek for the next load/store to do the cluster.
 2018    unsigned NextIdx = Idx + 1;
 2019    for (; NextIdx < End; ++NextIdx)
 2020      // Skip if MemOpb has been clustered already or has dependency with
 2021      // MemOpa.
 2022      if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
 2023          (FastCluster ||
 2024           (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
 2025            !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
 2026        break;
 2027    if (NextIdx == End)
 2028      continue;
 2029
 2030    auto MemOpb = MemOpRecords[NextIdx];
 2031    unsigned ClusterLength = 2;
// Running cluster size: extend an existing cluster containing MemOpa, or
// start a fresh two-element cluster.
 2032    unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
 2033                                   MemOpb.Width.getValue().getKnownMinValue();
 2034    if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
 2035      ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
 2036      CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
 2037                            MemOpb.Width.getValue().getKnownMinValue();
 2038    }
 2039
// The target has the final say on whether this pair is worth clustering.
 2040    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
 2041                                  MemOpa.OffsetIsScalable, MemOpb.BaseOps,
 2042                                  MemOpb.Offset, MemOpb.OffsetIsScalable,
 2043                                  ClusterLength, CurrentClusterBytes))
 2044      continue;
 2045
 2046    SUnit *SUa = MemOpa.SU;
 2047    SUnit *SUb = MemOpb.SU;
 2048    if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
 2049      std::swap(SUa, SUb);
 2050
 2051    // FIXME: Is this check really required?
 2052    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
 2053      continue;
 2054
 2055    LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
 2056                      << SUb->NodeNum << ")\n");
 2057    ++NumClustered;
 2058
 2059    if (IsLoad) {
 2060      // Copy successor edges from SUa to SUb. Interleaving computation
 2061      // dependent on SUa can prevent load combining due to register reuse.
 2062      // Predecessor edges do not need to be copied from SUb to SUa since
 2063      // nearby loads should have effectively the same inputs.
 2064      for (const SDep &Succ : SUa->Succs) {
 2065        if (Succ.getSUnit() == SUb)
 2066          continue;
 2067        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
 2068                          << ")\n");
 2069        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
 2070      }
 2071    } else {
 2072      // Copy predecessor edges from SUb to SUa to avoid the SUnits that
 2073      // SUb dependent on scheduled in-between SUb and SUa. Successor edges
 2074      // do not need to be copied from SUa to SUb since no one will depend
 2075      // on stores.
 2076      // Notice that, we don't need to care about the memory dependency as
 2077      // we won't try to cluster them if they have any memory dependency.
 2078      for (const SDep &Pred : SUb->Preds) {
 2079        if (Pred.getSUnit() == SUa)
 2080          continue;
 2081        LLVM_DEBUG(dbgs() << "  Copy Pred SU(" << Pred.getSUnit()->NodeNum
 2082                          << ")\n");
 2083        DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
 2084      }
 2085    }
 2086
 2087    SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
 2088                                             CurrentClusterBytes};
 2089
 2090    LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength
 2091                      << ", Curr cluster bytes: " << CurrentClusterBytes
 2092                      << "\n");
 2093  }
 2094}
2095
// Gather a MemOpInfo record for every clusterable load (or store, per IsLoad)
// whose base/offset/width the target can decompose.
// NOTE(review): lossy extraction — lines 2104 (the BaseOps SmallVector
// declaration) and 2108 (the getMemOperandsWithOffsetWidth call heading the
// if) were dropped; confirm against upstream before editing.
2096void BaseMemOpClusterMutation::collectMemOpRecords(
2097    std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
 2098  for (auto &SU : SUnits) {
 2099    if ((IsLoad && !SU.getInstr()->mayLoad()) ||
 2100        (!IsLoad && !SU.getInstr()->mayStore()))
 2101      continue;
 2102
 2103    const MachineInstr &MI = *SU.getInstr();
 2105    int64_t Offset;
 2106    bool OffsetIsScalable;
 2107    LocationSize Width = 0;
 2109                                           OffsetIsScalable, Width, TRI)) {
// Skip accesses of unknown size; clustering decisions need a width.
 2110      if (!Width.hasValue())
 2111        continue;
 2112
 2113      MemOpRecords.push_back(
 2114          MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
 2115
 2116      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
 2117                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable
 2118                        << ", Width: " << Width << "\n");
 2119    }
2120#ifndef NDEBUG
 2121    for (const auto *Op : BaseOps)
 2122      assert(Op);
2123#endif
 2124  }
2125}
2126
// Partition mem ops into groups keyed by a control-dependence predecessor so
// that clustering within a group cannot create dependency cycles; returns
// whether the FastCluster heuristic (skip reachability checks) is in effect.
// NOTE(review): lossy extraction — lines 2128-2129 (the remaining function
// parameters, including the Groups output map) and 2131 (the first operand of
// the FastCluster condition, presumably ForceFastCluster) were dropped;
// confirm against upstream before editing.
2127bool BaseMemOpClusterMutation::groupMemOps(
// FastCluster kicks in when the DAG is large enough that O(n^2) reachability
// queries would blow up compile time.
 2130  bool FastCluster =
 2132      MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
 2133
 2134  for (const auto &MemOp : MemOps) {
 2135    unsigned ChainPredID = DAG->SUnits.size();
 2136    if (FastCluster) {
 2137      for (const SDep &Pred : MemOp.SU->Preds) {
 2138        // We only want to cluster the mem ops that have the same ctrl(non-data)
 2139        // pred so that they didn't have ctrl dependency for each other. But for
 2140        // store instrs, we can still cluster them if the pred is load instr.
 2141        if ((Pred.isCtrl() &&
 2142             (IsLoad ||
 2143              (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
 2144            !Pred.isArtificial()) {
 2145          ChainPredID = Pred.getSUnit()->NodeNum;
 2146          break;
 2147        }
 2148      }
 2149    } else
 2150      ChainPredID = 0;
 2151
 2152    Groups[ChainPredID].push_back(MemOp);
 2153  }
 2154  return FastCluster;
}
2156
2157/// Callback from DAG postProcessing to create cluster edges for loads/stores.
2158void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
2159  // Collect all the clusterable loads/stores
2160  SmallVector<MemOpInfo, 32> MemOpRecords;
2161  collectMemOpRecords(DAG->SUnits, MemOpRecords);
2162
  // Nothing to cluster with fewer than two candidates.
2163  if (MemOpRecords.size() < 2)
2164    return;
2165
2166  // Put the loads/stores without dependency into the same group with some
2167  // heuristic if the DAG is too complex to avoid compiling time blow up.
2168  // Notice that, some fusion pair could be lost with this.
// NOTE(review): source line 2169 (the Groups DenseMap declaration) was elided
// from this extraction — verify against upstream MachineScheduler.cpp.
2170  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2171
2172  for (auto &Group : Groups) {
2173    // Sorting the loads/stores, so that, we can stop the cluster as early as
2174    // possible.
2175    llvm::sort(Group.second);
2176
2177    // Trying to cluster all the neighboring loads/stores.
2178    clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2179  }
2180}
2181
2182//===----------------------------------------------------------------------===//
2183// CopyConstrain - DAG post-processing to encourage copy elimination.
2184//===----------------------------------------------------------------------===//
2185
2186namespace {
2187
2188/// Post-process the DAG to create weak edges from all uses of a copy to
2189/// the one use that defines the copy's source vreg, most likely an induction
2190/// variable increment.
2191class CopyConstrain : public ScheduleDAGMutation {
2192 // Transient state.
2193 SlotIndex RegionBeginIdx;
2194
2195 // RegionEndIdx is the slot index of the last non-debug instruction in the
2196 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2197 SlotIndex RegionEndIdx;
2198
2199public:
2200 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2201
2202 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2203
2204protected:
2205 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2206};
2207
2208} // end anonymous namespace
2209
2210namespace llvm {
2211
// Factory returning the CopyConstrain DAG mutation; the TII/TRI arguments
// are forwarded to the mutation's constructor.
2212std::unique_ptr<ScheduleDAGMutation>
// NOTE(review): source line 2213 (the function name and first parameter,
// "createCopyConstrainDAGMutation(const TargetInstrInfo *TII,") was elided
// from this extraction — verify against upstream MachineScheduler.cpp.
2214                              const TargetRegisterInfo *TRI) {
2215  return std::make_unique<CopyConstrain>(TII, TRI);
2216}
2217
2218} // end namespace llvm
2219
2220/// constrainLocalCopy handles two possibilities:
2221/// 1) Local src:
2222/// I0: = dst
2223/// I1: src = ...
2224/// I2: = dst
2225/// I3: dst = src (copy)
2226/// (create pred->succ edges I0->I1, I2->I1)
2227///
2228/// 2) Local copy:
2229/// I0: dst = src (copy)
2230/// I1: = dst
2231/// I2: src = ...
2232/// I3: = dst
2233/// (create pred->succ edges I1->I2, I3->I2)
2234///
2235/// Although the MachineScheduler is currently constrained to single blocks,
2236/// this algorithm should handle extended blocks. An EBB is a set of
2237/// contiguously numbered blocks such that the previous block in the EBB is
2238/// always the single predecessor.
2239void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2240 LiveIntervals *LIS = DAG->getLIS();
2241 MachineInstr *Copy = CopySU->getInstr();
2242
2243 // Check for pure vreg copies.
2244 const MachineOperand &SrcOp = Copy->getOperand(1);
2245 Register SrcReg = SrcOp.getReg();
2246 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2247 return;
2248
2249 const MachineOperand &DstOp = Copy->getOperand(0);
2250 Register DstReg = DstOp.getReg();
2251 if (!DstReg.isVirtual() || DstOp.isDead())
2252 return;
2253
2254 // Check if either the dest or source is local. If it's live across a back
2255 // edge, it's not local. Note that if both vregs are live across the back
2256 // edge, we cannot successfully contrain the copy without cyclic scheduling.
2257 // If both the copy's source and dest are local live intervals, then we
2258 // should treat the dest as the global for the purpose of adding
2259 // constraints. This adds edges from source's other uses to the copy.
2260 unsigned LocalReg = SrcReg;
2261 unsigned GlobalReg = DstReg;
2262 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2263 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2264 LocalReg = DstReg;
2265 GlobalReg = SrcReg;
2266 LocalLI = &LIS->getInterval(LocalReg);
2267 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2268 return;
2269 }
2270 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2271
2272 // Find the global segment after the start of the local LI.
2273 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2274 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2275 // local live range. We could create edges from other global uses to the local
2276 // start, but the coalescer should have already eliminated these cases, so
2277 // don't bother dealing with it.
2278 if (GlobalSegment == GlobalLI->end())
2279 return;
2280
2281 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2282 // returned the next global segment. But if GlobalSegment overlaps with
2283 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2284 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2285 if (GlobalSegment->contains(LocalLI->beginIndex()))
2286 ++GlobalSegment;
2287
2288 if (GlobalSegment == GlobalLI->end())
2289 return;
2290
2291 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2292 if (GlobalSegment != GlobalLI->begin()) {
2293 // Two address defs have no hole.
2294 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2295 GlobalSegment->start)) {
2296 return;
2297 }
2298 // If the prior global segment may be defined by the same two-address
2299 // instruction that also defines LocalLI, then can't make a hole here.
2300 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2301 LocalLI->beginIndex())) {
2302 return;
2303 }
2304 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2305 // it would be a disconnected component in the live range.
2306 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2307 "Disconnected LRG within the scheduling region.");
2308 }
2309 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2310 if (!GlobalDef)
2311 return;
2312
2313 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2314 if (!GlobalSU)
2315 return;
2316
2317 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2318 // constraining the uses of the last local def to precede GlobalDef.
2319 SmallVector<SUnit*,8> LocalUses;
2320 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2321 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2322 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2323 for (const SDep &Succ : LastLocalSU->Succs) {
2324 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2325 continue;
2326 if (Succ.getSUnit() == GlobalSU)
2327 continue;
2328 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2329 return;
2330 LocalUses.push_back(Succ.getSUnit());
2331 }
2332 // Open the top of the GlobalLI hole by constraining any earlier global uses
2333 // to precede the start of LocalLI.
2334 SmallVector<SUnit*,8> GlobalUses;
2335 MachineInstr *FirstLocalDef =
2336 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2337 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2338 for (const SDep &Pred : GlobalSU->Preds) {
2339 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2340 continue;
2341 if (Pred.getSUnit() == FirstLocalSU)
2342 continue;
2343 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2344 return;
2345 GlobalUses.push_back(Pred.getSUnit());
2346 }
2347 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2348 // Add the weak edges.
2349 for (SUnit *LU : LocalUses) {
2350 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2351 << GlobalSU->NodeNum << ")\n");
2352 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2353 }
2354 for (SUnit *GU : GlobalUses) {
2355 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2356 << FirstLocalSU->NodeNum << ")\n");
2357 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2358 }
2359}
2360
2361/// Callback from DAG postProcessing to create weak edges to encourage
2362/// copy elimination.
2363void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2364 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2365 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2366
2367 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2368 if (FirstPos == DAG->end())
2369 return;
2370 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2371 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2372 *priorNonDebug(DAG->end(), DAG->begin()));
2373
2374 for (SUnit &SU : DAG->SUnits) {
2375 if (!SU.getInstr()->isCopy())
2376 continue;
2377
2378 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2379 }
2380}
2381
2382//===----------------------------------------------------------------------===//
2383// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2384// and possibly other custom schedulers.
2385//===----------------------------------------------------------------------===//
2386
2387static const unsigned InvalidCycle = ~0U;
2388
2390
/// Given a Count of resource usage and a Latency value, return true if a
/// SchedBoundary becomes resource limited.
/// If we are checking after scheduling a node, we should return true when
/// we just reach the resource limit.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                               unsigned Latency, bool AfterSchedNode) {
  // Scaled headroom of the resource count over the latency-implied count;
  // small or negative values mean the zone is latency limited instead.
  int Excess = (int)(Count - Latency * LFactor);
  return AfterSchedNode ? Excess >= (int)LFactor : Excess > (int)LFactor;
}
2403
// NOTE(review): the declarator line ("void SchedBoundary::reset() {",
// source line 2404) was elided from this extraction; the statements below
// are its body, which returns all per-zone scheduling state to its initial
// values between regions.
2405  // A new HazardRec is created for each DAG and owned by SchedBoundary.
2406  // Destroying and reconstructing it is very expensive though. So keep
2407  // invalid, placeholder HazardRecs.
2408  if (HazardRec && HazardRec->isEnabled()) {
2409    delete HazardRec;
2410    HazardRec = nullptr;
2411  }
2412  Available.clear();
2413  Pending.clear();
2414  CheckPending = false;
2415  CurrCycle = 0;
2416  CurrMOps = 0;
  // Sentinel: no node has been released yet.
2417  MinReadyCycle = std::numeric_limits<unsigned>::max();
2418  ExpectedLatency = 0;
2419  DependentLatency = 0;
2420  RetiredMOps = 0;
2421  MaxExecutedResCount = 0;
2422  ZoneCritResIdx = 0;
2423  IsResourceLimited = false;
2424  ReservedCycles.clear();
2425  ReservedResourceSegments.clear();
2426  ReservedCyclesIndex.clear();
2427  ResourceGroupSubUnitMasks.clear();
2428#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2429  // Track the maximum number of stall cycles that could arise either from the
2430  // latency of a DAG edge or the number of cycles that a processor resource is
2431  // reserved (SchedBoundary::ReservedCycles).
2432  MaxObservedStall = 0;
2433#endif
2434  // Reserve a zero-count for invalid CritResIdx.
2435  ExecutedResCounts.resize(1);
2436  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2437}
2438
// NOTE(review): the "void SchedRemainder::" declarator line (source line
// 2439) was elided from this extraction. This initializes the remaining
// micro-op and per-resource counts for the whole unscheduled region.
2440init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2441  reset();
  // Without a machine model there are no counts to accumulate.
2442  if (!SchedModel->hasInstrSchedModel())
2443    return;
// NOTE(review): source line 2444 (resizing RemainingCounts to the number of
// processor resource kinds) was elided — verify against upstream.
2445  for (SUnit &SU : DAG->SUnits) {
2446    const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2447    RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2448      * SchedModel->getMicroOpFactor();
// NOTE(review): source line 2449 (the ProcResIter for-loop header) was elided.
2450           PI = SchedModel->getWriteProcResBegin(SC),
2451           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2452      unsigned PIdx = PI->ProcResourceIdx;
2453      unsigned Factor = SchedModel->getResourceFactor(PIdx);
2454      assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2455      RemainingCounts[PIdx] +=
2456        (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2457    }
2458  }
2459}
2460
// NOTE(review): the "void SchedBoundary::" declarator line (source line
// 2461) was elided from this extraction. Binds this boundary to a DAG and
// sched model and sizes the per-resource bookkeeping tables.
2462init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2463  reset();
2464  DAG = dag;
2465  SchedModel = smodel;
2466  Rem = rem;
// NOTE(review): source line 2467 (the hasInstrSchedModel() guard opening this
// block) was elided — the closing brace before the final one matches it.
2468    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2469    ReservedCyclesIndex.resize(ResourceCount);
2470    ExecutedResCounts.resize(ResourceCount);
2471    ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2472    unsigned NumUnits = 0;
2473
    // Lay out one ReservedCycles slot per resource *unit*, and record where
    // each resource kind's units start in that flat array.
2474    for (unsigned i = 0; i < ResourceCount; ++i) {
2475      ReservedCyclesIndex[i] = NumUnits;
2476      NumUnits += SchedModel->getProcResource(i)->NumUnits;
2477      if (isUnbufferedGroup(i)) {
2478        auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2479        for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2480             U != UE; ++U)
2481          ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2482      }
2483    }
2484
2485    ReservedCycles.resize(NumUnits, InvalidCycle);
2486  }
2487}
2488
2489/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2490/// these "soft stalls" differently than the hard stall cycles based on CPU
2491/// resources and computed by checkHazard(). A fully in-order model
2492/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2493/// available for scheduling until they are ready. However, a weaker in-order
2494/// model may use this for heuristics. For example, if a processor has in-order
2495/// behavior when reading certain resources, this may come into play.
// NOTE(review): the signature line ("unsigned
// SchedBoundary::getLatencyStallCycles(SUnit *SU) {", source line 2496) was
// elided from this extraction.
2497  if (!SU->isUnbuffered)
2498    return 0;
2499
  // Stall is how far in the future the node becomes ready relative to the
  // zone's current cycle (direction-dependent).
2500  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2501  if (ReadyCycle > CurrCycle)
2502    return ReadyCycle - CurrCycle;
2503  return 0;
2504}
2505
2506/// Compute the next cycle at which the given processor resource unit
2507/// can be scheduled.
// NOTE(review): the signature line (source line 2508,
// "unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned
// InstanceIdx,") was elided from this extraction.
2509                                                       unsigned ReleaseAtCycle,
2510                                                       unsigned AcquireAtCycle) {
// NOTE(review): source line 2511 (the condition selecting the
// resource-segment path — presumably the intervals-enabled check; verify
// against upstream) was elided here.
2512    if (isTop())
2513      return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2514          CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2515
2516    return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2517        CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2518  }
2519
  // Legacy path: one "next free" cycle per resource unit.
2520  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2521  // If this resource has never been used, always return cycle zero.
2522  if (NextUnreserved == InvalidCycle)
2523    return CurrCycle;
2524  // For bottom-up scheduling add the cycles needed for the current operation.
2525  if (!isTop())
2526    NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2527  return NextUnreserved;
2528}
2529
2530/// Compute the next cycle at which the given processor resource can be
2531/// scheduled. Returns the next cycle and the index of the processor resource
2532/// instance in the reserved cycles vector.
2533std::pair<unsigned, unsigned>
// NOTE(review): source line 2534 (the function name and first parameters,
// "SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned
// PIdx,") was elided from this extraction.
2535                                    unsigned ReleaseAtCycle,
2536                                    unsigned AcquireAtCycle) {
// NOTE(review): source lines 2537 and 2539 (a debug-only gate and its dump
// call) were elided around the following LLVM_DEBUG statements.
2538    LLVM_DEBUG(dbgs() << "  Resource booking (@" << CurrCycle << "c): \n");
2540    LLVM_DEBUG(dbgs() << "  getNextResourceCycle (@" << CurrCycle << "c): \n");
2541  }
2542  unsigned MinNextUnreserved = InvalidCycle;
2543  unsigned InstanceIdx = 0;
2544  unsigned StartIndex = ReservedCyclesIndex[PIdx];
2545  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2546  assert(NumberOfInstances > 0 &&
2547         "Cannot have zero instances of a ProcResource");
2548
2549  if (isUnbufferedGroup(PIdx)) {
2550    // If any subunits are used by the instruction, report that the
2551    // subunits of the resource group are available at the first cycle
2552    // in which the unit is available, effectively removing the group
2553    // record from hazarding and basing the hazarding decisions on the
2554    // subunit records. Otherwise, choose the first available instance
2555    // from among the subunits. Specifications which assign cycles to
2556    // both the subunits and the group or which use an unbuffered
2557    // group with buffered subunits will appear to schedule
2558    // strangely. In the first case, the additional cycles for the
2559    // group will be ignored. In the second, the group will be
2560    // ignored entirely.
2561    for (const MCWriteProcResEntry &PE :
// NOTE(review): source lines 2562-2563 (the make_range over the write
// proc-res entries of SC) were elided here.
2564      if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2565        return std::make_pair(getNextResourceCycleByInstance(
2566                                  StartIndex, ReleaseAtCycle, AcquireAtCycle),
2567                              StartIndex);
2568
    // No subunit is written directly: recurse into each subunit and pick
    // the earliest-available one.
2569    auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2570    for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2571      unsigned NextUnreserved, NextInstanceIdx;
2572      std::tie(NextUnreserved, NextInstanceIdx) =
2573          getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2574      if (MinNextUnreserved > NextUnreserved) {
2575        InstanceIdx = NextInstanceIdx;
2576        MinNextUnreserved = NextUnreserved;
2577      }
2578    }
2579    return std::make_pair(MinNextUnreserved, InstanceIdx);
2580  }
2581
  // Ordinary resource: scan its units and pick the earliest-available one.
2582  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2583       ++I) {
2584    unsigned NextUnreserved =
2585        getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
// NOTE(review): source line 2586 (a debug-only gate) was elided here.
2587      LLVM_DEBUG(dbgs() << "  Instance " << I - StartIndex << " available @"
2588                        << NextUnreserved << "c\n");
2589    if (MinNextUnreserved > NextUnreserved) {
2590      InstanceIdx = I;
2591      MinNextUnreserved = NextUnreserved;
2592    }
2593  }
// NOTE(review): source line 2594 (a debug-only gate) was elided here.
2595    LLVM_DEBUG(dbgs() << "  selecting " << SchedModel->getResourceName(PIdx)
2596                      << "[" << InstanceIdx - StartIndex << "]"
2597                      << " available @" << MinNextUnreserved << "c"
2598                      << "\n");
2599  return std::make_pair(MinNextUnreserved, InstanceIdx);
2600}
2601
2602/// Does this SU have a hazard within the current instruction group.
2603///
2604/// The scheduler supports two modes of hazard recognition. The first is the
2605/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2606/// supports highly complicated in-order reservation tables
2607/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2608///
2609/// The second is a streamlined mechanism that checks for hazards based on
2610/// simple counters that the scheduler itself maintains. It explicitly checks
2611/// for instruction dispatch limitations, including the number of micro-ops that
2612/// can dispatch per cycle.
2613///
2614/// TODO: Also check whether the SU must start a new group.
// NOTE(review): the signature line ("bool SchedBoundary::checkHazard(SUnit
// *SU) {", source line 2615) was elided from this extraction.
2616  if (HazardRec->isEnabled()
// NOTE(review): source line 2617 (the HazardRec->getHazardType(SU)
// comparison completing this condition) was elided here.
2618    return true;
2619  }
2620
  // Issue-width hazard: this SU's micro-ops do not fit in the current group.
2621  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2622  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2623    LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
2624                      << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2625    return true;
2626  }
2627
  // Group-boundary hazard: the SU must begin (top-down) or end (bottom-up)
  // an issue group, but the current group is not empty.
2628  if (CurrMOps > 0 &&
2629      ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2630       (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2631    LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
2632                      << (isTop() ? "begin" : "end") << " group\n");
2633    return true;
2634  }
2635
// NOTE(review): source line 2636 (the guard opening this block — presumably
// checking hasInstrSchedModel() and SU->hasReservedResource; verify against
// upstream) was elided here.
2637    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2638    for (const MCWriteProcResEntry &PE :
// NOTE(review): source lines 2639-2640 (the make_range over the write
// proc-res entries of SC) were elided here.
2641      unsigned ResIdx = PE.ProcResourceIdx;
2642      unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2643      unsigned AcquireAtCycle = PE.AcquireAtCycle;
2644      unsigned NRCycle, InstanceIdx;
2645      std::tie(NRCycle, InstanceIdx) =
2646          getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
      // Resource still reserved past the current cycle: report a hazard.
2647      if (NRCycle > CurrCycle) {
2648#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2649        MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2650#endif
2651        LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
2652                          << SchedModel->getResourceName(ResIdx)
2653                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'
2654                          << "=" << NRCycle << "c\n");
2655        return true;
2656      }
2657    }
2658  }
2659  return false;
2660}
2661
2662// Find the unscheduled node in ReadySUs with the highest latency.
// NOTE(review): the signature lines (source lines 2663-2664, returning
// unsigned and taking an ArrayRef of ready SUnits) were elided from this
// extraction.
2665  SUnit *LateSU = nullptr;
2666  unsigned RemLatency = 0;
  // Linear scan for the maximum unscheduled latency among the ready nodes.
2667  for (SUnit *SU : ReadySUs) {
2668    unsigned L = getUnscheduledLatency(SU);
2669    if (L > RemLatency) {
2670      RemLatency = L;
2671      LateSU = SU;
2672    }
2673  }
2674  if (LateSU) {
2675    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2676                      << LateSU->NodeNum << ") " << RemLatency << "c\n");
2677  }
2678  return RemLatency;
2679}
2680
2681// Count resources in this zone and the remaining unscheduled
2682// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2683// resource index, or zero if the zone is issue limited.
// NOTE(review): the return-type/class-qualifier line (source line 2684,
// "unsigned SchedBoundary::") was elided from this extraction.
2685getOtherResourceCount(unsigned &OtherCritIdx) {
2686  OtherCritIdx = 0;
// NOTE(review): source line 2687 (the guard condition for this early return
// — presumably !hasInstrSchedModel(); verify against upstream) was elided.
2688    return 0;
2689
  // Start from micro-op issue as the baseline "critical" count.
2690  unsigned OtherCritCount = Rem->RemIssueCount
2691    + (RetiredMOps * SchedModel->getMicroOpFactor());
2692  LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2693                    << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
  // PIdx 0 is reserved for "invalid", so start scanning from 1.
2694  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2695       PIdx != PEnd; ++PIdx) {
2696    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2697    if (OtherCount > OtherCritCount) {
2698      OtherCritCount = OtherCount;
2699      OtherCritIdx = PIdx;
2700    }
2701  }
2702  if (OtherCritIdx) {
2703    LLVM_DEBUG(
2704        dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2705               << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2706               << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2707  }
2708  return OtherCritCount;
2709}
2710
// Release a node into this boundary's queues: Available if it can issue now,
// Pending otherwise. When InPQueue is true the node currently sits at index
// Idx of the Pending queue and is migrated rather than re-added.
2711void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2712                                unsigned Idx) {
2713  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2714
2715#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2716  // ReadyCycle was been bumped up to the CurrCycle when this node was
2717  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2718  // scheduling, so may now be greater than ReadyCycle.
2719  if (ReadyCycle > CurrCycle)
2720    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2721#endif
2722
2723  if (ReadyCycle < MinReadyCycle)
2724    MinReadyCycle = ReadyCycle;
2725
2726  // Check for interlocks first. For the purpose of other heuristics, an
2727  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2728  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2729  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
// NOTE(review): source line 2730 (the remaining hazard terms of this
// condition — checkHazard and the ready-list limit; verify against
// upstream) was elided here.
2731
2732  if (!HazardDetected) {
2733    Available.push(SU);
2734
    // The node was promoted out of Pending; remove its old entry.
2735    if (InPQueue)
// NOTE(review): source line 2736 (the Pending-queue removal at Idx) was
// elided here.
2737    return;
2738  }
2739
2740  if (!InPQueue)
2741    Pending.push(SU);
2742}
2743
2744/// Move the boundary of scheduled code by one cycle.
2745void SchedBoundary::bumpCycle(unsigned NextCycle) {
  // In-order cores with no micro-op buffer cannot advance past the earliest
  // pending ready cycle.
2746  if (SchedModel->getMicroOpBufferSize() == 0) {
2747    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2748           "MinReadyCycle uninitialized");
2749    if (MinReadyCycle > NextCycle)
2750      NextCycle = MinReadyCycle;
2751  }
2752  // Update the current micro-ops, which will issue in the next cycle.
2753  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2754  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2755
2756  // Decrement DependentLatency based on the next cycle.
2757  if ((NextCycle - CurrCycle) > DependentLatency)
2758    DependentLatency = 0;
2759  else
2760    DependentLatency -= (NextCycle - CurrCycle);
2761
2762  if (!HazardRec->isEnabled()) {
2763    // Bypass HazardRec virtual calls.
2764    CurrCycle = NextCycle;
2765  } else {
2766    // Bypass getHazardType calls in case of long latency.
2767    for (; CurrCycle != NextCycle; ++CurrCycle) {
2768      if (isTop())
// NOTE(review): source lines 2769 and 2771 (the HazardRec advance/recede
// calls for the top/bottom cases) were elided from this extraction.
2770      else
2772    }
2773  }
2774  CheckPending = true;
2775  IsResourceLimited =
// NOTE(review): source line 2776 (the first line of the checkResourceLimit
// call — latency factor and critical count arguments) was elided here.
2777                         getScheduledLatency(), true);
2778
2779  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2780                    << '\n');
2781}
2782
2783void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2784 ExecutedResCounts[PIdx] += Count;
2785 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2786 MaxExecutedResCount = ExecutedResCounts[PIdx];
2787}
2788
2789/// Add the given processor resource to this scheduled zone.
2790///
2791/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2792/// cycles during which this resource is released.
2793///
2794/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2795/// cycles at which the resource is aquired after issue (assuming no stalls).
2796///
2797/// \return the next cycle at which the instruction may execute without
2798/// oversubscribing resources.
2799unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2800                                      unsigned ReleaseAtCycle,
2801                                      unsigned NextCycle,
2802                                      unsigned AcquireAtCycle) {
  // Scale the occupancy by this resource's factor so counts from different
  // resources are comparable.
2803  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2804  unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle);
2805  LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
2806                    << ReleaseAtCycle << "x" << Factor << "u\n");
2807
2808  // Update Executed resources counts.
2809  incExecutedResources(PIdx, Count);
2810  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2811  Rem->RemainingCounts[PIdx] -= Count;
2812
2813  // Check if this resource exceeds the current critical resource. If so, it
2814  // becomes the critical resource.
2815  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2816    ZoneCritResIdx = PIdx;
2817    LLVM_DEBUG(dbgs() << "  *** Critical resource "
2818                      << SchedModel->getResourceName(PIdx) << ": "
// NOTE(review): source line 2819 (the middle of this LLVM_DEBUG stream —
// the scaled resource count expression) was elided from this extraction.
2820                      << "c\n");
2821  }
2822  // For reserved resources, record the highest cycle using the resource.
2823  unsigned NextAvailable, InstanceIdx;
2824  std::tie(NextAvailable, InstanceIdx) =
2825      getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2826  if (NextAvailable > CurrCycle) {
2827    LLVM_DEBUG(dbgs() << "  Resource conflict: "
2828                      << SchedModel->getResourceName(PIdx)
2829                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2830                      << " reserved until @" << NextAvailable << "\n");
2831  }
2832  return NextAvailable;
2833}
2834
2835/// Move the boundary of scheduled code by one SUnit.
// NOTE(review): the signature line ("void SchedBoundary::bumpNode(SUnit *SU)
// {", source line 2836) was elided from this extraction.
2837  // Update the reservation table.
2838  if (HazardRec->isEnabled()) {
2839    if (!isTop() && SU->isCall) {
2840      // Calls are scheduled with their preceding instructions. For bottom-up
2841      // scheduling, clear the pipeline state before emitting.
2842      HazardRec->Reset();
2843    }
// NOTE(review): source line 2844 (emitting SU into the hazard recognizer)
// was elided here.
2845  // Scheduling an instruction may have made pending instructions available.
2846  CheckPending = true;
2847  }
2848  // checkHazard should prevent scheduling multiple instructions per cycle that
2849  // exceed the issue width.
2850  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2851  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2852  assert(
2853      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2854      "Cannot schedule this instruction's MicroOps in the current cycle.");
2855
2856  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2857  LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2858
  // Compute the cycle this node actually issues in, depending on how the
  // micro-op buffer is modeled.
2859  unsigned NextCycle = CurrCycle;
2860  switch (SchedModel->getMicroOpBufferSize()) {
2861  case 0:
2862    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2863    break;
2864  case 1:
2865    if (ReadyCycle > NextCycle) {
2866      NextCycle = ReadyCycle;
2867      LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2868    }
2869    break;
2870  default:
2871    // We don't currently model the OOO reorder buffer, so consider all
2872    // scheduled MOps to be "retired". We do loosely model in-order resource
2873    // latency. If this instruction uses an in-order resource, account for any
2874    // likely stall cycles.
2875    if (SU->isUnbuffered && ReadyCycle > NextCycle)
2876      NextCycle = ReadyCycle;
2877    break;
2878  }
2879  RetiredMOps += IncMOps;
2880
2881  // Update resource counts and critical resource.
// NOTE(review): source line 2882 (the guard opening this block — presumably
// hasInstrSchedModel(); verify against upstream) was elided here.
2883    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2884    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2885    Rem->RemIssueCount -= DecRemIssue;
2886    if (ZoneCritResIdx) {
2887      // Scale scheduled micro-ops for comparing with the critical resource.
2888      unsigned ScaledMOps =
2889          RetiredMOps * SchedModel->getMicroOpFactor();
2890
2891      // If scaled micro-ops are now more than the previous critical resource by
2892      // a full cycle, then micro-ops issue becomes critical.
2893      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2894          >= (int)SchedModel->getLatencyFactor()) {
2895        ZoneCritResIdx = 0;
2896        LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2897                          << ScaledMOps / SchedModel->getLatencyFactor()
2898                          << "c\n");
2899      }
2900    }
// NOTE(review): source lines 2901-2902 (the ProcResIter for-loop header over
// the write proc-res entries of SC) were elided here.
2903         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2904      unsigned RCycle =
2905          countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2906                        PI->AcquireAtCycle);
2907      if (RCycle > NextCycle)
2908        NextCycle = RCycle;
2909    }
2910    if (SU->hasReservedResource) {
2911      // For reserved resources, record the highest cycle using the resource.
2912      // For top-down scheduling, this is the cycle in which we schedule this
2913      // instruction plus the number of cycles the operations reserves the
2914      // resource. For bottom-up is it simply the instruction's cycle.
// NOTE(review): source lines 2915-2916 (another ProcResIter for-loop header)
// were elided here.
2917           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2918        unsigned PIdx = PI->ProcResourceIdx;
        // Only unbuffered (BufferSize == 0) resources are truly reserved.
2919        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2920
// NOTE(review): source line 2921 (the condition selecting the
// resource-segment bookkeeping path) was elided here.
2922            unsigned ReservedUntil, InstanceIdx;
2923            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2924                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2925            if (isTop()) {
2926              ReservedResourceSegments[InstanceIdx].add(
// NOTE(review): source lines 2927 and 2929 (the interval-construction call
// and cutoff argument wrapping this add) were elided here.
2928                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2930            } else {
2931              ReservedResourceSegments[InstanceIdx].add(
// NOTE(review): source lines 2932 and 2934 (the bottom-direction interval
// construction and cutoff argument) were elided here.
2933                      NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2935            }
2936          } else {
2937
            // Legacy path: remember the single "reserved until" cycle.
2938            unsigned ReservedUntil, InstanceIdx;
2939            std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2940                SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2941            if (isTop()) {
2942              ReservedCycles[InstanceIdx] =
2943                  std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2944            } else
2945              ReservedCycles[InstanceIdx] = NextCycle;
2946          }
2947        }
2948      }
2949    }
2950  }
2951  // Update ExpectedLatency and DependentLatency.
2952  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2953  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2954  if (SU->getDepth() > TopLatency) {
2955    TopLatency = SU->getDepth();
2956    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
2957                      << SU->NodeNum << ") " << TopLatency << "c\n");
2958  }
2959  if (SU->getHeight() > BotLatency) {
2960    BotLatency = SU->getHeight();
2961    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
2962                      << SU->NodeNum << ") " << BotLatency << "c\n");
2963  }
2964  // If we stall for any reason, bump the cycle.
2965  if (NextCycle > CurrCycle)
2966    bumpCycle(NextCycle);
2967  else
2968    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2969    // resource limited. If a stall occurred, bumpCycle does this.
2970    IsResourceLimited =
// NOTE(review): source line 2971 (the first line of this checkResourceLimit
// call) was elided here.
2972            getScheduledLatency(), true);
2973
2974  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2975  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2976  // one cycle. Since we commonly reach the max MOps here, opportunistically
2977  // bump the cycle to avoid uselessly checking everything in the readyQ.
2978  CurrMOps += IncMOps;
2979
2980  // Bump the cycle count for issue group constraints.
2981  // This must be done after NextCycle has been adjust for all other stalls.
2982  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2983  // currCycle to X.
2984  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2985      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2986    LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
2987                      << " group\n");
2988    bumpCycle(++NextCycle);
2989  }
2990
2991  while (CurrMOps >= SchedModel->getIssueWidth()) {
2992    LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
2993                      << CurrCycle << '\n');
2994    bumpCycle(++NextCycle);
2995  }
// NOTE(review): source line 2996 (a debug-only dump of scheduled state) was
// elided here.
2997}
2998
2999/// Release pending ready nodes in to the available queue. This makes them
3000/// visible to heuristics.
3002 // If the available queue is empty, it is safe to reset MinReadyCycle.
3003 if (Available.empty())
3004 MinReadyCycle = std::numeric_limits<unsigned>::max();
3005
3006 // Check to see if any of the pending instructions are ready to issue. If
3007 // so, add them to the available queue.
3008 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
3009 SUnit *SU = *(Pending.begin() + I);
3010 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
3011
3012 if (ReadyCycle < MinReadyCycle)
3013 MinReadyCycle = ReadyCycle;
3014
3015 if (Available.size() >= ReadyListLimit)
3016 break;
3017
3018 releaseNode(SU, ReadyCycle, true, I);
3019 if (E != Pending.size()) {
3020 --I;
3021 --E;
3022 }
3023 }
3024 CheckPending = false;
3025}
3026
3027/// Remove SU from the ready set for this boundary.
3029 if (Available.isInQueue(SU))
3031 else {
3032 assert(Pending.isInQueue(SU) && "bad ready count");
3034 }
3035}
3036
3037/// If this queue only has one ready candidate, return it. As a side effect,
3038/// defer any nodes that now hit a hazard, and advance the cycle until at least
3039/// one node is ready. If multiple instructions are ready, return NULL.
3041 if (CheckPending)
3043
3044 // Defer any ready instrs that now have a hazard.
3046 if (checkHazard(*I)) {
3047 Pending.push(*I);
3048 I = Available.remove(I);
3049 continue;
3050 }
3051 ++I;
3052 }
3053 for (unsigned i = 0; Available.empty(); ++i) {
3054// FIXME: Re-enable assert once PR20057 is resolved.
3055// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
3056// "permanent hazard");
3057 (void)i;
3058 bumpCycle(CurrCycle + 1);
3060 }
3061
3064
3065 if (Available.size() == 1)
3066 return *Available.begin();
3067 return nullptr;
3068}
3069
3070#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3071
3072/// Dump the content of the \ref ReservedCycles vector for the
3073/// resources that are used in the basic block.
3074///
3077 return;
3078
3079 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
3080 unsigned StartIdx = 0;
3081
3082 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
3083 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
3084 std::string ResName = SchedModel->getResourceName(ResIdx);
3085 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
3086 dbgs() << ResName << "(" << UnitIdx << ") = ";
3088 if (ReservedResourceSegments.count(StartIdx + UnitIdx))
3089 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
3090 else
3091 dbgs() << "{ }\n";
3092 } else
3093 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
3094 }
3095 StartIdx += NumUnits;
3096 }
3097}
3098
3099// This is useful information to dump after bumpNode.
3100// Note that the Queue contents are more useful before pickNodeFromQueue.
3102 unsigned ResFactor;
3103 unsigned ResCount;
3104 if (ZoneCritResIdx) {
3105 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
3106 ResCount = getResourceCount(ZoneCritResIdx);
3107 } else {
3108 ResFactor = SchedModel->getMicroOpFactor();
3109 ResCount = RetiredMOps * ResFactor;
3110 }
3111 unsigned LFactor = SchedModel->getLatencyFactor();
3112 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
3113 << " Retired: " << RetiredMOps;
3114 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
3115 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
3116 << ResCount / ResFactor << " "
3117 << SchedModel->getResourceName(ZoneCritResIdx)
3118 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
3119 << (IsResourceLimited ? " - Resource" : " - Latency")
3120 << " limited.\n";
3123}
3124#endif
3125
3126//===----------------------------------------------------------------------===//
3127// GenericScheduler - Generic implementation of MachineSchedStrategy.
3128//===----------------------------------------------------------------------===//
3129
3132 const TargetSchedModel *SchedModel) {
3134 return;
3135
3136 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
3139 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
3140 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
3141 ResDelta.CritResources += PI->ReleaseAtCycle;
3142 if (PI->ProcResourceIdx == Policy.DemandResIdx)
3143 ResDelta.DemandedResources += PI->ReleaseAtCycle;
3144 }
3145}
3146
3147/// Compute remaining latency. We need this both to determine whether the
3148/// overall schedule has become latency-limited and whether the instructions
3149/// outside this zone are resource or latency limited.
3150///
3151/// The "dependent" latency is updated incrementally during scheduling as the
3152/// max height/depth of scheduled nodes minus the cycles since it was
3153/// scheduled:
3154/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
3155///
3156/// The "independent" latency is the max ready queue depth:
3157/// ILat = max N.depth for N in Available|Pending
3158///
3159/// RemainingLatency is the greater of independent and dependent latency.
3160///
3161/// These computations are expensive, especially in DAGs with many edges, so
3162/// only do them if necessary.
3163static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3164 unsigned RemLatency = CurrZone.getDependentLatency();
3165 RemLatency = std::max(RemLatency,
3166 CurrZone.findMaxLatency(CurrZone.Available.elements()));
3167 RemLatency = std::max(RemLatency,
3168 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3169 return RemLatency;
3170}
3171
3172/// Returns true if the current cycle plus remaning latency is greater than
3173/// the critical path in the scheduling region.
3174bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3175 SchedBoundary &CurrZone,
3176 bool ComputeRemLatency,
3177 unsigned &RemLatency) const {
3178 // The current cycle is already greater than the critical path, so we are
3179 // already latency limited and don't need to compute the remaining latency.
3180 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3181 return true;
3182
3183 // If we haven't scheduled anything yet, then we aren't latency limited.
3184 if (CurrZone.getCurrCycle() == 0)
3185 return false;
3186
3187 if (ComputeRemLatency)
3188 RemLatency = computeRemLatency(CurrZone);
3189
3190 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3191}
3192
3193/// Set the CandPolicy given a scheduling zone given the current resources and
3194/// latencies inside and outside the zone.
3196 SchedBoundary &CurrZone,
3197 SchedBoundary *OtherZone) {
3198 // Apply preemptive heuristics based on the total latency and resources
3199 // inside and outside this zone. Potential stalls should be considered before
3200 // following this policy.
3201
3202 // Compute the critical resource outside the zone.
3203 unsigned OtherCritIdx = 0;
3204 unsigned OtherCount =
3205 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3206
3207 bool OtherResLimited = false;
3208 unsigned RemLatency = 0;
3209 bool RemLatencyComputed = false;
3210 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3211 RemLatency = computeRemLatency(CurrZone);
3212 RemLatencyComputed = true;
3213 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3214 OtherCount, RemLatency, false);
3215 }
3216
3217 // Schedule aggressively for latency in PostRA mode. We don't check for
3218 // acyclic latency during PostRA, and highly out-of-order processors will
3219 // skip PostRA scheduling.
3220 if (!OtherResLimited &&
3221 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3222 RemLatency))) {
3223 Policy.ReduceLatency |= true;
3224 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3225 << " RemainingLatency " << RemLatency << " + "
3226 << CurrZone.getCurrCycle() << "c > CritPath "
3227 << Rem.CriticalPath << "\n");
3228 }
3229 // If the same resource is limiting inside and outside the zone, do nothing.
3230 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3231 return;
3232
3233 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3234 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3235 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3236 } if (OtherResLimited) dbgs()
3237 << " RemainingLimit: "
3238 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3239 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3240 << " Latency limited both directions.\n");
3241
3242 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3243 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3244
3245 if (OtherResLimited)
3246 Policy.DemandResIdx = OtherCritIdx;
3247}
3248
3249#ifndef NDEBUG
3252 switch (Reason) {
3253 case NoCand: return "NOCAND ";
3254 case Only1: return "ONLY1 ";
3255 case PhysReg: return "PHYS-REG ";
3256 case RegExcess: return "REG-EXCESS";
3257 case RegCritical: return "REG-CRIT ";
3258 case Stall: return "STALL ";
3259 case Cluster: return "CLUSTER ";
3260 case Weak: return "WEAK ";
3261 case RegMax: return "REG-MAX ";
3262 case ResourceReduce: return "RES-REDUCE";
3263 case ResourceDemand: return "RES-DEMAND";
3264 case TopDepthReduce: return "TOP-DEPTH ";
3265 case TopPathReduce: return "TOP-PATH ";
3266 case BotHeightReduce:return "BOT-HEIGHT";
3267 case BotPathReduce: return "BOT-PATH ";
3268 case NextDefUse: return "DEF-USE ";
3269 case NodeOrder: return "ORDER ";
3270 };
3271 llvm_unreachable("Unknown reason!");
3272}
3273
3276 unsigned ResIdx = 0;
3277 unsigned Latency = 0;
3278 switch (Cand.Reason) {
3279 default:
3280 break;
3281 case RegExcess:
3282 P = Cand.RPDelta.Excess;
3283 break;
3284 case RegCritical:
3285 P = Cand.RPDelta.CriticalMax;
3286 break;
3287 case RegMax:
3288 P = Cand.RPDelta.CurrentMax;
3289 break;
3290 case ResourceReduce:
3291 ResIdx = Cand.Policy.ReduceResIdx;
3292 break;
3293 case ResourceDemand:
3294 ResIdx = Cand.Policy.DemandResIdx;
3295 break;
3296 case TopDepthReduce:
3297 Latency = Cand.SU->getDepth();
3298 break;
3299 case TopPathReduce:
3300 Latency = Cand.SU->getHeight();
3301 break;
3302 case BotHeightReduce:
3303 Latency = Cand.SU->getHeight();
3304 break;
3305 case BotPathReduce:
3306 Latency = Cand.SU->getDepth();
3307 break;
3308 }
3309 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3310 if (P.isValid())
3311 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3312 << ":" << P.getUnitInc() << " ";
3313 else
3314 dbgs() << " ";
3315 if (ResIdx)
3316 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3317 else
3318 dbgs() << " ";
3319 if (Latency)
3320 dbgs() << " " << Latency << " cycles ";
3321 else
3322 dbgs() << " ";
3323 dbgs() << '\n';
3324}
3325#endif
3326
3327namespace llvm {
3328/// Return true if this heuristic determines order.
3329/// TODO: Consider refactor return type of these functions as integer or enum,
3330/// as we may need to differentiate whether TryCand is better than Cand.
3331bool tryLess(int TryVal, int CandVal,
// NOTE(review): the parameter-continuation lines (TryCand, Cand, Reason) were
// lost in extraction; the body below reads/writes those candidates.
// A strictly smaller value wins: TryCand becomes the pick for this Reason.
 3335 if (TryVal < CandVal) {
 3336 TryCand.Reason = Reason;
 3337 return true;
 3338 }
// A strictly larger value loses, but still record this Reason on Cand when it
// outranks (is numerically smaller than) Cand's current reason.
 3339 if (TryVal > CandVal) {
 3340 if (Cand.Reason > Reason)
 3341 Cand.Reason = Reason;
 3342 return true;
 3343 }
// Tie: this heuristic does not determine the order; try the next one.
 3344 return false;
 3345}
3346
3347bool tryGreater(int TryVal, int CandVal,
// NOTE(review): the parameter-continuation lines (TryCand, Cand, Reason) were
// lost in extraction; this is the mirror image of tryLess above.
// A strictly larger value wins: TryCand becomes the pick for this Reason.
 3351 if (TryVal > CandVal) {
 3352 TryCand.Reason = Reason;
 3353 return true;
 3354 }
// A strictly smaller value loses, but still record this Reason on Cand when it
// outranks Cand's current reason.
 3355 if (TryVal < CandVal) {
 3356 if (Cand.Reason > Reason)
 3357 Cand.Reason = Reason;
 3358 return true;
 3359 }
// Tie: fall through to the next heuristic.
 3360 return false;
 3361}
3362
3365 SchedBoundary &Zone) {
3366 if (Zone.isTop()) {
3367 // Prefer the candidate with the lesser depth, but only if one of them has
3368 // depth greater than the total latency scheduled so far, otherwise either
3369 // of them could be scheduled now with no stall.
3370 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3371 Zone.getScheduledLatency()) {
3372 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3374 return true;
3375 }
3376 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3378 return true;
3379 } else {
3380 // Prefer the candidate with the lesser height, but only if one of them has
3381 // height greater than the total latency scheduled so far, otherwise either
3382 // of them could be scheduled now with no stall.
3383 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3384 Zone.getScheduledLatency()) {
3385 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3387 return true;
3388 }
3389 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3391 return true;
3392 }
3393 return false;
3394}
3395} // end namespace llvm
3396
3397static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3398 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3399 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3400}
3401
3403 tracePick(Cand.Reason, Cand.AtTop);
3404}
3405
3407 assert(dag->hasVRegLiveness() &&
3408 "(PreRA)GenericScheduler needs vreg liveness");
3409 DAG = static_cast<ScheduleDAGMILive*>(dag);
3410 SchedModel = DAG->getSchedModel();
3411 TRI = DAG->TRI;
3412
3414 DAG->computeDFSResult();
3415
3416 Rem.init(DAG, SchedModel);
3417 Top.init(DAG, SchedModel, &Rem);
3418 Bot.init(DAG, SchedModel, &Rem);
3419
3420 // Initialize resource counts.
3421
3422 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3423 // are disabled, then these HazardRecs will be disabled.
3425 if (!Top.HazardRec) {
3426 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3427 }
3428 if (!Bot.HazardRec) {
3429 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3430 }
3431 TopCand.SU = nullptr;
3432 BotCand.SU = nullptr;
3433}
3434
3435/// Initialize the per-region scheduling policy.
3438 unsigned NumRegionInstrs) {
3439 const MachineFunction &MF = *Begin->getMF();
3440 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3441
3442 // Avoid setting up the register pressure tracker for small regions to save
3443 // compile time. As a rough heuristic, only track pressure when the number of
3444 // schedulable instructions exceeds half the allocatable integer register file
3445 // that is the largest legal integer regiser type.
3447 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3449 if (TLI->isTypeLegal(LegalIntVT)) {
3450 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3451 TLI->getRegClassFor(LegalIntVT));
3452 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3453 break;
3454 }
3455 }
3456
3457 // For generic targets, we default to bottom-up, because it's simpler and more
3458 // compile-time optimizations have been implemented in that direction.
3460
3461 // Allow the subtarget to override default policy.
3462 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3463
3464 // After subtarget overrides, apply command line options.
3465 if (!EnableRegPressure) {
3468 }
3469
3472 RegionPolicy.OnlyBottomUp = false;
3473 } else if (PreRADirection == MISched::BottomUp) {
3474 RegionPolicy.OnlyTopDown = false;
3476 } else if (PreRADirection == MISched::Bidirectional) {
3477 RegionPolicy.OnlyBottomUp = false;
3478 RegionPolicy.OnlyTopDown = false;
3479 }
3480}
3481
3483 // Cannot completely remove virtual function even in release mode.
3484#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3485 dbgs() << "GenericScheduler RegionPolicy: "
3486 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3487 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3488 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3489 << "\n";
3490#endif
3491}
3492
3493/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3494/// critical path by more cycles than it takes to drain the instruction buffer.
3495/// We estimate an upper bounds on in-flight instructions as:
3496///
3497/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3498/// InFlightIterations = AcyclicPath / CyclesPerIteration
3499/// InFlightResources = InFlightIterations * LoopResources
3500///
3501/// TODO: Check execution resources in addition to IssueCount.
3504 return;
3505
3506 // Scaled number of cycles per loop iteration.
3507 unsigned IterCount =
3510 // Scaled acyclic critical path.
3511 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3512 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3513 unsigned InFlightCount =
3514 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3515 unsigned BufferLimit =
3517
3518 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3519
3520 LLVM_DEBUG(
3521 dbgs() << "IssueCycles="
3523 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3524 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3525 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3526 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3527 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3528}
3529
3532
3533 // Some roots may not feed into ExitSU. Check all of them in case.
3534 for (const SUnit *SU : Bot.Available) {
3535 if (SU->getDepth() > Rem.CriticalPath)
3536 Rem.CriticalPath = SU->getDepth();
3537 }
3538 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3540 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3541 }
3542
3544 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3545 checkAcyclicLatency();
3546 }
3547}
3548
3549namespace llvm {
3551 const PressureChange &CandP,
3555 const TargetRegisterInfo *TRI,
3556 const MachineFunction &MF) {
3557 // If one candidate decreases and the other increases, go with it.
3558 // Invalid candidates have UnitInc==0.
3559 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3560 Reason)) {
3561 return true;
3562 }
3563 // Do not compare the magnitude of pressure changes between top and bottom
3564 // boundary.
3565 if (Cand.AtTop != TryCand.AtTop)
3566 return false;
3567
3568 // If both candidates affect the same set in the same boundary, go with the
3569 // smallest increase.
3570 unsigned TryPSet = TryP.getPSetOrMax();
3571 unsigned CandPSet = CandP.getPSetOrMax();
3572 if (TryPSet == CandPSet) {
3573 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3574 Reason);
3575 }
3576
3577 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3578 std::numeric_limits<int>::max();
3579
3580 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3581 std::numeric_limits<int>::max();
3582
3583 // If the candidates are decreasing pressure, reverse priority.
3584 if (TryP.getUnitInc() < 0)
3585 std::swap(TryRank, CandRank);
3586 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3587}
3588
3589unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3590 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3591}
3592
3593/// Minimize physical register live ranges. Regalloc wants them adjacent to
3594/// their physreg def/use.
3595///
3596/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3597/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3598/// with the operation that produces or consumes the physreg. We'll do this when
3599/// regalloc has support for parallel copies.
3600int biasPhysReg(const SUnit *SU, bool isTop) {
3601 const MachineInstr *MI = SU->getInstr();
3602
3603 if (MI->isCopy()) {
3604 unsigned ScheduledOper = isTop ? 1 : 0;
3605 unsigned UnscheduledOper = isTop ? 0 : 1;
3606 // If we have already scheduled the physreg produce/consumer, immediately
3607 // schedule the copy.
3608 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3609 return 1;
3610 // If the physreg is at the boundary, defer it. Otherwise schedule it
3611 // immediately to free the dependent. We can hoist the copy later.
3612 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3613 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3614 return AtBoundary ? -1 : 1;
3615 }
3616
3617 if (MI->isMoveImmediate()) {
3618 // If we have a move immediate and all successors have been assigned, bias
3619 // towards scheduling this later. Make sure all register defs are to
3620 // physical registers.
3621 bool DoBias = true;
3622 for (const MachineOperand &Op : MI->defs()) {
3623 if (Op.isReg() && !Op.getReg().isPhysical()) {
3624 DoBias = false;
3625 break;
3626 }
3627 }
3628
3629 if (DoBias)
3630 return isTop ? -1 : 1;
3631 }
3632
3633 return 0;
3634}
3635} // end namespace llvm
3636
3638 bool AtTop,
3639 const RegPressureTracker &RPTracker,
3640 RegPressureTracker &TempTracker) {
3641 Cand.SU = SU;
3642 Cand.AtTop = AtTop;
3643 if (DAG->isTrackingPressure()) {
3644 if (AtTop) {
3645 TempTracker.getMaxDownwardPressureDelta(
3646 Cand.SU->getInstr(),
3647 Cand.RPDelta,
3648 DAG->getRegionCriticalPSets(),
3649 DAG->getRegPressure().MaxSetPressure);
3650 } else {
3651 if (VerifyScheduling) {
3652 TempTracker.getMaxUpwardPressureDelta(
3653 Cand.SU->getInstr(),
3654 &DAG->getPressureDiff(Cand.SU),
3655 Cand.RPDelta,
3656 DAG->getRegionCriticalPSets(),
3657 DAG->getRegPressure().MaxSetPressure);
3658 } else {
3659 RPTracker.getUpwardPressureDelta(
3660 Cand.SU->getInstr(),
3661 DAG->getPressureDiff(Cand.SU),
3662 Cand.RPDelta,
3663 DAG->getRegionCriticalPSets(),
3664 DAG->getRegPressure().MaxSetPressure);
3665 }
3666 }
3667 }
3668 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3669 << " Try SU(" << Cand.SU->NodeNum << ") "
3671 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3672}
3673
3674/// Apply a set of heuristics to a new candidate. Heuristics are currently
3675/// hierarchical. This may be more efficient than a graduated cost model because
3676/// we don't need to evaluate all aspects of the model for each node in the
3677/// queue. But it's really done to make the heuristics easier to debug and
3678/// statistically analyze.
3679///
3680/// \param Cand provides the policy and current best candidate.
3681/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3682/// \param Zone describes the scheduled zone that we are extending, or nullptr
3683/// if Cand is from a different zone than TryCand.
3684/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3686 SchedCandidate &TryCand,
3687 SchedBoundary *Zone) const {
3688 // Initialize the candidate if needed.
3689 if (!Cand.isValid()) {
3690 TryCand.Reason = NodeOrder;
3691 return true;
3692 }
3693
3694 // Bias PhysReg Defs and copies to their uses and defined respectively.
3695 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3696 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3697 return TryCand.Reason != NoCand;
3698
3699 // Avoid exceeding the target's limit.
3700 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3701 Cand.RPDelta.Excess,
3702 TryCand, Cand, RegExcess, TRI,
3703 DAG->MF))
3704 return TryCand.Reason != NoCand;
3705
3706 // Avoid increasing the max critical pressure in the scheduled region.
3707 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3708 Cand.RPDelta.CriticalMax,
3709 TryCand, Cand, RegCritical, TRI,
3710 DAG->MF))
3711 return TryCand.Reason != NoCand;
3712
3713 // We only compare a subset of features when comparing nodes between
3714 // Top and Bottom boundary. Some properties are simply incomparable, in many
3715 // other instances we should only override the other boundary if something
3716 // is a clear good pick on one boundary. Skip heuristics that are more
3717 // "tie-breaking" in nature.
3718 bool SameBoundary = Zone != nullptr;
3719 if (SameBoundary) {
3720 // For loops that are acyclic path limited, aggressively schedule for
3721 // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
3722 // heuristics to take precedence.
3723 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3724 tryLatency(TryCand, Cand, *Zone))
3725 return TryCand.Reason != NoCand;
3726
3727 // Prioritize instructions that read unbuffered resources by stall cycles.
3728 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3729 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3730 return TryCand.Reason != NoCand;
3731 }
3732
3733 // Keep clustered nodes together to encourage downstream peephole
3734 // optimizations which may reduce resource requirements.
3735 //
3736 // This is a best effort to set things up for a post-RA pass. Optimizations
3737 // like generating loads of multiple registers should ideally be done within
3738 // the scheduler pass by combining the loads during DAG postprocessing.
3739 const SUnit *CandNextClusterSU =
3740 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3741 const SUnit *TryCandNextClusterSU =
3742 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3743 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3744 Cand.SU == CandNextClusterSU,
3745 TryCand, Cand, Cluster))
3746 return TryCand.Reason != NoCand;
3747
3748 if (SameBoundary) {
3749 // Weak edges are for clustering and other constraints.
3750 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3751 getWeakLeft(Cand.SU, Cand.AtTop),
3752 TryCand, Cand, Weak))
3753 return TryCand.Reason != NoCand;
3754 }
3755
3756 // Avoid increasing the max pressure of the entire region.
3757 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3758 Cand.RPDelta.CurrentMax,
3759 TryCand, Cand, RegMax, TRI,
3760 DAG->MF))
3761 return TryCand.Reason != NoCand;
3762
3763 if (SameBoundary) {
3764 // Avoid critical resource consumption and balance the schedule.
3765 TryCand.initResourceDelta(DAG, SchedModel);
3767 TryCand, Cand, ResourceReduce))
3768 return TryCand.Reason != NoCand;
3771 TryCand, Cand, ResourceDemand))
3772 return TryCand.Reason != NoCand;
3773
3774 // Avoid serializing long latency dependence chains.
3775 // For acyclic path limited loops, latency was already checked above.
3777 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3778 return TryCand.Reason != NoCand;
3779
3780 // Fall through to original instruction order.
3781 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3782 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3783 TryCand.Reason = NodeOrder;
3784 return true;
3785 }
3786 }
3787
3788 return false;
3789}
3790
3791/// Pick the best candidate from the queue.
3792///
3793/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3794/// DAG building. To adjust for the current scheduling location we need to
3795/// maintain the number of vreg uses remaining to be top-scheduled.
3797 const CandPolicy &ZonePolicy,
3798 const RegPressureTracker &RPTracker,
3799 SchedCandidate &Cand) {
3800 // getMaxPressureDelta temporarily modifies the tracker.
3801 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3802
3803 ReadyQueue &Q = Zone.Available;
3804 for (SUnit *SU : Q) {
3805
3806 SchedCandidate TryCand(ZonePolicy);
3807 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3808 // Pass SchedBoundary only when comparing nodes from the same boundary.
3809 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3810 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3811 // Initialize resource delta if needed in case future heuristics query it.
3812 if (TryCand.ResDelta == SchedResourceDelta())
3813 TryCand.initResourceDelta(DAG, SchedModel);
3814 Cand.setBest(TryCand);
3816 }
3817 }
3818}
3819
3820/// Pick the best candidate node from either the top or bottom queue.
3822 // Schedule as far as possible in the direction of no choice. This is most
3823 // efficient, but also provides the best heuristics for CriticalPSets.
3824 if (SUnit *SU = Bot.pickOnlyChoice()) {
3825 IsTopNode = false;
3826 tracePick(Only1, false);
3827 return SU;
3828 }
3829 if (SUnit *SU = Top.pickOnlyChoice()) {
3830 IsTopNode = true;
3831 tracePick(Only1, true);
3832 return SU;
3833 }
3834 // Set the bottom-up policy based on the state of the current bottom zone and
3835 // the instructions outside the zone, including the top zone.
3836 CandPolicy BotPolicy;
3837 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3838 // Set the top-down policy based on the state of the current top zone and
3839 // the instructions outside the zone, including the bottom zone.
3840 CandPolicy TopPolicy;
3841 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3842
3843 // See if BotCand is still valid (because we previously scheduled from Top).
3844 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3845 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3846 BotCand.Policy != BotPolicy) {
3847 BotCand.reset(CandPolicy());
3848 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3849 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3850 } else {
3851 LLVM_DEBUG(traceCandidate(BotCand));
3852#ifndef NDEBUG
3853 if (VerifyScheduling) {
3854 SchedCandidate TCand;
3855 TCand.reset(CandPolicy());
3856 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3857 assert(TCand.SU == BotCand.SU &&
3858 "Last pick result should correspond to re-picking right now");
3859 }
3860#endif
3861 }
3862
3863 // Check if the top Q has a better candidate.
3864 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3865 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3866 TopCand.Policy != TopPolicy) {
3867 TopCand.reset(CandPolicy());
3868 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3869 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3870 } else {
3871 LLVM_DEBUG(traceCandidate(TopCand));
3872#ifndef NDEBUG
3873 if (VerifyScheduling) {
3874 SchedCandidate TCand;
3875 TCand.reset(CandPolicy());
3876 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3877 assert(TCand.SU == TopCand.SU &&
3878 "Last pick result should correspond to re-picking right now");
3879 }
3880#endif
3881 }
3882
3883 // Pick best from BotCand and TopCand.
3884 assert(BotCand.isValid());
3885 assert(TopCand.isValid());
3886 SchedCandidate Cand = BotCand;
3887 TopCand.Reason = NoCand;
3888 if (tryCandidate(Cand, TopCand, nullptr)) {
3889 Cand.setBest(TopCand);
3891 }
3892
3893 IsTopNode = Cand.AtTop;
3894 tracePick(Cand);
3895 return Cand.SU;
3896}
3897
3898/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3900 if (DAG->top() == DAG->bottom()) {
3901 assert(Top.Available.empty() && Top.Pending.empty() &&
3902 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3903 return nullptr;
3904 }
3905 SUnit *SU;
3906 do {
3908 SU = Top.pickOnlyChoice();
3909 if (!SU) {
3910 CandPolicy NoPolicy;
3911 TopCand.reset(NoPolicy);
3912 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3913 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3914 tracePick(TopCand);
3915 SU = TopCand.SU;
3916 }
3917 IsTopNode = true;
3918 } else if (RegionPolicy.OnlyBottomUp) {
3919 SU = Bot.pickOnlyChoice();
3920 if (!SU) {
3921 CandPolicy NoPolicy;
3922 BotCand.reset(NoPolicy);
3923 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3924 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3925 tracePick(BotCand);
3926 SU = BotCand.SU;
3927 }
3928 IsTopNode = false;
3929 } else {
3930 SU = pickNodeBidirectional(IsTopNode);
3931 }
3932 } while (SU->isScheduled);
3933
3934 // If IsTopNode, then SU is in Top.Available and must be removed. Otherwise,
3935 // if isTopReady(), then SU is in either Top.Available or Top.Pending.
3936 // If !IsTopNode, then SU is in Bot.Available and must be removed. Otherwise,
3937 // if isBottomReady(), then SU is in either Bot.Available or Bot.Pending.
3938 //
3939 // It is coincidental when !IsTopNode && isTopReady or when IsTopNode &&
3940 // isBottomReady. That is, it didn't factor into the decision to choose SU
3941 // because it isTopReady or isBottomReady, respectively. In fact, if the
3942 // RegionPolicy is OnlyTopDown or OnlyBottomUp, then the Bot queues and Top
3943 // queues respectivley contain the original roots and don't get updated when
3944 // picking a node. So if SU isTopReady on a OnlyBottomUp pick, then it was
3945 // because we schduled everything but the top roots. Conversley, if SU
3946 // isBottomReady on OnlyTopDown, then it was because we scheduled everything
3947 // but the bottom roots. If its in a queue even coincidentally, it should be
3948 // removed so it does not get re-picked in a subsequent pickNode call.
3949 if (SU->isTopReady())
3950 Top.removeReady(SU);
3951 if (SU->isBottomReady())
3952 Bot.removeReady(SU);
3953
3954 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3955 << *SU->getInstr());
3956 return SU;
3957}
3958
3960 MachineBasicBlock::iterator InsertPos = SU->getInstr();
3961 if (!isTop)
3962 ++InsertPos;
3963 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3964
3965 // Find already scheduled copies with a single physreg dependence and move
3966 // them just above the scheduled instruction.
3967 for (SDep &Dep : Deps) {
3968 if (Dep.getKind() != SDep::Data ||
3969 !Register::isPhysicalRegister(Dep.getReg()))
3970 continue;
3971 SUnit *DepSU = Dep.getSUnit();
3972 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3973 continue;
3974 MachineInstr *Copy = DepSU->getInstr();
3975 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3976 continue;
3977 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3978 DAG->dumpNode(*Dep.getSUnit()));
3979 DAG->moveInstruction(Copy, InsertPos);
3980 }
3981}
3982
3983/// Update the scheduler's state after scheduling a node. This is the same node
3984/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3985/// update it's state based on the current cycle before MachineSchedStrategy
3986/// does.
3987///
3988/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3989/// them here. See comments in biasPhysReg.
3990void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3991 if (IsTopNode) {
3992 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3993 Top.bumpNode(SU);
3994 if (SU->hasPhysRegUses)
3995 reschedulePhysReg(SU, true);
3996 } else {
3997 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3998 Bot.bumpNode(SU);
3999 if (SU->hasPhysRegDefs)
4000 reschedulePhysReg(SU, false);
4001 }
4002}
4003
4004/// Create the standard converging machine scheduler. This will be used as the
4005/// default scheduler if the target does not set a default.
4007 ScheduleDAGMILive *DAG =
4008 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
4009 // Register DAG post-processors.
4010 //
4011 // FIXME: extend the mutation API to allow earlier mutations to instantiate
4012 // data and pass it to later mutations. Have a single mutation that gathers
4013 // the interesting nodes in one pass.
4015
4016 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4017 // Add MacroFusion mutation if fusions are not empty.
4018 const auto &MacroFusions = STI.getMacroFusions();
4019 if (!MacroFusions.empty())
4020 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
4021 return DAG;
4022}
4023
4025 return createGenericSchedLive(C);
4026}
4027
4029GenericSchedRegistry("converge", "Standard converging scheduler.",
4031
4032//===----------------------------------------------------------------------===//
4033// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
4034//===----------------------------------------------------------------------===//
4035
4037 DAG = Dag;
4038 SchedModel = DAG->getSchedModel();
4039 TRI = DAG->TRI;
4040
4041 Rem.init(DAG, SchedModel);
4042 Top.init(DAG, SchedModel, &Rem);
4043 Bot.init(DAG, SchedModel, &Rem);
4044
4045 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
4046 // or are disabled, then these HazardRecs will be disabled.
4048 if (!Top.HazardRec) {
4049 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4050 }
4051 if (!Bot.HazardRec) {
4052 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
4053 }
4054}
4055
4058 unsigned NumRegionInstrs) {
4059 const MachineFunction &MF = *Begin->getMF();
4060
4061 // Default to top-down because it was implemented first and existing targets
4062 // expect that behavior by default.
4064 RegionPolicy.OnlyBottomUp = false;
4065
4066 // Allow the subtarget to override default policy.
4068
4069 // After subtarget overrides, apply command line options.
4072 RegionPolicy.OnlyBottomUp = false;
4073 } else if (PostRADirection == MISched::BottomUp) {
4074 RegionPolicy.OnlyTopDown = false;
4077 RegionPolicy.OnlyBottomUp = false;
4078 RegionPolicy.OnlyTopDown = false;
4079 }
4080}
4081
4084
4085 // Some roots may not feed into ExitSU. Check all of them in case.
4086 for (const SUnit *SU : Bot.Available) {
4087 if (SU->getDepth() > Rem.CriticalPath)
4088 Rem.CriticalPath = SU->getDepth();
4089 }
4090 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
4092 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
4093 }
4094}
4095
4096/// Apply a set of heuristics to a new candidate for PostRA scheduling.
4097///
4098/// \param Cand provides the policy and current best candidate.
4099/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
4100/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
4102 SchedCandidate &TryCand) {
4103 // Initialize the candidate if needed.
4104 if (!Cand.isValid()) {
4105 TryCand.Reason = NodeOrder;
4106 return true;
4107 }
4108
4109 // Prioritize instructions that read unbuffered resources by stall cycles.
4110 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
4111 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
4112 return TryCand.Reason != NoCand;
4113
4114 // Keep clustered nodes together.
4115 const SUnit *CandNextClusterSU =
4116 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
4117 const SUnit *TryCandNextClusterSU =
4118 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
4119 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
4120 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
4121 return TryCand.Reason != NoCand;
4122
4123 // Avoid critical resource consumption and balance the schedule.
4125 TryCand, Cand, ResourceReduce))
4126 return TryCand.Reason != NoCand;
4129 TryCand, Cand, ResourceDemand))
4130 return TryCand.Reason != NoCand;
4131
4132 // We only compare a subset of features when comparing nodes between
4133 // Top and Bottom boundary.
4134 if (Cand.AtTop == TryCand.AtTop) {
4135 // Avoid serializing long latency dependence chains.
4136 if (Cand.Policy.ReduceLatency &&
4137 tryLatency(TryCand, Cand, Cand.AtTop ? Top : Bot))
4138 return TryCand.Reason != NoCand;
4139 }
4140
4141 // Fall through to original instruction order.
4142 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
4143 TryCand.Reason = NodeOrder;
4144 return true;
4145 }
4146
4147 return false;
4148}
4149
4151 SchedCandidate &Cand) {
4152 ReadyQueue &Q = Zone.Available;
4153 for (SUnit *SU : Q) {
4154 SchedCandidate TryCand(Cand.Policy);
4155 TryCand.SU = SU;
4156 TryCand.AtTop = Zone.isTop();
4157 TryCand.initResourceDelta(DAG, SchedModel);
4158 if (tryCandidate(Cand, TryCand)) {
4159 Cand.setBest(TryCand);
4161 }
4162 }
4163}
4164
4165/// Pick the best candidate node from either the top or bottom queue.
4167 // FIXME: This is similiar to GenericScheduler::pickNodeBidirectional. Factor
4168 // out common parts.
4169
4170 // Schedule as far as possible in the direction of no choice. This is most
4171 // efficient, but also provides the best heuristics for CriticalPSets.
4172 if (SUnit *SU = Bot.pickOnlyChoice()) {
4173 IsTopNode = false;
4174 tracePick(Only1, false);
4175 return SU;
4176 }
4177 if (SUnit *SU = Top.pickOnlyChoice()) {
4178 IsTopNode = true;
4179 tracePick(Only1, true);
4180 return SU;
4181 }
4182 // Set the bottom-up policy based on the state of the current bottom zone and
4183 // the instructions outside the zone, including the top zone.
4184 CandPolicy BotPolicy;
4185 setPolicy(BotPolicy, /*IsPostRA=*/true, Bot, &Top);
4186 // Set the top-down policy based on the state of the current top zone and
4187 // the instructions outside the zone, including the bottom zone.
4188 CandPolicy TopPolicy;
4189 setPolicy(TopPolicy, /*IsPostRA=*/true, Top, &Bot);
4190
4191 // See if BotCand is still valid (because we previously scheduled from Top).
4192 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4193 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4194 BotCand.Policy != BotPolicy) {
4195 BotCand.reset(CandPolicy());
4196 pickNodeFromQueue(Bot, BotCand);
4197 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4198 } else {
4199 LLVM_DEBUG(traceCandidate(BotCand));
4200#ifndef NDEBUG
4201 if (VerifyScheduling) {
4202 SchedCandidate TCand;
4203 TCand.reset(CandPolicy());
4204 pickNodeFromQueue(Bot, BotCand);
4205 assert(TCand.SU == BotCand.SU &&
4206 "Last pick result should correspond to re-picking right now");
4207 }
4208#endif
4209 }
4210
4211 // Check if the top Q has a better candidate.
4212 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4213 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4214 TopCand.Policy != TopPolicy) {
4215 TopCand.reset(CandPolicy());
4216 pickNodeFromQueue(Top, TopCand);
4217 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4218 } else {
4219 LLVM_DEBUG(traceCandidate(TopCand));
4220#ifndef NDEBUG
4221 if (VerifyScheduling) {
4222 SchedCandidate TCand;
4223 TCand.reset(CandPolicy());
4224 pickNodeFromQueue(Top, TopCand);
4225 assert(TCand.SU == TopCand.SU &&
4226 "Last pick result should correspond to re-picking right now");
4227 }
4228#endif
4229 }
4230
4231 // Pick best from BotCand and TopCand.
4232 assert(BotCand.isValid());
4233 assert(TopCand.isValid());
4234 SchedCandidate Cand = BotCand;
4235 TopCand.Reason = NoCand;
4236 if (tryCandidate(Cand, TopCand)) {
4237 Cand.setBest(TopCand);
4239 }
4240
4241 IsTopNode = Cand.AtTop;
4242 tracePick(Cand);
4243 return Cand.SU;
4244}
4245
4246/// Pick the next node to schedule.
4248 if (DAG->top() == DAG->bottom()) {
4249 assert(Top.Available.empty() && Top.Pending.empty() &&
4250 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4251 return nullptr;
4252 }
4253 SUnit *SU;
4254 do {
4256 SU = Bot.pickOnlyChoice();
4257 if (SU) {
4258 tracePick(Only1, true);
4259 } else {
4260 CandPolicy NoPolicy;
4261 BotCand.reset(NoPolicy);
4262 // Set the bottom-up policy based on the state of the current bottom
4263 // zone and the instructions outside the zone, including the top zone.
4264 setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
4265 pickNodeFromQueue(Bot, BotCand);
4266 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4267 tracePick(BotCand);
4268 SU = BotCand.SU;
4269 }
4270 IsTopNode = false;
4271 } else if (RegionPolicy.OnlyTopDown) {
4272 SU = Top.pickOnlyChoice();
4273 if (SU) {
4274 tracePick(Only1, true);
4275 } else {
4276 CandPolicy NoPolicy;
4277 TopCand.reset(NoPolicy);
4278 // Set the top-down policy based on the state of the current top zone
4279 // and the instructions outside the zone, including the bottom zone.
4280 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4281 pickNodeFromQueue(Top, TopCand);
4282 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4283 tracePick(TopCand);
4284 SU = TopCand.SU;
4285 }
4286 IsTopNode = true;
4287 } else {
4288 SU = pickNodeBidirectional(IsTopNode);
4289 }
4290 } while (SU->isScheduled);
4291
4292 if (SU->isTopReady())
4293 Top.removeReady(SU);
4294 if (SU->isBottomReady())
4295 Bot.removeReady(SU);
4296
4297 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4298 << *SU->getInstr());
4299 return SU;
4300}
4301
4302/// Called after ScheduleDAGMI has scheduled an instruction and updated
4303/// scheduled/remaining flags in the DAG nodes.
4304void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4305 if (IsTopNode) {
4306 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4307 Top.bumpNode(SU);
4308 } else {
4309 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4310 Bot.bumpNode(SU);
4311 }
4312}
4313
4315 ScheduleDAGMI *DAG =
4316 new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
4317 /*RemoveKillFlags=*/true);
4318 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4319 // Add MacroFusion mutation if fusions are not empty.
4320 const auto &MacroFusions = STI.getMacroFusions();
4321 if (!MacroFusions.empty())
4322 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
4323 return DAG;
4324}
4325
4326//===----------------------------------------------------------------------===//
4327// ILP Scheduler. Currently for experimental analysis of heuristics.
4328//===----------------------------------------------------------------------===//
4329
4330namespace {
4331
4332/// Order nodes by the ILP metric.
4333struct ILPOrder {
4334 const SchedDFSResult *DFSResult = nullptr;
4335 const BitVector *ScheduledTrees = nullptr;
4336 bool MaximizeILP;
4337
4338 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4339
4340 /// Apply a less-than relation on node priority.
4341 ///
4342 /// (Return true if A comes after B in the Q.)
4343 bool operator()(const SUnit *A, const SUnit *B) const {
4344 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4345 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4346 if (SchedTreeA != SchedTreeB) {
4347 // Unscheduled trees have lower priority.
4348 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4349 return ScheduledTrees->test(SchedTreeB);
4350
4351 // Trees with shallower connections have lower priority.
4352 if (DFSResult->getSubtreeLevel(SchedTreeA)
4353 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4354 return DFSResult->getSubtreeLevel(SchedTreeA)
4355 < DFSResult->getSubtreeLevel(SchedTreeB);
4356 }
4357 }
4358 if (MaximizeILP)
4359 return DFSResult->getILP(A) < DFSResult->getILP(B);
4360 else
4361 return DFSResult->getILP(A) > DFSResult->getILP(B);
4362 }
4363};
4364
4365/// Schedule based on the ILP metric.
4366class ILPScheduler : public MachineSchedStrategy {
4367 ScheduleDAGMILive *DAG = nullptr;
4368 ILPOrder Cmp;
4369
4370 std::vector<SUnit*> ReadyQ;
4371
4372public:
4373 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4374
4375 void initialize(ScheduleDAGMI *dag) override {
4376 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4377 DAG = static_cast<ScheduleDAGMILive*>(dag);
4378 DAG->computeDFSResult();
4379 Cmp.DFSResult = DAG->getDFSResult();
4380 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4381 ReadyQ.clear();
4382 }
4383
4384 void registerRoots() override {
4385 // Restore the heap in ReadyQ with the updated DFS results.
4386 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4387 }
4388
4389 /// Implement MachineSchedStrategy interface.
4390 /// -----------------------------------------
4391
4392 /// Callback to select the highest priority node from the ready Q.
4393 SUnit *pickNode(bool &IsTopNode) override {
4394 if (ReadyQ.empty()) return nullptr;
4395 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4396 SUnit *SU = ReadyQ.back();
4397 ReadyQ.pop_back();
4398 IsTopNode = false;
4399 LLVM_DEBUG(dbgs() << "Pick node "
4400 << "SU(" << SU->NodeNum << ") "
4401 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4402 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4403 << " @"
4404 << DAG->getDFSResult()->getSubtreeLevel(
4405 DAG->getDFSResult()->getSubtreeID(SU))
4406 << '\n'
4407 << "Scheduling " << *SU->getInstr());
4408 return SU;
4409 }
4410
4411 /// Scheduler callback to notify that a new subtree is scheduled.
4412 void scheduleTree(unsigned SubtreeID) override {
4413 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4414 }
4415
4416 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4417 /// DFSResults, and resort the priority Q.
4418 void schedNode(SUnit *SU, bool IsTopNode) override {
4419 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4420 }
4421
4422 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4423
4424 void releaseBottomNode(SUnit *SU) override {
4425 ReadyQ.push_back(SU);
4426 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4427 }
4428};
4429
4430} // end anonymous namespace
4431
4433 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4434}
4436 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4437}
4438
4440 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4442 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4443
4444//===----------------------------------------------------------------------===//
4445// Machine Instruction Shuffler for Correctness Testing
4446//===----------------------------------------------------------------------===//
4447
4448#ifndef NDEBUG
4449namespace {
4450
4451/// Apply a less-than relation on the node order, which corresponds to the
4452/// instruction order prior to scheduling. IsReverse implements greater-than.
4453template<bool IsReverse>
4454struct SUnitOrder {
4455 bool operator()(SUnit *A, SUnit *B) const {
4456 if (IsReverse)
4457 return A->NodeNum > B->NodeNum;
4458 else
4459 return A->NodeNum < B->NodeNum;
4460 }
4461};
4462
4463/// Reorder instructions as much as possible.
4464class InstructionShuffler : public MachineSchedStrategy {
4465 bool IsAlternating;
4466 bool IsTopDown;
4467
4468 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4469 // gives nodes with a higher number higher priority causing the latest
4470 // instructions to be scheduled first.
4472 TopQ;
4473
4474 // When scheduling bottom-up, use greater-than as the queue priority.
4476 BottomQ;
4477
4478public:
4479 InstructionShuffler(bool alternate, bool topdown)
4480 : IsAlternating(alternate), IsTopDown(topdown) {}
4481
4482 void initialize(ScheduleDAGMI*) override {
4483 TopQ.clear();
4484 BottomQ.clear();
4485 }
4486
4487 /// Implement MachineSchedStrategy interface.
4488 /// -----------------------------------------
4489
4490 SUnit *pickNode(bool &IsTopNode) override {
4491 SUnit *SU;
4492 if (IsTopDown) {
4493 do {
4494 if (TopQ.empty()) return nullptr;
4495 SU = TopQ.top();
4496 TopQ.pop();
4497 } while (SU->isScheduled);
4498 IsTopNode = true;
4499 } else {
4500 do {
4501 if (BottomQ.empty()) return nullptr;
4502 SU = BottomQ.top();
4503 BottomQ.pop();
4504 } while (SU->isScheduled);
4505 IsTopNode = false;
4506 }
4507 if (IsAlternating)
4508 IsTopDown = !IsTopDown;
4509 return SU;
4510 }
4511
4512 void schedNode(SUnit *SU, bool IsTopNode) override {}
4513
4514 void releaseTopNode(SUnit *SU) override {
4515 TopQ.push(SU);
4516 }
4517 void releaseBottomNode(SUnit *SU) override {
4518 BottomQ.push(SU);
4519 }
4520};
4521
4522} // end anonymous namespace
4523
4525 bool Alternate =
4527 bool TopDown = PreRADirection != MISched::BottomUp;
4528 return new ScheduleDAGMILive(
4529 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4530}
4531
4533 "shuffle", "Shuffle machine instructions alternating directions",
4535#endif // !NDEBUG
4536
4537//===----------------------------------------------------------------------===//
4538// GraphWriter support for ScheduleDAGMILive.
4539//===----------------------------------------------------------------------===//
4540
4541#ifndef NDEBUG
4542namespace llvm {
4543
4544template<> struct GraphTraits<
4546
4547template<>
4550
4551 static std::string getGraphName(const ScheduleDAG *G) {
4552 return std::string(G->MF.getName());
4553 }
4554
4556 return true;
4557 }
4558
4559 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4560 if (ViewMISchedCutoff == 0)
4561 return false;
4562 return (Node->Preds.size() > ViewMISchedCutoff
4563 || Node->Succs.size() > ViewMISchedCutoff);
4564 }
4565
4566 /// If you want to override the dot attributes printed for a particular
4567 /// edge, override this method.
4568 static std::string getEdgeAttributes(const SUnit *Node,
4569 SUnitIterator EI,
4570 const ScheduleDAG *Graph) {
4571 if (EI.isArtificialDep())
4572 return "color=cyan,style=dashed";
4573 if (EI.isCtrlDep())
4574 return "color=blue,style=dashed";
4575 return "";
4576 }
4577
4578 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4579 std::string Str;
4580 raw_string_ostream SS(Str);
4581 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4582 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4583 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4584 SS << "SU:" << SU->NodeNum;
4585 if (DFS)
4586 SS << " I:" << DFS->getNumInstrs(SU);
4587 return Str;
4588 }
4589
4590 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4591 return G->getGraphNodeLabel(SU);
4592 }
4593
4594 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4595 std::string Str("shape=Mrecord");
4596 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4597 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4598 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4599 if (DFS) {
4600 Str += ",style=filled,fillcolor=\"#";
4601 Str += DOT::getColorString(DFS->getSubtreeID(N));
4602 Str += '"';
4603 }
4604 return Str;
4605 }
4606};
4607
4608} // end namespace llvm
4609#endif // NDEBUG
4610
4611/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4612/// rendered using 'dot'.
4613void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4614#ifndef NDEBUG
4615 ViewGraph(this, Name, false, Title);
4616#else
4617 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4618 << "systems with Graphviz or gv!\n";
4619#endif // NDEBUG
4620}
4621
4622/// Out-of-line implementation with no arguments is handy for gdb.
4624 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4625}
4626
4627/// Sort predicate for the intervals stored in an instance of
4628/// ResourceSegments. Intervals are always disjoint (no intersection
4629/// for any pairs of intervals), therefore we can sort the totality of
4630/// the intervals by looking only at the left boundary.
4633 return A.first < B.first;
4634}
4635
4636unsigned ResourceSegments::getFirstAvailableAt(
4637 unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4638 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4639 IntervalBuilder) const {
4640 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4641 sortIntervals) &&
4642 "Cannot execute on an un-sorted set of intervals.");
4643
4644 // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4645 // a ResourceSegment interval for that situation.
4646 if (AcquireAtCycle == ReleaseAtCycle)
4647 return CurrCycle;
4648
4649 unsigned RetCycle = CurrCycle;
4650 ResourceSegments::IntervalTy NewInterval =
4651 IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4652 for (auto &Interval : _Intervals) {
4653 if (!intersects(NewInterval, Interval))
4654 continue;
4655
4656 // Move the interval right next to the top of the one it
4657 // intersects.
4658 assert(Interval.second > NewInterval.first &&
4659 "Invalid intervals configuration.");
4660 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4661 NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4662 }
4663 return RetCycle;
4664}
4665
4667 const unsigned CutOff) {
4668 assert(A.first <= A.second && "Cannot add negative resource usage");
4669 assert(CutOff > 0 && "0-size interval history has no use.");
4670 // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4671 // instruction needed the resource to be available but does not use it.
4672 // However, ResourceSegment represents an interval that is closed on the left
4673 // and open on the right. It is impossible to represent an empty interval when
4674 // the left is closed. Do not add it to Intervals.
4675 if (A.first == A.second)
4676 return;
4677
4678 assert(all_of(_Intervals,
4679 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4680 return !intersects(A, Interval);
4681 }) &&
4682 "A resource is being overwritten");
4683 _Intervals.push_back(A);
4684
4685 sortAndMerge();
4686
4687 // Do not keep the full history of the intervals, just the
4688 // latest #CutOff.
4689 while (_Intervals.size() > CutOff)
4690 _Intervals.pop_front();
4691}
4692
4695 assert(A.first <= A.second && "Invalid interval");
4696 assert(B.first <= B.second && "Invalid interval");
4697
4698 // Share one boundary.
4699 if ((A.first == B.first) || (A.second == B.second))
4700 return true;
4701
4702 // full intersersect: [ *** ) B
4703 // [***) A
4704 if ((A.first > B.first) && (A.second < B.second))
4705 return true;
4706
4707 // right intersect: [ ***) B
4708 // [*** ) A
4709 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4710 return true;
4711
4712 // left intersect: [*** ) B
4713 // [ ***) A
4714 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4715 return true;
4716
4717 return false;
4718}
4719
4720void ResourceSegments::sortAndMerge() {
4721 if (_Intervals.size() <= 1)
4722 return;
4723
4724 // First sort the collection.
4725 _Intervals.sort(sortIntervals);
4726
4727 // can use next because I have at least 2 elements in the list
4728 auto next = std::next(std::begin(_Intervals));
4729 auto E = std::end(_Intervals);
4730 for (; next != E; ++next) {
4731 if (std::prev(next)->second >= next->first) {
4732 next->first = std::prev(next)->first;
4733 _Intervals.erase(std::prev(next));
4734 continue;
4735 }
4736 }
4737}
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
std::string Name
bool End
Definition: ELF_riscv.cpp:480
expand large div rem
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > MISchedSortResourcesInTrace("misched-sort-resources-in-trace", cl::Hidden, cl::init(true), cl::desc("Sort the resources printed in the dump trace"))
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static bool sortIntervals(const ResourceSegments::IntervalTy &A, const ResourceSegments::IntervalTy &B)
Sort predicate for the intervals stored in an instance of ResourceSegments.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static cl::opt< unsigned > MIResourceCutOff("misched-resource-cutoff", cl::Hidden, cl::desc("Number of intervals to track"), cl::init(10))
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned const TargetRegisterInfo * TRI
std::pair< uint64_t, uint64_t > Interval
#define P(N)
FunctionAnalysisManager FAM
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class recording the (high level) value of a variable.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:160
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
reverse_iterator rbegin() const
Definition: ArrayRef.h:159
bool test(unsigned Idx) const
Definition: BitVector.h:461
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:335
BitVector & set()
Definition: BitVector.h:351
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
This class represents an Operation in the Expression.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
Register getReg() const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
Definition: LiveInterval.h:429
iterator begin()
Definition: LiveInterval.h:215
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool hasValue() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream.
nonconst_iterator getNonConstIterator() const
Representation of each machine instruction.
Definition: MachineInstr.h:71
bool isCopy() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
MachinePassRegistry - Track the registration of machine passes.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static MachinePassRegistry< ScheduleDAGCtor > Registry
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
MachineSchedulerPass(const TargetMachine *TM)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand)
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PostMachineSchedulerPass(const TargetMachine *TM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Capture a change in pressure for a single pressure set.
unsigned getPSetOrMax() const
unsigned getPSet() const
List of PressureChanges in order of increasing, unique PSetID.
void dump(const TargetRegisterInfo &TRI) const
void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
Helpers for implementing custom MachineSchedStrategy classes.
void push(SUnit *SU)
iterator find(SUnit *SU)
ArrayRef< SUnit * > elements()
bool isInQueue(SUnit *SU) const
std::vector< SUnit * >::iterator iterator
bool empty() const
StringRef getName() const
unsigned size() const
iterator remove(iterator I)
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void setPos(MachineBasicBlock::const_iterator Pos)
ArrayRef< unsigned > getLiveThru() const
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
void advance()
Advance across the current instruction.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
RegisterPassParser class - Handle the addition of new machine passes.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
void add(IntervalTy A, const unsigned CutOff=10)
Adds an interval [a, b) to the collection of the instance.
static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
These function return the interval used by a resource in bottom and top scheduling.
static bool intersects(IntervalTy A, IntervalTy B)
Checks whether intervals intersect.
std::pair< int64_t, int64_t > IntervalTy
Represents an interval of discrete integer values closed on the left and open on the right: [a,...
static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, unsigned ReleaseAtCycle)
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:504
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206
bool isArtificialDep() const
Definition: ScheduleDAG.h:686
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:683
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:287
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:278
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:270
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:275
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:300
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:424
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:303
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:416
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:296
unsigned NumPredsLeft
Definition: ScheduleDAG.h:274
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:292
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:279
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:263
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:301
unsigned WeakPredsLeft
Definition: ScheduleDAG.h:276
bool isBottomReady() const
Definition: ScheduleDAG.h:467
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:291
bool isTopReady() const
Definition: ScheduleDAG.h:464
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:262
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:277
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
Each Scheduling boundary is associated with ready queues.
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource unit can be scheduled.
void releasePending()
Release pending ready nodes in to the available queue.
unsigned getDependentLatency() const
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
void incExecutedResources(unsigned PIdx, unsigned Count)
bool isResourceLimited() const
const TargetSchedModel * SchedModel
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
unsigned findMaxLatency(ArrayRef< SUnit * > ReadySUs)
ScheduleDAGMI * DAG
void dumpReservedCycles() const
Dump the state of the information that tracks resource usage.
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
SchedRemainder * Rem
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx=0)
Release SU to make it ready.
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle, unsigned StartAtCycle)
Add the given processor resource to this scheduled zone.
ScheduleHazardRecognizer * HazardRec
bool isUnbufferedGroup(unsigned PIdx) const
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
unsigned getResourceCount(unsigned ResIdx) const
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
void dumpScheduledState() const
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
unsigned getZoneCritResIdx() const
unsigned getUnscheduledLatency(SUnit *SU) const
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
A ScheduleDAG for scheduling lists of MachineInstr.
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
DumpDirection
The direction that should be used to dump the scheduled Sequence.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void setDumpDirection(DumpDirection D)
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
VReg2SUnitMultiMap VRegUses
Maps vregs to the SUnits of their uses in the current scheduling region.
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
PressureDiff & getPressureDiff(const SUnit *SU)
SchedDFSResult * DFSResult
Information about DAG subtrees.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
RegPressureTracker BotRPTracker
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void updatePressureDiffs(ArrayRef< VRegMaskOrUnit > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
RegisterClassInfo * RegClassInfo
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegPressureTracker RPTracker
bool ShouldTrackPressure
Register pressure in this region computed by initRegPressure.
void dump() const override
BitVector & getScheduledTrees()
MachineBasicBlock::iterator LiveRegionEnd
RegPressureTracker TopRPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
const SUnit * NextClusterSucc
void dumpScheduleTraceTopDown() const
Print execution trace of the schedule top-down or bottom-up.
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
MachineBasicBlock::iterator top() const
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator bottom() const
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
LiveIntervals * getLIS() const
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
void dumpScheduleTraceBottomUp() const
~ScheduleDAGMI() override
void finishBlock() override
Cleans up after scheduling in the given block.
LiveIntervals * LIS
const SUnit * getNextClusterPred() const
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
const SUnit * getNextClusterSucc() const
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Mutate the DAG as a postpass after normal DAG building.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:578
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:576
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:580
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
void dumpNodeAll(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:581
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual void AdvanceCycle()
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:176
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:254
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
iterator find(const KeyT &Key)
Find an element by its key.
void clear()
Clears the set.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Register getReg() const
Information about stack frame layout on the target.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:81
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
Provide an instruction scheduling machine model to CodeGen passes.
const char * getResourceName(unsigned PIdx) const
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const InstrItineraryData * getInstrItineraries() const
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::vector< MacroFusionPredTy > getMacroFusions() const
Get the list of MacroFusion predicates.
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
virtual void overridePostRASchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic post-ra scheduling policy within a region.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78
int getNumOccurrences() const
Definition: CommandLine.h:399
Base class for the machine scheduler classes.
void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags)
Main driver for both MachineScheduler and PostMachineScheduler.
Impl class for MachineScheduler.
void setMFAM(MachineFunctionAnalysisManager *MFAM)
void setLegacyPass(MachineFunctionPass *P)
bool run(MachineFunction &MF, const TargetMachine &TM, const RequiredAnalyses &Analyses)
ScheduleDAGInstrs * createMachineScheduler()
Instantiate a ScheduleDAGInstrs that will be owned by the caller.
Impl class for PostMachineScheduler.
bool run(MachineFunction &Func, const TargetMachine &TM, const RequiredAnalyses &Analyses)
void setMFAM(MachineFunctionAnalysisManager *MFAM)
ScheduleDAGInstrs * createPostMachineScheduler()
Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by the caller.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:91
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1309
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
cl::opt< bool > PrintDAGs
unsigned getWeakLeft(const SUnit *SU, bool isTop)
std::unique_ptr< ScheduleDAGMutation > createMacroFusionDAGMutation(ArrayRef< MacroFusionPredTy > Predicates, bool BranchOnly=false)
Create a DAG scheduling mutation to pair instructions back to back for instructions that benefit acco...
FormattedString right_justify(StringRef Str, unsigned Width)
right_justify - add spaces before string so total output is Width characters.
Definition: Format.h:153
cl::opt< MISched::Direction > PostRADirection("misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), cl::init(MISched::Unspecified), cl::values(clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling")))
void initializePostMachineSchedulerLegacyPass(PassRegistry &)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:92
cl::opt< bool > MISchedDumpReservedCycles("misched-dump-reserved-cycles", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
cl::opt< bool > VerifyScheduling
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
cl::opt< bool > ViewMISchedDAGs
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
void initializeMachineSchedulerLegacyPass(PassRegistry &)
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Other
Any other memory.
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:146
cl::opt< MISched::Direction > PreRADirection
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)
If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to load c...
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
cl::opt< bool > MischedDetailResourceBooking("misched-detail-resource-booking", cl::Hidden, cl::init(false), cl::desc("Show details of invoking getNextResourceCycle."))
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
static std::string getGraphName(const ScheduleDAG *G)
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G)
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void reset(const CandPolicy &NewPolicy)
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Status of an instruction's critical resource consumption.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:56
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:121
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:66
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
RegisterClassInfo * RegClassInfo
const TargetMachine * TM
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg.
PressureChange CriticalMax
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
SmallVector< VRegMaskOrUnit, 8 > LiveOutRegs
SmallVector< VRegMaskOrUnit, 8 > LiveInRegs
List of live in virtual registers or physical register units.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Summarize the unscheduled region.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SmallVector< unsigned, 16 > RemainingCounts
An individual mapping from virtual register number to SUnit.