LLVM 22.0.0git
WindowScheduler.cpp
Go to the documentation of this file.
1//======----------- WindowScheduler.cpp - window scheduler -------------======//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// An implementation of the Window Scheduling software pipelining algorithm.
10//
11// The fundamental concept of the window scheduling algorithm involves folding
12// the original MBB at a specific position, followed by list scheduling on the
13// folded MIs. The optimal scheduling result is then chosen from various folding
14// positions as the final scheduling outcome.
15//
16// The primary challenge in this algorithm lies in generating the folded MIs and
17// establishing their dependencies. We have innovatively employed a new MBB,
18// created by copying the original MBB three times, known as TripleMBB. This
19// TripleMBB enables the convenient implementation of MI folding and dependency
20// establishment. To facilitate the algorithm's implementation, we have also
21// devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle.
22//
23// Another challenge in the algorithm is the scheduling of phis. Semantically,
24// it is difficult to place the phis in the window and perform list scheduling.
25// Therefore, we schedule these phis separately after each list scheduling.
26//
27// The provided implementation is designed for use before the Register Allocator
28// (RA). If the target requires implementation after RA, it is recommended to
29// reimplement analyseII(), schedulePhi(), and expand(). Additionally,
30// target-specific logic can be added in initialize(), preProcess(), and
31// postProcess().
32//
33// Lastly, it is worth mentioning that getSearchIndexes() is an important
34// function. We have experimented with more complex heuristics on downstream
35// target and achieved favorable results.
36//
37//===----------------------------------------------------------------------===//
39#include "llvm/ADT/Statistic.h"
46#include "llvm/Support/Debug.h"
49
50using namespace llvm;
51
52#define DEBUG_TYPE "pipeliner"
53
54namespace {
55STATISTIC(NumTryWindowSchedule,
56 "Number of loops that we attempt to use window scheduling");
57STATISTIC(NumTryWindowSearch,
58 "Number of times that we run list schedule in the window scheduling");
59STATISTIC(NumWindowSchedule,
60 "Number of loops that we successfully use window scheduling");
61STATISTIC(NumFailAnalyseII,
62 "Window scheduling abort due to the failure of the II analysis");
63
65 WindowSearchNum("window-search-num",
66 cl::desc("The number of searches per loop in the window "
67 "algorithm. 0 means no search number limit."),
69
70cl::opt<unsigned> WindowSearchRatio(
71 "window-search-ratio",
72 cl::desc("The ratio of searches per loop in the window algorithm. 100 "
73 "means search all positions in the loop, while 0 means not "
74 "performing any search."),
75 cl::Hidden, cl::init(40));
76
77cl::opt<unsigned> WindowIICoeff(
78 "window-ii-coeff",
80 "The coefficient used when initializing II in the window algorithm."),
82
83cl::opt<unsigned> WindowRegionLimit(
84 "window-region-limit",
86 "The lower limit of the scheduling region in the window algorithm."),
88
89cl::opt<unsigned> WindowDiffLimit(
90 "window-diff-limit",
91 cl::desc("The lower limit of the difference between best II and base II in "
92 "the window algorithm. If the difference is smaller than "
93 "this lower limit, window scheduling will not be performed."),
95} // namespace
96
97// WindowIILimit serves as an indicator of abnormal scheduling results and could
98// potentially be referenced by the derived target window scheduler.
100 WindowIILimit("window-ii-limit",
101 cl::desc("The upper limit of II in the window algorithm."),
102 cl::Hidden, cl::init(1000));
103
105 : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML),
106 Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()),
107 TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) {
108 TripleDAG = std::unique_ptr<ScheduleDAGInstrs>(
109 createMachineScheduler(/*OnlyBuildGraph=*/true));
110}
111
113 if (!initialize()) {
114 LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");
115 return false;
116 }
117 // The window algorithm is time-consuming, and its compilation time should be
118 // taken into consideration.
119 TimeTraceScope Scope("WindowSearch");
120 ++NumTryWindowSchedule;
121 // Performing the relevant processing before window scheduling.
122 preProcess();
123 // The main window scheduling begins.
124 std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler());
125 auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio);
126 for (unsigned Idx : SearchIndexes) {
127 OriToCycle.clear();
128 ++NumTryWindowSearch;
129 // The scheduling starts with non-phi instruction, so SchedPhiNum needs to
130 // be added to Idx.
131 unsigned Offset = Idx + SchedPhiNum;
133 SchedDAG->startBlock(MBB);
134 SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum);
135 SchedDAG->schedule();
136 LLVM_DEBUG(SchedDAG->dump());
137 unsigned II = analyseII(*SchedDAG, Offset);
138 if (II == WindowIILimit) {
140 LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");
141 ++NumFailAnalyseII;
142 continue;
143 }
147 LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is "
148 << II << ".\n");
149 }
150 // Performing the relevant processing after window scheduling.
151 postProcess();
152 // Check whether the scheduling result is valid.
153 if (!isScheduleValid()) {
154 LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");
155 return false;
156 }
157 LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset
158 << " and Best II is " << BestII << ".\n");
159 // Expand the scheduling result to prologue, kernel, and epilogue.
160 expand();
161 ++NumWindowSchedule;
162 return true;
163}
164
167 return OnlyBuildGraph
168 ? new ScheduleDAGMI(
169 Context, std::make_unique<PostGenericScheduler>(Context),
170 true)
172}
173
176 LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");
177 return false;
178 }
179 // Initialized the member variables used by window algorithm.
180 OriMIs.clear();
181 TriMIs.clear();
182 TriToOri.clear();
183 OriToCycle.clear();
184 SchedResult.clear();
185 SchedPhiNum = 0;
186 SchedInstrNum = 0;
187 BestII = UINT_MAX;
188 BestOffset = 0;
189 BaseII = 0;
190 // List scheduling used in the window algorithm depends on LiveIntervals.
191 if (!Context->LIS) {
192 LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");
193 return false;
194 }
195 // Check each MI in MBB.
196 SmallSet<Register, 8> PrevDefs;
197 SmallSet<Register, 8> PrevUses;
198 auto IsLoopCarried = [&](MachineInstr &Phi) {
199 // Two cases are checked here: (1)The virtual register defined by the
200 // preceding phi is used by the succeeding phi;(2)The preceding phi uses the
201 // virtual register defined by the succeeding phi.
202 if (PrevUses.count(Phi.getOperand(0).getReg()))
203 return true;
204 PrevDefs.insert(Phi.getOperand(0).getReg());
205 for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
206 if (PrevDefs.count(Phi.getOperand(I).getReg()))
207 return true;
208 PrevUses.insert(Phi.getOperand(I).getReg());
209 }
210 return false;
211 };
212 auto PLI = TII->analyzeLoopForPipelining(MBB);
213 for (auto &MI : *MBB) {
214 if (MI.isMetaInstruction() || MI.isTerminator())
215 continue;
216 if (MI.isPHI()) {
217 if (IsLoopCarried(MI)) {
218 LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n");
219 return false;
220 }
221 ++SchedPhiNum;
222 ++BestOffset;
223 } else
225 if (TII->isSchedulingBoundary(MI, MBB, *MF)) {
227 dbgs() << "Boundary MI is not allowed in window scheduling!\n");
228 return false;
229 }
230 if (PLI->shouldIgnoreForPipelining(&MI)) {
231 LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in "
232 "window scheduling!\n");
233 return false;
234 }
235 for (auto &Def : MI.all_defs())
236 if (Def.isReg() && Def.getReg().isPhysical()) {
237 LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
238 "window scheduling!\n");
239 return false;
240 }
241 }
242 if (SchedInstrNum <= WindowRegionLimit) {
243 LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
244 return false;
245 }
246 return true;
247}
248
250 // Prior to window scheduling, it's necessary to backup the original MBB,
251 // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB.
252 backupMBB();
254 TripleDAG->startBlock(MBB);
255 TripleDAG->enterRegion(
257 std::distance(MBB->begin(), MBB->getFirstTerminator()));
258 TripleDAG->buildSchedGraph(Context->AA);
259}
260
262 // After window scheduling, it's necessary to clear the TripleDAG and restore
263 // to the original MBB.
264 TripleDAG->exitRegion();
265 TripleDAG->finishBlock();
266 restoreMBB();
267}
268
270 for (auto &MI : MBB->instrs())
271 OriMIs.push_back(&MI);
272 // Remove MIs and the corresponding live intervals.
273 for (auto &MI : make_early_inc_range(*MBB)) {
275 MBB->remove(&MI);
276 }
277}
278
280 // Erase MIs and the corresponding live intervals.
281 for (auto &MI : make_early_inc_range(*MBB)) {
283 MI.eraseFromParent();
284 }
285 // Restore MBB to the state before window scheduling.
288}
289
291 const unsigned DuplicateNum = 3;
292 TriMIs.clear();
293 TriToOri.clear();
294 assert(OriMIs.size() > 0 && "The Original MIs were not backed up!");
295 // Step 1: Performing the first copy of MBB instructions, excluding
296 // terminators. At the same time, we back up the anti-register of phis.
297 // DefPairs hold the old and new define register pairs.
299 for (auto *MI : OriMIs) {
300 if (MI->isMetaInstruction() || MI->isTerminator())
301 continue;
302 if (MI->isPHI())
303 if (Register AntiReg = getAntiRegister(MI))
304 DefPairs[MI->getOperand(0).getReg()] = AntiReg;
305 auto *NewMI = MF->CloneMachineInstr(MI);
306 MBB->push_back(NewMI);
307 TriMIs.push_back(NewMI);
308 TriToOri[NewMI] = MI;
309 }
310 // Step 2: Performing the remaining two copies of MBB instructions excluding
311 // phis, and the last one contains terminators. At the same time, registers
312 // are updated accordingly.
313 for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) {
314 for (auto *MI : OriMIs) {
315 if (MI->isPHI() || MI->isMetaInstruction() ||
316 (MI->isTerminator() && Cnt < DuplicateNum - 1))
317 continue;
318 auto *NewMI = MF->CloneMachineInstr(MI);
320 // New defines are updated.
321 for (auto MO : NewMI->all_defs())
322 if (MO.isReg() && MO.getReg().isVirtual()) {
323 Register NewDef =
325 NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI);
326 NewDefs[MO.getReg()] = NewDef;
327 }
328 // New uses are updated.
329 for (auto DefRegPair : DefPairs)
330 if (NewMI->readsRegister(DefRegPair.first, TRI)) {
331 Register NewUse = DefRegPair.second;
332 // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3':
333 //
334 // BB.3: DefPairs
335 // ==================================
336 // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] (%1,%7)
337 // ...
338 // ==================================
339 // ...
340 // %4 = sub i32 %1, %3
341 // ...
342 // %7 = add i32 %5, %6
343 // ...
344 // ----------------------------------
345 // ...
346 // %8 = sub i32 %7, %3 (%1,%7),(%4,%8)
347 // ...
348 // %9 = add i32 %5, %6 (%1,%7),(%4,%8),(%7,%9)
349 // ...
350 // ----------------------------------
351 // ...
352 // %10 = sub i32 %9, %3 (%1,%7),(%4,%10),(%7,%9)
353 // ... ^
354 // %11 = add i32 %5, %6 (%1,%7),(%4,%10),(%7,%11)
355 // ...
356 // ==================================
357 // < Terminators >
358 // ==================================
359 if (auto It = DefPairs.find(NewUse); It != DefPairs.end())
360 NewUse = It->second;
361 NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI);
362 }
363 // DefPairs is updated at last.
364 for (auto &NewDef : NewDefs)
365 DefPairs[NewDef.first] = NewDef.second;
366 MBB->push_back(NewMI);
367 TriMIs.push_back(NewMI);
368 TriToOri[NewMI] = MI;
369 }
370 }
371 // Step 3: The registers used by phis are updated, and they are generated in
372 // the third copy of MBB.
373 // In the privious example, the old phi is:
374 // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]
375 // The new phi is:
376 // %1 = phi i32 [%2, %BB.1], [%11, %BB.3]
377 for (auto &Phi : MBB->phis()) {
378 for (auto DefRegPair : DefPairs)
379 if (Phi.readsRegister(DefRegPair.first, TRI))
380 Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI);
381 }
383}
384
386 // After list scheduling, the MBB is restored in one traversal.
387 for (size_t I = 0; I < TriMIs.size(); ++I) {
388 auto *MI = TriMIs[I];
389 auto OldPos = MBB->begin();
390 std::advance(OldPos, I);
391 auto CurPos = MI->getIterator();
392 if (CurPos != OldPos) {
393 MBB->splice(OldPos, MBB, CurPos);
394 Context->LIS->handleMove(*MI, /*UpdateFlags=*/false);
395 }
396 }
397}
398
400 unsigned SearchRatio) {
401 // We use SearchRatio to get the index range, and then evenly get the indexes
402 // according to the SearchNum. This is a simple huristic. Depending on the
403 // characteristics of the target, more complex algorithms can be used for both
404 // performance and compilation time.
405 assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!");
406 unsigned MaxIdx = SchedInstrNum * SearchRatio / 100;
407 unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1;
408 SmallVector<unsigned> SearchIndexes;
409 for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step)
410 SearchIndexes.push_back(Idx);
411 return SearchIndexes;
412}
413
415 // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1.
416 unsigned MaxDepth = 1;
417 for (auto &SU : DAG.SUnits)
418 MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth);
419 return MaxDepth * WindowIICoeff;
420}
421
423 unsigned Offset) {
424 int InitII = getEstimatedII(DAG);
425 ResourceManager RM(Subtarget, &DAG);
426 RM.init(InitII);
427 // ResourceManager and DAG are used to calculate the maximum cycle for the
428 // scheduled MIs. Since MIs in the Region have already been scheduled, the
429 // emit cycles can be estimated in order here.
430 int CurCycle = 0;
432 for (auto &MI : Range) {
433 auto *SU = DAG.getSUnit(&MI);
434 int ExpectCycle = CurCycle;
435 // The predecessors of current MI determine its earliest issue cycle.
436 for (auto &Pred : SU->Preds) {
437 if (Pred.isWeak())
438 continue;
439 auto *PredMI = Pred.getSUnit()->getInstr();
440 int PredCycle = getOriCycle(PredMI);
441 ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
442 }
443 // Zero cost instructions do not need to check resource.
444 if (!TII->isZeroCost(MI.getOpcode())) {
445 // ResourceManager can be used to detect resource conflicts between the
446 // current MI and the previously inserted MIs.
447 while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
448 ++CurCycle;
449 if (CurCycle == (int)WindowIILimit)
450 return CurCycle;
451 }
452 RM.reserveResources(*SU, CurCycle);
453 }
454 OriToCycle[getOriMI(&MI)] = CurCycle;
455 LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
456 << getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
457 }
458 LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n");
459 return CurCycle;
460}
461
462// By utilizing TripleDAG, we can easily establish dependencies between A and B.
463// Based on the MaxCycle and the issue cycle of A and B, we can determine
464// whether it is necessary to add a stall cycle. This is because, without
465// inserting the stall cycle, the latency constraint between A and B cannot be
466// satisfied. The details are as follows:
467//
468// New MBB:
469// ========================================
470// < Phis >
471// ======================================== (sliding direction)
472// MBB copy 1 |
473// V
474//
475// ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~ ----schedule window-----
476// | |
477// ===================V==================== |
478// MBB copy 2 < MI B > |
479// |
480// < MI A > V
481// ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~ ------------------------
482// :
483// ===================V====================
484// MBB copy 3 < MI B'>
485//
486//
487//
488//
489// ========================================
490// < Terminators >
491// ========================================
492int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
493 int MaxStallCycle = 0;
494 int CurrentII = MaxCycle + 1;
496 for (auto &MI : Range) {
497 auto *SU = TripleDAG->getSUnit(&MI);
498 int DefCycle = getOriCycle(&MI);
499 for (auto &Succ : SU->Succs) {
500 if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
501 continue;
502 // If the expected cycle does not exceed CurrentII, no check is needed.
503 if (DefCycle + (int)Succ.getLatency() <= CurrentII)
504 continue;
505 // If the cycle of the scheduled MI A is less than that of the scheduled
506 // MI B, the scheduling will fail because the lifetime of the
507 // corresponding register exceeds II.
508 auto *SuccMI = Succ.getSUnit()->getInstr();
509 int UseCycle = getOriCycle(SuccMI);
510 if (DefCycle < UseCycle)
511 return WindowIILimit;
512 // Get the stall cycle introduced by the register between two trips.
513 int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
514 MaxStallCycle = std::max(MaxStallCycle, StallCycle);
515 }
516 }
517 LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n");
518 return MaxStallCycle;
519}
520
522 LLVM_DEBUG(dbgs() << "Start analyzing II:\n");
523 int MaxCycle = calculateMaxCycle(DAG, Offset);
524 if (MaxCycle == (int)WindowIILimit)
525 return MaxCycle;
526 int StallCycle = calculateStallCycle(Offset, MaxCycle);
527 if (StallCycle == (int)WindowIILimit)
528 return StallCycle;
529 // The value of II is equal to the maximum execution cycle plus 1.
530 return MaxCycle + StallCycle + 1;
531}
532
534 LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n");
535 for (auto &Phi : MBB->phis()) {
536 int LateCycle = INT_MAX;
537 auto *SU = TripleDAG->getSUnit(&Phi);
538 for (auto &Succ : SU->Succs) {
539 // Phi doesn't have any Anti successors.
540 if (Succ.getKind() != SDep::Data)
541 continue;
542 // Phi is scheduled before the successor of stage 0. The issue cycle of
543 // phi is the latest cycle in this interval.
544 auto *SuccMI = Succ.getSUnit()->getInstr();
545 int Cycle = getOriCycle(SuccMI);
546 if (getOriStage(getOriMI(SuccMI), Offset) == 0)
547 LateCycle = std::min(LateCycle, Cycle);
548 }
549 // The anti-dependency of phi need to be handled separately in the same way.
550 if (Register AntiReg = getAntiRegister(&Phi)) {
551 auto *AntiMI = MRI->getVRegDef(AntiReg);
552 // AntiReg may be defined outside the kernel MBB.
553 if (AntiMI->getParent() == MBB) {
554 auto AntiCycle = getOriCycle(AntiMI);
555 if (getOriStage(getOriMI(AntiMI), Offset) == 0)
556 LateCycle = std::min(LateCycle, AntiCycle);
557 }
558 }
559 // If there is no limit to the late cycle, a default value is given.
560 if (LateCycle == INT_MAX)
561 LateCycle = (int)(II - 1);
562 LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi);
563 // The issue cycle of phi is set to the latest cycle in the interval.
564 auto *OriPhi = getOriMI(&Phi);
565 OriToCycle[OriPhi] = LateCycle;
566 }
567}
568
570 unsigned II) {
571 // At each issue cycle, phi is placed before MIs in stage 0. So the simplest
572 // way is to put phi at the beginning of the current cycle.
575 for (auto &Phi : MBB->phis())
576 CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi));
577 for (auto &MI : Range)
578 CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI));
579 // Each MI is assigned a separate ordered Id, which is used as a sort marker
580 // in the following expand process.
582 int Id = 0;
583 for (int Cycle = 0; Cycle < (int)II; ++Cycle) {
584 auto It = CycleToMIs.find(Cycle);
585 if (It == CycleToMIs.end())
586 continue;
587 for (auto *MI : It->second)
588 IssueOrder[MI] = Id++;
589 }
590 return IssueOrder;
591}
592
594 // At the first update, Offset is equal to SchedPhiNum. At this time, only
595 // BestII, BestOffset, and BaseII need to be updated.
596 if (Offset == SchedPhiNum) {
597 BestII = II;
599 BaseII = II;
600 return;
601 }
602 // The update will only continue if the II is smaller than BestII and the II
603 // is sufficiently small.
604 if ((II >= BestII) || (II + WindowDiffLimit > BaseII))
605 return;
606 BestII = II;
608 // Record the result of the current list scheduling, noting that each MI is
609 // stored unordered in SchedResult.
610 SchedResult.clear();
611 auto IssueOrder = getIssueOrder(Offset, II);
612 for (auto &Pair : OriToCycle) {
613 assert(IssueOrder.count(Pair.first) && "Cannot find original MI!");
614 SchedResult.push_back(std::make_tuple(Pair.first, Pair.second,
615 getOriStage(Pair.first, Offset),
616 IssueOrder[Pair.first]));
617 }
618}
619
621 // The MIs in the SchedResult are sorted by the issue order ID.
623 [](const std::tuple<MachineInstr *, int, int, int> &A,
624 const std::tuple<MachineInstr *, int, int, int> &B) {
625 return std::get<3>(A) < std::get<3>(B);
626 });
627 // Use the scheduling infrastructure for expansion, noting that InstrChanges
628 // is not supported here.
629 DenseMap<MachineInstr *, int> Cycles, Stages;
630 std::vector<MachineInstr *> OrderedInsts;
631 for (auto &Info : SchedResult) {
632 auto *MI = std::get<0>(Info);
633 OrderedInsts.push_back(MI);
634 Cycles[MI] = std::get<1>(Info);
635 Stages[MI] = std::get<2>(Info);
636 LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI]
637 << "]: " << *MI);
638 }
639 ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
640 std::move(Stages));
643 MSE.expand();
644 MSE.cleanup();
645}
646
649 for (MachineInstr &MI : *MBB)
650 for (const MachineOperand &MO : MI.operands()) {
651 if (!MO.isReg() || MO.getReg() == 0)
652 continue;
653 Register Reg = MO.getReg();
654 if (!is_contained(UsedRegs, Reg))
655 UsedRegs.push_back(Reg);
656 }
657 Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs);
658}
659
662 auto RegionBegin = MBB->begin();
663 std::advance(RegionBegin, Offset);
664 auto RegionEnd = RegionBegin;
665 std::advance(RegionEnd, Num);
666 return make_range(RegionBegin, RegionEnd);
667}
668
670 assert(TriToOri.count(NewMI) && "Cannot find original MI!");
671 auto *OriMI = TriToOri[NewMI];
672 assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!");
673 return OriToCycle[OriMI];
674}
675
677 assert(TriToOri.count(NewMI) && "Cannot find original MI!");
678 return TriToOri[NewMI];
679}
680
682 assert(llvm::is_contained(OriMIs, OriMI) && "Cannot find OriMI in OriMIs!");
683 // If there is no instruction fold, all MI stages are 0.
684 if (Offset == SchedPhiNum)
685 return 0;
686 // For those MIs with an ID less than the Offset, their stages are set to 0,
687 // while the rest are set to 1.
688 unsigned Id = 0;
689 for (auto *MI : OriMIs) {
690 if (MI->isMetaInstruction())
691 continue;
692 if (MI == OriMI)
693 break;
694 ++Id;
695 }
696 return Id >= (size_t)Offset ? 1 : 0;
697}
698
700 assert(Phi->isPHI() && "Expecting PHI!");
701 Register AntiReg;
702 for (auto MO : Phi->uses()) {
703 if (MO.isReg())
704 AntiReg = MO.getReg();
705 else if (MO.isMBB() && MO.getMBB() == MBB)
706 return AntiReg;
707 }
708 return 0;
709}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Target-Independent Code Generator Pass Configuration Options pass.
static cl::opt< unsigned > WindowIILimit("window-ii-limit", cl::desc("The upper limit of II in the window algorithm."), cl::Hidden, cl::init(1000))
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
A possibly irreducible generalization of a Loop.
LLVM_ABI void repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, ArrayRef< Register > OrigRegs)
Update live intervals for instructions in a range of iterators.
LLVM_ABI void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndexes * getSlotIndexes() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
void push_back(MachineInstr *MI)
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Representation of each machine instruction.
Definition: MachineInstr.h:72
MachineOperand class - Representation of each machine instruction operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Represents a schedule for a single-block loop.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:55
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:399
A ScheduleDAG for scheduling lists of MachineInstr.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:588
LLVM_ABI void removeMachineInstrFromMaps(MachineInstr &MI, bool AllowBundled=false)
Removes machine instruction (bundle) MI from the mapping.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
virtual bool enableWindowScheduler() const
True if the subtarget should run WindowScheduler.
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Definition: TimeProfiler.h:182
MachineInstr * getOriMI(MachineInstr *NewMI)
Get the original MI from which the new MI is cloned.
virtual ScheduleDAGInstrs * createMachineScheduler(bool OnlyBuildGraph=false)
Two types of ScheduleDAGs are needed, one for creating dependency graphs only, and the other for list...
unsigned SchedPhiNum
SchedPhiNum records the number of phi in the original MBB, and the scheduling starts with MI after ph...
virtual void preProcess()
Add some related processing before running window scheduling.
MachineSchedContext * Context
virtual void restoreTripleMBB()
Restore the order of MIs in TripleMBB after each list scheduling.
virtual void postProcess()
Add some related processing after running window scheduling.
DenseMap< MachineInstr *, int > OriToCycle
OriToCycle keeps the mappings between the original MI and its issue cycle.
virtual int calculateMaxCycle(ScheduleDAGInstrs &DAG, unsigned Offset)
Calculate MIs execution cycle after list scheduling.
MachineRegisterInfo * MRI
MachineBasicBlock * MBB
unsigned BestII
BestII and BestOffset record the characteristics of the best scheduling result and are used together ...
void backupMBB()
Back up the MIs in the original MBB and remove them from MBB.
DenseMap< MachineInstr *, MachineInstr * > TriToOri
TriToOri keeps the mappings between the MI clones in TripleMBB and their original MI.
int getEstimatedII(ScheduleDAGInstrs &DAG)
Estimate a II value at which all MIs will be scheduled successfully.
virtual void expand()
Using the scheduling infrastructure to expand the results of window scheduling.
DenseMap< MachineInstr *, int > getIssueOrder(unsigned Offset, unsigned II)
Get the final issue order of all scheduled MIs including phis.
Register getAntiRegister(MachineInstr *Phi)
Gets the register in phi which is generated from the current MBB.
unsigned getOriStage(MachineInstr *OriMI, unsigned Offset)
Get the scheduling stage, where the stage of the new MI is identical to the original MI.
const TargetRegisterInfo * TRI
virtual void updateLiveIntervals()
Update the live intervals for all registers used within MBB.
const TargetSubtargetInfo * Subtarget
std::unique_ptr< ScheduleDAGInstrs > TripleDAG
To innovatively identify the dependencies between MIs across two trips, we construct a DAG for a new ...
unsigned BaseII
BaseII is the II obtained when the window offset is SchedPhiNum.
virtual void schedulePhi(int Offset, unsigned &II)
Phis are scheduled separately after each list scheduling.
MachineFunction * MF
virtual unsigned analyseII(ScheduleDAGInstrs &DAG, unsigned Offset)
Analyzes the II value after each list scheduling.
int getOriCycle(MachineInstr *NewMI)
Get the issue cycle of the new MI based on the cycle of the original MI.
unsigned SchedInstrNum
SchedInstrNum records the MIs involved in scheduling in the original MBB, excluding debug instruction...
const TargetInstrInfo * TII
virtual void generateTripleMBB()
Make three copies of the original MBB to generate a new TripleMBB.
iterator_range< MachineBasicBlock::iterator > getScheduleRange(unsigned Offset, unsigned Num)
Gets the iterator range of MIs in the scheduling window.
virtual bool isScheduleValid()
Check whether the final result of window scheduling is valid.
virtual int calculateStallCycle(unsigned Offset, int MaxCycle)
Calculate the stall cycle between two trips after list scheduling.
virtual void updateScheduleResult(unsigned Offset, unsigned II)
Update the scheduling result after each list scheduling.
SmallVector< MachineInstr * > OriMIs
OriMIs keeps the MIs removed from the original MBB.
virtual bool initialize()
Initializes the algorithm and determines if it can be executed.
SmallVector< std::tuple< MachineInstr *, int, int, int >, 256 > SchedResult
SchedResult keeps the result of each list scheduling, and the format of the tuple is <MI pointer,...
virtual SmallVector< unsigned > getSearchIndexes(unsigned SearchNum, unsigned SearchRatio)
Give the folding position in the window algorithm, where different heuristics can be used.
void restoreMBB()
Erase the MIs in current MBB and restore the original MIs.
WindowScheduler(MachineSchedContext *C, MachineLoop &ML)
SmallVector< MachineInstr * > TriMIs
TriMIs keeps the MIs of TripleMBB, which is used to restore TripleMBB.
A range adaptor for a pair of iterators.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const TargetMachine * TM