LLVM 21.0.0git
MachinePipeliner.h
Go to the documentation of this file.
1//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10//
11// Software pipelining (SWP) is an instruction scheduling technique for loops
12// that overlap loop iterations and exploits ILP via a compiler transformation.
13//
14// Swing Modulo Scheduling is an implementation of software pipelining
15// that generates schedules that are near optimal in terms of initiation
16// interval, register requirements, and stage count. See the papers:
17//
18// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20// Conference on Parallel Architectures and Compilation Techiniques.
21//
22// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24// Transactions on Computers, Vol. 50, No. 3, 2001.
25//
26// "An Implementation of Swing Modulo Scheduling With Extensions for
27// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28// Urbana-Champaign, 2005.
29//
30//
31// The SMS algorithm consists of three main steps after computing the minimal
32// initiation interval (MII).
33// 1) Analyze the dependence graph and compute information about each
34// instruction in the graph.
35// 2) Order the nodes (instructions) by priority based upon the heuristics
36// described in the algorithm.
37// 3) Attempt to schedule the nodes in the specified order using the MII.
38//
39//===----------------------------------------------------------------------===//
40#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
41#define LLVM_CODEGEN_MACHINEPIPELINER_H
42
43#include "llvm/ADT/STLExtras.h"
44#include "llvm/ADT/SetVector.h"
55
56#include <deque>
57
58namespace llvm {
59
60class AAResults;
61class NodeSet;
62class SMSchedule;
63
64extern cl::opt<bool> SwpEnableCopyToPhi;
65extern cl::opt<int> SwpForceIssueWidth;
66
67/// The main class in the implementation of the target independent
68/// software pipeliner pass.
70public:
71 MachineFunction *MF = nullptr;
73 const MachineLoopInfo *MLI = nullptr;
74 const MachineDominatorTree *MDT = nullptr;
76 const TargetInstrInfo *TII = nullptr;
78 bool disabledByPragma = false;
79 unsigned II_setByPragma = 0;
80
81#ifndef NDEBUG
82 static int NumTries;
83#endif
84
85 /// Cache the target analysis information about the loop.
86 struct LoopInfo {
92 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
93 nullptr;
94 };
96
97 static char ID;
98
101 }
102
104
105 void getAnalysisUsage(AnalysisUsage &AU) const override;
106
107private:
108 void preprocessPhiNodes(MachineBasicBlock &B);
109 bool canPipelineLoop(MachineLoop &L);
110 bool scheduleLoop(MachineLoop &L);
111 bool swingModuloScheduler(MachineLoop &L);
112 void setPragmaPipelineOptions(MachineLoop &L);
113 bool runWindowScheduler(MachineLoop &L);
114 bool useSwingModuloScheduler();
115 bool useWindowScheduler(bool Changed);
116};
117
118/// Represents a dependence between two instruction.
120 SUnit *Dst = nullptr;
121 SDep Pred;
122 unsigned Distance = 0;
123
124public:
125 /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
126 /// \p Dep in the original DAG. This pair has no information about the
127 /// direction of the edge, so we need to pass an additional argument \p
128 /// IsSucc.
129 SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc)
130 : Dst(PredOrSucc), Pred(Dep), Distance(0u) {
131 SUnit *Src = Dep.getSUnit();
132
133 if (IsSucc) {
134 std::swap(Src, Dst);
135 Pred.setSUnit(Src);
136 }
137
138 // An anti-dependence to PHI means loop-carried dependence.
139 if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) {
140 Distance = 1;
141 std::swap(Src, Dst);
142 auto Reg = Pred.getReg();
143 Pred = SDep(Src, SDep::Kind::Data, Reg);
144 }
145 }
146
147 /// Returns the SUnit from which the edge comes (source node).
148 SUnit *getSrc() const { return Pred.getSUnit(); }
149
150 /// Returns the SUnit to which the edge points (destination node).
151 SUnit *getDst() const { return Dst; }
152
153 /// Returns the latency value for the edge.
154 unsigned getLatency() const { return Pred.getLatency(); }
155
156 /// Sets the latency for the edge.
157 void setLatency(unsigned Latency) { Pred.setLatency(Latency); }
158
159 /// Returns the distance value for the edge.
160 unsigned getDistance() const { return Distance; }
161
162 /// Sets the distance value for the edge.
163 void setDistance(unsigned D) { Distance = D; }
164
165 /// Returns the register associated with the edge.
166 Register getReg() const { return Pred.getReg(); }
167
168 /// Returns true if the edge represents anti dependence.
169 bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; }
170
171 /// Returns true if the edge represents output dependence.
172 bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; }
173
174 /// Returns true if the edge represents a dependence that is not data, anti or
175 /// output dependence.
176 bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; }
177
178 /// Returns true if the edge represents unknown scheduling barrier.
179 bool isBarrier() const { return Pred.isBarrier(); }
180
181 /// Returns true if the edge represents an artificial dependence.
182 bool isArtificial() const { return Pred.isArtificial(); }
183
184 /// Tests if this is a Data dependence that is associated with a register.
185 bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); }
186
187 /// Returns true for DDG nodes that we ignore when computing the cost
188 /// functions. We ignore the back-edge recurrence in order to avoid unbounded
189 /// recursion in the calculation of the ASAP, ALAP, etc functions.
190 bool ignoreDependence(bool IgnoreAnti) const;
191};
192
193/// Represents dependencies between instructions. This class is a wrapper of
194/// `SUnits` and its dependencies to manipulate back-edges in a natural way.
195/// Currently it only supports back-edges via PHI, which are expressed as
196/// anti-dependencies in the original DAG.
197/// FIXME: Support any other loop-carried dependencies
200
201 struct SwingSchedulerDDGEdges {
202 EdgesType Preds;
203 EdgesType Succs;
204 };
205
206 void initEdges(SUnit *SU);
207
208 SUnit *EntrySU;
209 SUnit *ExitSU;
210
211 std::vector<SwingSchedulerDDGEdges> EdgesVec;
212 SwingSchedulerDDGEdges EntrySUEdges;
213 SwingSchedulerDDGEdges ExitSUEdges;
214
215 void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
216
217 SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
218 const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
219
220public:
221 SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU);
222
223 const EdgesType &getInEdges(const SUnit *SU) const;
224
225 const EdgesType &getOutEdges(const SUnit *SU) const;
226};
227
228/// This class builds the dependence graph for the instructions in a loop,
229/// and attempts to schedule the instructions using the SMS algorithm.
232
233 std::unique_ptr<SwingSchedulerDDG> DDG;
234
235 /// The minimum initiation interval between iterations for this schedule.
236 unsigned MII = 0;
237 /// The maximum initiation interval between iterations for this schedule.
238 unsigned MAX_II = 0;
239 /// Set to true if a valid pipelined schedule is found for the loop.
240 bool Scheduled = false;
242 LiveIntervals &LIS;
243 const RegisterClassInfo &RegClassInfo;
244 unsigned II_setByPragma = 0;
245 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
246
247 /// A topological ordering of the SUnits, which is needed for changing
248 /// dependences and iterating over the SUnits.
250
251 struct NodeInfo {
252 int ASAP = 0;
253 int ALAP = 0;
254 int ZeroLatencyDepth = 0;
255 int ZeroLatencyHeight = 0;
256
257 NodeInfo() = default;
258 };
259 /// Computed properties for each node in the graph.
260 std::vector<NodeInfo> ScheduleInfo;
261
262 enum OrderKind { BottomUp = 0, TopDown = 1 };
263 /// Computed node ordering for scheduling.
264 SetVector<SUnit *> NodeOrder;
265
270
271 /// Instructions to change when emitting the final schedule.
273
274 /// We may create a new instruction, so remember it because it
275 /// must be deleted when the pass is finished.
277
278 /// Ordered list of DAG postprocessing steps.
279 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
280
281 /// Helper class to implement Johnson's circuit finding algorithm.
282 class Circuits {
283 std::vector<SUnit> &SUnits;
284 SetVector<SUnit *> Stack;
285 BitVector Blocked;
288 // Node to Index from ScheduleDAGTopologicalSort
289 std::vector<int> *Node2Idx;
290 unsigned NumPaths = 0u;
291 static unsigned MaxPaths;
292
293 public:
294 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
295 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
296 Node2Idx = new std::vector<int>(SUs.size());
297 unsigned Idx = 0;
298 for (const auto &NodeNum : Topo)
299 Node2Idx->at(NodeNum) = Idx++;
300 }
301 Circuits &operator=(const Circuits &other) = delete;
302 Circuits(const Circuits &other) = delete;
303 ~Circuits() { delete Node2Idx; }
304
305 /// Reset the data structures used in the circuit algorithm.
306 void reset() {
307 Stack.clear();
308 Blocked.reset();
309 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
310 NumPaths = 0;
311 }
312
313 void createAdjacencyStructure(SwingSchedulerDAG *DAG);
314 bool circuit(int V, int S, NodeSetType &NodeSets,
315 const SwingSchedulerDAG *DAG, bool HasBackedge = false);
316 void unblock(int U);
317 };
318
319 struct CopyToPhiMutation : public ScheduleDAGMutation {
320 void apply(ScheduleDAGInstrs *DAG) override;
321 };
322
323public:
325 const RegisterClassInfo &rci, unsigned II,
327 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
328 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
329 Topo(SUnits, &ExitSU) {
330 P.MF->getSubtarget().getSMSMutations(Mutations);
332 Mutations.push_back(std::make_unique<CopyToPhiMutation>());
333 }
334
335 void schedule() override;
336 void finishBlock() override;
337
338 /// Return true if the loop kernel has been scheduled.
339 bool hasNewSchedule() { return Scheduled; }
340
341 /// Return the earliest time an instruction may be scheduled.
342 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
343
344 /// Return the latest time an instruction my be scheduled.
345 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
346
347 /// The mobility function, which the number of slots in which
348 /// an instruction may be scheduled.
349 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
350
351 /// The depth, in the dependence graph, for a node.
352 unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
353
354 /// The maximum unweighted length of a path from an arbitrary node to the
355 /// given node in which each edge has latency 0
357 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
358 }
359
360 /// The height, in the dependence graph, for a node.
361 unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
362
363 /// The maximum unweighted length of a path from the given node to an
364 /// arbitrary node in which each edge has latency 0
366 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
367 }
368
369 bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const;
370
371 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
372
373 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
374
375 /// Return the new base register that was stored away for the changed
376 /// instruction.
377 unsigned getInstrBaseReg(SUnit *SU) const {
379 InstrChanges.find(SU);
380 if (It != InstrChanges.end())
381 return It->second.first;
382 return 0;
383 }
384
385 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
386 Mutations.push_back(std::move(Mutation));
387 }
388
389 static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
390
391 const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
392
393 bool mayOverlapInLaterIter(const MachineInstr *BaseMI,
394 const MachineInstr *OtherMI) const;
395
396private:
397 void addLoopCarriedDependences(AAResults *AA);
398 void updatePhiDependences();
399 void changeDependences();
400 unsigned calculateResMII();
401 unsigned calculateRecMII(NodeSetType &RecNodeSets);
402 void findCircuits(NodeSetType &NodeSets);
403 void fuseRecs(NodeSetType &NodeSets);
404 void removeDuplicateNodes(NodeSetType &NodeSets);
405 void computeNodeFunctions(NodeSetType &NodeSets);
406 void registerPressureFilter(NodeSetType &NodeSets);
407 void colocateNodeSets(NodeSetType &NodeSets);
408 void checkNodeSets(NodeSetType &NodeSets);
409 void groupRemainingNodes(NodeSetType &NodeSets);
410 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
411 SetVector<SUnit *> &NodesAdded);
412 void computeNodeOrder(NodeSetType &NodeSets);
413 void checkValidNodeOrder(const NodeSetType &Circuits) const;
414 bool schedulePipeline(SMSchedule &Schedule);
415 bool computeDelta(const MachineInstr &MI, int &Delta) const;
416 MachineInstr *findDefInLoop(Register Reg);
417 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
418 unsigned &OffsetPos, unsigned &NewBase,
419 int64_t &NewOffset);
420 void postProcessDAG();
421 /// Set the Minimum Initiation Interval for this schedule attempt.
422 void setMII(unsigned ResMII, unsigned RecMII);
423 /// Set the Maximum Initiation Interval for this schedule attempt.
424 void setMAX_II();
425};
426
427/// A NodeSet contains a set of SUnit DAG nodes with additional information
428/// that assigns a priority to the set.
429class NodeSet {
430 SetVector<SUnit *> Nodes;
431 bool HasRecurrence = false;
432 unsigned RecMII = 0;
433 int MaxMOV = 0;
434 unsigned MaxDepth = 0;
435 unsigned Colocate = 0;
436 SUnit *ExceedPressure = nullptr;
437 unsigned Latency = 0;
438
439public:
441
442 NodeSet() = default;
444 : Nodes(S, E), HasRecurrence(true) {
445 // Calculate the latency of this node set.
446 // Example to demonstrate the calculation:
447 // Given: N0 -> N1 -> N2 -> N0
448 // Edges:
449 // (N0 -> N1, 3)
450 // (N0 -> N1, 5)
451 // (N1 -> N2, 2)
452 // (N2 -> N0, 1)
453 // The total latency which is a lower bound of the recurrence MII is the
454 // longest path from N0 back to N0 given only the edges of this node set.
455 // In this example, the latency is: 5 + 2 + 1 = 8.
456 //
457 // Hold a map from each SUnit in the circle to the maximum distance from the
458 // source node by only considering the nodes.
459 const SwingSchedulerDDG *DDG = DAG->getDDG();
460 DenseMap<SUnit *, unsigned> SUnitToDistance;
461 for (auto *Node : Nodes)
462 SUnitToDistance[Node] = 0;
463
464 for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
465 SUnit *U = Nodes[I - 1];
466 SUnit *V = Nodes[I % Nodes.size()];
467 for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) {
468 SUnit *SuccSUnit = Succ.getDst();
469 if (V != SuccSUnit)
470 continue;
471 unsigned &DU = SUnitToDistance[U];
472 unsigned &DV = SUnitToDistance[V];
473 if (DU + Succ.getLatency() > DV)
474 DV = DU + Succ.getLatency();
475 }
476 }
477 // Handle a back-edge in loop carried dependencies
478 SUnit *FirstNode = Nodes[0];
479 SUnit *LastNode = Nodes[Nodes.size() - 1];
480
481 for (auto &PI : DDG->getInEdges(LastNode)) {
482 // If we have an order dep that is potentially loop carried then a
483 // back-edge exists between the last node and the first node that isn't
484 // modeled in the DAG. Handle it manually by adding 1 to the distance of
485 // the last node.
486 if (PI.getSrc() != FirstNode || !PI.isOrderDep() ||
487 !DAG->isLoopCarriedDep(PI))
488 continue;
489 SUnitToDistance[FirstNode] =
490 std::max(SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1);
491 }
492
493 // The latency is the distance from the source node to itself.
494 Latency = SUnitToDistance[Nodes.front()];
495 }
496
497 bool insert(SUnit *SU) { return Nodes.insert(SU); }
498
499 void insert(iterator S, iterator E) { Nodes.insert(S, E); }
500
501 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
502 return Nodes.remove_if(P);
503 }
504
505 unsigned count(SUnit *SU) const { return Nodes.count(SU); }
506
507 bool hasRecurrence() { return HasRecurrence; };
508
509 unsigned size() const { return Nodes.size(); }
510
511 bool empty() const { return Nodes.empty(); }
512
513 SUnit *getNode(unsigned i) const { return Nodes[i]; };
514
515 void setRecMII(unsigned mii) { RecMII = mii; };
516
517 void setColocate(unsigned c) { Colocate = c; };
518
519 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
520
521 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
522
523 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
524
525 int getRecMII() { return RecMII; }
526
527 /// Summarize node functions for the entire node set.
529 for (SUnit *SU : *this) {
530 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
531 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
532 }
533 }
534
535 unsigned getLatency() { return Latency; }
536
537 unsigned getMaxDepth() { return MaxDepth; }
538
539 void clear() {
540 Nodes.clear();
541 RecMII = 0;
542 HasRecurrence = false;
543 MaxMOV = 0;
544 MaxDepth = 0;
545 Colocate = 0;
546 ExceedPressure = nullptr;
547 }
548
549 operator SetVector<SUnit *> &() { return Nodes; }
550
551 /// Sort the node sets by importance. First, rank them by recurrence MII,
552 /// then by mobility (least mobile done first), and finally by depth.
553 /// Each node set may contain a colocate value which is used as the first
554 /// tie breaker, if it's set.
555 bool operator>(const NodeSet &RHS) const {
556 if (RecMII == RHS.RecMII) {
557 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
558 return Colocate < RHS.Colocate;
559 if (MaxMOV == RHS.MaxMOV)
560 return MaxDepth > RHS.MaxDepth;
561 return MaxMOV < RHS.MaxMOV;
562 }
563 return RecMII > RHS.RecMII;
564 }
565
566 bool operator==(const NodeSet &RHS) const {
567 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
568 MaxDepth == RHS.MaxDepth;
569 }
570
571 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
572
573 iterator begin() { return Nodes.begin(); }
574 iterator end() { return Nodes.end(); }
575 void print(raw_ostream &os) const;
576
577#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
578 LLVM_DUMP_METHOD void dump() const;
579#endif
580};
581
582// 16 was selected based on the number of ProcResource kinds for all
583// existing Subtargets, so that SmallVector don't need to resize too often.
584static const int DefaultProcResSize = 16;
585
587private:
588 const MCSubtargetInfo *STI;
589 const MCSchedModel &SM;
590 const TargetSubtargetInfo *ST;
591 const TargetInstrInfo *TII;
593 const bool UseDFA;
594 /// DFA resources for each slot
596 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
597 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
599 /// The number of scheduled micro operations for each slot. Micro operations
600 /// are assumed to be scheduled one per cycle, starting with the cycle in
601 /// which the instruction is scheduled.
602 llvm::SmallVector<int> NumScheduledMops;
603 /// Each processor resource is associated with a so-called processor resource
604 /// mask. This vector allows to correlate processor resource IDs with
605 /// processor resource masks. There is exactly one element per each processor
606 /// resource declared by the scheduling model.
608 int InitiationInterval = 0;
609 /// The number of micro operations that can be scheduled at a cycle.
610 int IssueWidth;
611
612 int calculateResMIIDFA() const;
613 /// Check if MRT is overbooked
614 bool isOverbooked() const;
615 /// Reserve resources on MRT
616 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
617 /// Unreserve resources on MRT
618 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
619
620 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
621 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
622 /// II).
623 int positiveModulo(int Dividend, int Divisor) const {
624 assert(Divisor > 0);
625 int R = Dividend % Divisor;
626 if (R < 0)
627 R += Divisor;
628 return R;
629 }
630
631#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
632 LLVM_DUMP_METHOD void dumpMRT() const;
633#endif
634
635public:
637 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
638 DAG(DAG), UseDFA(ST->useDFAforSMS()),
639 ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
640 IssueWidth(SM.IssueWidth) {
641 initProcResourceVectors(SM, ProcResourceMasks);
642 if (IssueWidth <= 0)
643 // If IssueWidth is not specified, set a sufficiently large value
644 IssueWidth = 100;
645 if (SwpForceIssueWidth > 0)
646 IssueWidth = SwpForceIssueWidth;
647 }
648
651
652 /// Check if the resources occupied by a machine instruction are available
653 /// in the current state.
654 bool canReserveResources(SUnit &SU, int Cycle);
655
656 /// Reserve the resources occupied by a machine instruction and change the
657 /// current state to reflect that change.
658 void reserveResources(SUnit &SU, int Cycle);
659
660 int calculateResMII() const;
661
662 /// Initialize resources with the initiation interval II.
663 void init(int II);
664};
665
666/// This class represents the scheduled code. The main data structure is a
667/// map from scheduled cycle to instructions. During scheduling, the
668/// data structure explicitly represents all stages/iterations. When
669/// the algorithm finshes, the schedule is collapsed into a single stage,
670/// which represents instructions from different loop iterations.
671///
672/// The SMS algorithm allows negative values for cycles, so the first cycle
673/// in the schedule is the smallest cycle value.
675private:
676 /// Map from execution cycle to instructions.
677 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
678
679 /// Map from instruction to execution cycle.
680 std::map<SUnit *, int> InstrToCycle;
681
682 /// Keep track of the first cycle value in the schedule. It starts
683 /// as zero, but the algorithm allows negative values.
684 int FirstCycle = 0;
685
686 /// Keep track of the last cycle value in the schedule.
687 int LastCycle = 0;
688
689 /// The initiation interval (II) for the schedule.
690 int InitiationInterval = 0;
691
692 /// Target machine information.
693 const TargetSubtargetInfo &ST;
694
695 /// Virtual register information.
697
698 ResourceManager ProcItinResources;
699
700public:
702 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
703 ProcItinResources(&ST, DAG) {}
704
705 void reset() {
706 ScheduledInstrs.clear();
707 InstrToCycle.clear();
708 FirstCycle = 0;
709 LastCycle = 0;
710 InitiationInterval = 0;
711 }
712
713 /// Set the initiation interval for this schedule.
715 InitiationInterval = ii;
716 ProcItinResources.init(ii);
717 }
718
719 /// Return the initiation interval for this schedule.
720 int getInitiationInterval() const { return InitiationInterval; }
721
722 /// Return the first cycle in the completed schedule. This
723 /// can be a negative value.
724 int getFirstCycle() const { return FirstCycle; }
725
726 /// Return the last cycle in the finalized schedule.
727 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
728
729 /// Return the cycle of the earliest scheduled instruction in the dependence
730 /// chain.
732 const SwingSchedulerDDG *DDG);
733
734 /// Return the cycle of the latest scheduled instruction in the dependence
735 /// chain.
737 const SwingSchedulerDDG *DDG);
738
739 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
740 SwingSchedulerDAG *DAG);
741 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
742
743 /// Iterators for the cycle to instruction map.
747
748 /// Return true if the instruction is scheduled at the specified stage.
749 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
750 return (stageScheduled(SU) == (int)StageNum);
751 }
752
753 /// Return the stage for a scheduled instruction. Return -1 if
754 /// the instruction has not been scheduled.
755 int stageScheduled(SUnit *SU) const {
756 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
757 if (it == InstrToCycle.end())
758 return -1;
759 return (it->second - FirstCycle) / InitiationInterval;
760 }
761
762 /// Return the cycle for a scheduled instruction. This function normalizes
763 /// the first cycle to be 0.
764 unsigned cycleScheduled(SUnit *SU) const {
765 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
766 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
767 return (it->second - FirstCycle) % InitiationInterval;
768 }
769
770 /// Return the maximum stage count needed for this schedule.
771 unsigned getMaxStageCount() {
772 return (LastCycle - FirstCycle) / InitiationInterval;
773 }
774
775 /// Return the instructions that are scheduled at the specified cycle.
776 std::deque<SUnit *> &getInstructions(int cycle) {
777 return ScheduledInstrs[cycle];
778 }
779
783
784 std::deque<SUnit *>
786 const std::deque<SUnit *> &Instrs) const;
787
788 bool
793 void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
794 std::deque<SUnit *> &Insts) const;
795 bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const;
797 MachineOperand &MO) const;
798
800 const SwingSchedulerDDG *DDG) const;
801 void print(raw_ostream &os) const;
802 void dump() const;
803};
804
805} // end namespace llvm
806
807#endif // LLVM_CODEGEN_MACHINEPIPELINER_H
unsigned const MachineRegisterInfo * MRI
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
unsigned Reg
uint64_t IntrinsicInst * II
#define P(N)
PowerPC VSX FMA Mutation
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
Value * RHS
Represent the analysis usage information of a pass.
BitVector & reset()
Definition: BitVector.h:392
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Generic base class for all target subtargets.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Representation of each machine instruction.
Definition: MachineInstr.h:71
MachineOperand class - Representation of each machine instruction operand.
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFunction * MF
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
const InstrItineraryData * InstrItins
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
SetVector< SUnit * >::const_iterator iterator
void print(raw_ostream &os) const
bool isExceedSU(SUnit *SU)
void insert(iterator S, iterator E)
iterator begin()
void setRecMII(unsigned mii)
void computeNodeSetInfo(SwingSchedulerDAG *SSD)
Summarize node functions for the entire node set.
unsigned getMaxDepth()
unsigned count(SUnit *SU) const
NodeSet()=default
void setColocate(unsigned c)
unsigned getLatency()
NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
bool operator>(const NodeSet &RHS) const
Sort the node sets by importance.
int compareRecMII(NodeSet &RHS)
unsigned size() const
bool operator!=(const NodeSet &RHS) const
bool insert(SUnit *SU)
LLVM_DUMP_METHOD void dump() const
bool operator==(const NodeSet &RHS) const
bool remove_if(UnaryPredicate P)
bool empty() const
void setExceedPressure(SUnit *SU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:498
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:504
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
Definition: ScheduleDAG.h:211
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
void setSUnit(SUnit *SU)
Definition: ScheduleDAG.h:501
bool isBarrier() const
Tests if this is an Order dependence that is marked as a barrier.
Definition: ScheduleDAG.h:174
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
A ScheduleDAG for scheduling lists of MachineInstr.
const MachineLoopInfo * MLI
Mutate the DAG as a postpass after normal DAG building.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:720
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:581
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:237
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:70
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool hasNewSchedule()
Return true if the loop kernel has been scheduled.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, const RegisterClassInfo &rci, unsigned II, TargetInstrInfo::PipelinerLoopInfo *PLI)
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
static bool classof(const ScheduleDAGInstrs *DAG)
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
Register getReg() const
Returns the register associated with the edge.
void setDistance(unsigned D)
Sets the distance value for the edge.
bool isBarrier() const
Returns true if the edge represents unknown scheduling barrier.
void setLatency(unsigned Latency)
Sets the latency for the edge.
bool isAntiDep() const
Returns true if the edge represents anti dependence.
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
bool isArtificial() const
Returns true if the edge represents an artificial dependence.
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
bool isOrderDep() const
Returns true if the edge represents a dependence that is not data, anti or output dependence.
unsigned getLatency() const
Returns the latency value for the edge.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
unsigned getDistance() const
Returns the distance value for the edge.
bool isOutputDep() const
Returns true if the edge represents output dependence.
SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc)
Creates an edge corresponding to an edge represented by PredOrSucc and Dep in the original DAG.
Represents dependencies between instructions.
const EdgesType & getInEdges(const SUnit *SU) const
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
std::set< NodeId > NodeSet
Definition: RDFGraph.h:551
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeMachinePipelinerPass(PassRegistry &)
cl::opt< bool > SwpEnableCopyToPhi
static const int DefaultProcResSize
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:121
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:256
Cache the target analysis information about the loop.
SmallVector< MachineOperand, 4 > BrCond
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo