LLVM 22.0.0git
MachinePipeliner.h
Go to the documentation of this file.
1//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10//
11// Software pipelining (SWP) is an instruction scheduling technique for loops
12// that overlap loop iterations and exploits ILP via a compiler transformation.
13//
14// Swing Modulo Scheduling is an implementation of software pipelining
15// that generates schedules that are near optimal in terms of initiation
16// interval, register requirements, and stage count. See the papers:
17//
18// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20// Conference on Parallel Architectures and Compilation Techiniques.
21//
22// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24// Transactions on Computers, Vol. 50, No. 3, 2001.
25//
26// "An Implementation of Swing Modulo Scheduling With Extensions for
27// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28// Urbana-Champaign, 2005.
29//
30//
31// The SMS algorithm consists of three main steps after computing the minimal
32// initiation interval (MII).
33// 1) Analyze the dependence graph and compute information about each
34// instruction in the graph.
35// 2) Order the nodes (instructions) by priority based upon the heuristics
36// described in the algorithm.
37// 3) Attempt to schedule the nodes in the specified order using the MII.
38//
39//===----------------------------------------------------------------------===//
40#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
41#define LLVM_CODEGEN_MACHINEPIPELINER_H
42
43#include "llvm/ADT/STLExtras.h"
44#include "llvm/ADT/SetVector.h"
55
56#include <deque>
57
58namespace llvm {
59
60class AAResults;
61class NodeSet;
62class SMSchedule;
63
66
67/// The main class in the implementation of the target independent
68/// software pipeliner pass.
70public:
71 MachineFunction *MF = nullptr;
73 const MachineLoopInfo *MLI = nullptr;
74 const MachineDominatorTree *MDT = nullptr;
76 const TargetInstrInfo *TII = nullptr;
78 bool disabledByPragma = false;
79 unsigned II_setByPragma = 0;
80
81#ifndef NDEBUG
82 static int NumTries;
83#endif
84
85 /// Cache the target analysis information about the loop.
86 struct LoopInfo {
92 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
93 nullptr;
94 };
96
97 static char ID;
98
101 }
102
104
105 void getAnalysisUsage(AnalysisUsage &AU) const override;
106
107private:
108 void preprocessPhiNodes(MachineBasicBlock &B);
109 bool canPipelineLoop(MachineLoop &L);
110 bool scheduleLoop(MachineLoop &L);
111 bool swingModuloScheduler(MachineLoop &L);
112 void setPragmaPipelineOptions(MachineLoop &L);
113 bool runWindowScheduler(MachineLoop &L);
114 bool useSwingModuloScheduler();
115 bool useWindowScheduler(bool Changed);
116};
117
118/// Represents a dependence between two instruction.
120 SUnit *Dst = nullptr;
121 SDep Pred;
122 unsigned Distance = 0;
123 bool IsValidationOnly = false;
124
125public:
126 /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
127 /// \p Dep in the original DAG. This pair has no information about the
128 /// direction of the edge, so we need to pass an additional argument \p
129 /// IsSucc.
130 SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc,
131 bool IsValidationOnly)
132 : Dst(PredOrSucc), Pred(Dep), Distance(0u),
133 IsValidationOnly(IsValidationOnly) {
134 SUnit *Src = Dep.getSUnit();
135
136 if (IsSucc) {
137 std::swap(Src, Dst);
138 Pred.setSUnit(Src);
139 }
140
141 // An anti-dependence to PHI means loop-carried dependence.
142 if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) {
143 Distance = 1;
144 std::swap(Src, Dst);
145 auto Reg = Pred.getReg();
146 Pred = SDep(Src, SDep::Kind::Data, Reg);
147 }
148 }
149
150 /// Returns the SUnit from which the edge comes (source node).
151 SUnit *getSrc() const { return Pred.getSUnit(); }
152
153 /// Returns the SUnit to which the edge points (destination node).
154 SUnit *getDst() const { return Dst; }
155
156 /// Returns the latency value for the edge.
157 unsigned getLatency() const { return Pred.getLatency(); }
158
159 /// Sets the latency for the edge.
160 void setLatency(unsigned Latency) { Pred.setLatency(Latency); }
161
162 /// Returns the distance value for the edge.
163 unsigned getDistance() const { return Distance; }
164
165 /// Sets the distance value for the edge.
166 void setDistance(unsigned D) { Distance = D; }
167
168 /// Returns the register associated with the edge.
169 Register getReg() const { return Pred.getReg(); }
170
171 /// Returns true if the edge represents anti dependence.
172 bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; }
173
174 /// Returns true if the edge represents output dependence.
175 bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; }
176
177 /// Returns true if the edge represents a dependence that is not data, anti or
178 /// output dependence.
179 bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; }
180
181 /// Returns true if the edge represents unknown scheduling barrier.
182 bool isBarrier() const { return Pred.isBarrier(); }
183
184 /// Returns true if the edge represents an artificial dependence.
185 bool isArtificial() const { return Pred.isArtificial(); }
186
187 /// Tests if this is a Data dependence that is associated with a register.
188 bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); }
189
190 /// Returns true for DDG nodes that we ignore when computing the cost
191 /// functions. We ignore the back-edge recurrence in order to avoid unbounded
192 /// recursion in the calculation of the ASAP, ALAP, etc functions.
193 bool ignoreDependence(bool IgnoreAnti) const;
194
195 /// Returns true if this edge is intended to be used only for validating the
196 /// schedule.
197 bool isValidationOnly() const { return IsValidationOnly; }
198};
199
200/// Represents loop-carried dependencies. Because SwingSchedulerDAG doesn't
201/// assume cycle dependencies as the name suggests, such dependencies must be
202/// handled separately. After DAG construction is finished, these dependencies
203/// are added to SwingSchedulerDDG.
204/// TODO: Also handle output-dependencies introduced by physical registers.
208
210
211 const OrderDep *getOrderDepOrNull(SUnit *Key) const {
212 auto Ite = OrderDeps.find(Key);
213 if (Ite == OrderDeps.end())
214 return nullptr;
215 return &Ite->second;
216 }
217
218 /// Adds some edges to the original DAG that correspond to loop-carried
219 /// dependencies. Historically, loop-carried edges are represented by using
220 /// non-loop-carried edges in the original DAG. This function appends such
221 /// edges to preserve the previous behavior.
222 void modifySUnits(std::vector<SUnit> &SUnits, const TargetInstrInfo *TII);
223
224 void dump(SUnit *SU, const TargetRegisterInfo *TRI,
225 const MachineRegisterInfo *MRI) const;
226};
227
228/// This class provides APIs to retrieve edges from/to an SUnit node, with a
229/// particular focus on loop-carried dependencies. Since SUnit is not designed
230/// to represent such edges, handling them directly using its APIs has required
231/// non-trivial logic in the past. This class serves as a wrapper around SUnit,
232/// offering a simpler interface for managing these dependencies.
235
236 struct SwingSchedulerDDGEdges {
237 EdgesType Preds;
238 EdgesType Succs;
239 };
240
241 void initEdges(SUnit *SU);
242
243 SUnit *EntrySU;
244 SUnit *ExitSU;
245
246 std::vector<SwingSchedulerDDGEdges> EdgesVec;
247 SwingSchedulerDDGEdges EntrySUEdges;
248 SwingSchedulerDDGEdges ExitSUEdges;
249
250 /// Edges that are used only when validating the schedule. These edges are
251 /// not considered to drive the optimization heuristics.
252 SmallVector<SwingSchedulerDDGEdge, 8> ValidationOnlyEdges;
253
254 /// Adds a NON-validation-only edge to the DDG. Assumes to be called only by
255 /// the ctor.
256 void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
257
258 SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
259 const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
260
261public:
262 SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU,
263 const LoopCarriedEdges &LCE);
264
265 const EdgesType &getInEdges(const SUnit *SU) const;
266
267 const EdgesType &getOutEdges(const SUnit *SU) const;
268
269 bool isValidSchedule(const SMSchedule &Schedule) const;
270};
271
272/// This class builds the dependence graph for the instructions in a loop,
273/// and attempts to schedule the instructions using the SMS algorithm.
276
277 std::unique_ptr<SwingSchedulerDDG> DDG;
278
279 /// The minimum initiation interval between iterations for this schedule.
280 unsigned MII = 0;
281 /// The maximum initiation interval between iterations for this schedule.
282 unsigned MAX_II = 0;
283 /// Set to true if a valid pipelined schedule is found for the loop.
284 bool Scheduled = false;
286 LiveIntervals &LIS;
287 const RegisterClassInfo &RegClassInfo;
288 unsigned II_setByPragma = 0;
289 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
290
291 /// A topological ordering of the SUnits, which is needed for changing
292 /// dependences and iterating over the SUnits.
294
295 struct NodeInfo {
296 int ASAP = 0;
297 int ALAP = 0;
298 int ZeroLatencyDepth = 0;
299 int ZeroLatencyHeight = 0;
300
301 NodeInfo() = default;
302 };
303 /// Computed properties for each node in the graph.
304 std::vector<NodeInfo> ScheduleInfo;
305
306 enum OrderKind { BottomUp = 0, TopDown = 1 };
307 /// Computed node ordering for scheduling.
308 SetVector<SUnit *> NodeOrder;
309
314
315 /// Instructions to change when emitting the final schedule.
317
318 /// We may create a new instruction, so remember it because it
319 /// must be deleted when the pass is finished.
321
322 /// Ordered list of DAG postprocessing steps.
323 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
324
325 /// Used to compute single-iteration dependencies (i.e., buildSchedGraph).
326 AliasAnalysis *AA;
327
328 /// Used to compute loop-carried dependencies (i.e.,
329 /// addLoopCarriedDependences).
330 BatchAAResults BAA;
331
332 /// Helper class to implement Johnson's circuit finding algorithm.
333 class Circuits {
334 std::vector<SUnit> &SUnits;
335 SetVector<SUnit *> Stack;
336 BitVector Blocked;
339 // Node to Index from ScheduleDAGTopologicalSort
340 std::vector<int> *Node2Idx;
341 unsigned NumPaths = 0u;
342 static unsigned MaxPaths;
343
344 public:
345 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
346 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
347 Node2Idx = new std::vector<int>(SUs.size());
348 unsigned Idx = 0;
349 for (const auto &NodeNum : Topo)
350 Node2Idx->at(NodeNum) = Idx++;
351 }
352 Circuits &operator=(const Circuits &other) = delete;
353 Circuits(const Circuits &other) = delete;
354 ~Circuits() { delete Node2Idx; }
355
356 /// Reset the data structures used in the circuit algorithm.
357 void reset() {
358 Stack.clear();
359 Blocked.reset();
360 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
361 NumPaths = 0;
362 }
363
364 void createAdjacencyStructure(SwingSchedulerDAG *DAG);
365 bool circuit(int V, int S, NodeSetType &NodeSets,
366 const SwingSchedulerDAG *DAG, bool HasBackedge = false);
367 void unblock(int U);
368 };
369
370 struct CopyToPhiMutation : public ScheduleDAGMutation {
371 void apply(ScheduleDAGInstrs *DAG) override;
372 };
373
374public:
376 const RegisterClassInfo &rci, unsigned II,
378 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
379 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
380 Topo(SUnits, &ExitSU), AA(AA), BAA(*AA) {
381 P.MF->getSubtarget().getSMSMutations(Mutations);
383 Mutations.push_back(std::make_unique<CopyToPhiMutation>());
385 }
386
387 void schedule() override;
388 void finishBlock() override;
389
390 /// Return true if the loop kernel has been scheduled.
391 bool hasNewSchedule() { return Scheduled; }
392
393 /// Return the earliest time an instruction may be scheduled.
394 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
395
396 /// Return the latest time an instruction my be scheduled.
397 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
398
399 /// The mobility function, which the number of slots in which
400 /// an instruction may be scheduled.
401 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
402
403 /// The depth, in the dependence graph, for a node.
404 unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
405
406 /// The maximum unweighted length of a path from an arbitrary node to the
407 /// given node in which each edge has latency 0
409 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
410 }
411
412 /// The height, in the dependence graph, for a node.
413 unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
414
415 /// The maximum unweighted length of a path from the given node to an
416 /// arbitrary node in which each edge has latency 0
418 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
419 }
420
421 bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const;
422
423 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
424
425 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
426
427 /// Return the new base register that was stored away for the changed
428 /// instruction.
431 InstrChanges.find(SU);
432 if (It != InstrChanges.end())
433 return It->second.first;
434 return Register();
435 }
436
437 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
438 Mutations.push_back(std::move(Mutation));
439 }
440
441 static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
442
443 const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
444
445 bool mayOverlapInLaterIter(const MachineInstr *BaseMI,
446 const MachineInstr *OtherMI) const;
447
448private:
449 LoopCarriedEdges addLoopCarriedDependences();
450 void updatePhiDependences();
451 void changeDependences();
452 unsigned calculateResMII();
453 unsigned calculateRecMII(NodeSetType &RecNodeSets);
454 void findCircuits(NodeSetType &NodeSets);
455 void fuseRecs(NodeSetType &NodeSets);
456 void removeDuplicateNodes(NodeSetType &NodeSets);
457 void computeNodeFunctions(NodeSetType &NodeSets);
458 void registerPressureFilter(NodeSetType &NodeSets);
459 void colocateNodeSets(NodeSetType &NodeSets);
460 void checkNodeSets(NodeSetType &NodeSets);
461 void groupRemainingNodes(NodeSetType &NodeSets);
462 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
463 SetVector<SUnit *> &NodesAdded);
464 void computeNodeOrder(NodeSetType &NodeSets);
465 void checkValidNodeOrder(const NodeSetType &Circuits) const;
466 bool schedulePipeline(SMSchedule &Schedule);
467 bool computeDelta(const MachineInstr &MI, int &Delta) const;
468 MachineInstr *findDefInLoop(Register Reg);
469 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
470 unsigned &OffsetPos, Register &NewBase,
471 int64_t &NewOffset);
472 void postProcessDAG();
473 /// Set the Minimum Initiation Interval for this schedule attempt.
474 void setMII(unsigned ResMII, unsigned RecMII);
475 /// Set the Maximum Initiation Interval for this schedule attempt.
476 void setMAX_II();
477};
478
479/// A NodeSet contains a set of SUnit DAG nodes with additional information
480/// that assigns a priority to the set.
481class NodeSet {
482 SetVector<SUnit *> Nodes;
483 bool HasRecurrence = false;
484 unsigned RecMII = 0;
485 int MaxMOV = 0;
486 unsigned MaxDepth = 0;
487 unsigned Colocate = 0;
488 SUnit *ExceedPressure = nullptr;
489 unsigned Latency = 0;
490
491public:
493
494 NodeSet() = default;
496 : Nodes(S, E), HasRecurrence(true) {
497 // Calculate the latency of this node set.
498 // Example to demonstrate the calculation:
499 // Given: N0 -> N1 -> N2 -> N0
500 // Edges:
501 // (N0 -> N1, 3)
502 // (N0 -> N1, 5)
503 // (N1 -> N2, 2)
504 // (N2 -> N0, 1)
505 // The total latency which is a lower bound of the recurrence MII is the
506 // longest path from N0 back to N0 given only the edges of this node set.
507 // In this example, the latency is: 5 + 2 + 1 = 8.
508 //
509 // Hold a map from each SUnit in the circle to the maximum distance from the
510 // source node by only considering the nodes.
511 const SwingSchedulerDDG *DDG = DAG->getDDG();
512 DenseMap<SUnit *, unsigned> SUnitToDistance;
513 for (auto *Node : Nodes)
514 SUnitToDistance[Node] = 0;
515
516 for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
517 SUnit *U = Nodes[I - 1];
518 SUnit *V = Nodes[I % Nodes.size()];
519 for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) {
520 SUnit *SuccSUnit = Succ.getDst();
521 if (V != SuccSUnit)
522 continue;
523 unsigned &DU = SUnitToDistance[U];
524 unsigned &DV = SUnitToDistance[V];
525 if (DU + Succ.getLatency() > DV)
526 DV = DU + Succ.getLatency();
527 }
528 }
529 // Handle a back-edge in loop carried dependencies
530 SUnit *FirstNode = Nodes[0];
531 SUnit *LastNode = Nodes[Nodes.size() - 1];
532
533 for (auto &PI : DDG->getInEdges(LastNode)) {
534 // If we have an order dep that is potentially loop carried then a
535 // back-edge exists between the last node and the first node that isn't
536 // modeled in the DAG. Handle it manually by adding 1 to the distance of
537 // the last node.
538 if (PI.getSrc() != FirstNode || !PI.isOrderDep() ||
539 !DAG->isLoopCarriedDep(PI))
540 continue;
541 unsigned &First = SUnitToDistance[FirstNode];
542 unsigned Last = SUnitToDistance[LastNode];
543 First = std::max(First, Last + 1);
544 }
545
546 // The latency is the distance from the source node to itself.
547 Latency = SUnitToDistance[Nodes.front()];
548 }
549
550 bool insert(SUnit *SU) { return Nodes.insert(SU); }
551
552 void insert(iterator S, iterator E) { Nodes.insert(S, E); }
553
554 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
555 return Nodes.remove_if(P);
556 }
557
558 unsigned count(SUnit *SU) const { return Nodes.count(SU); }
559
560 bool hasRecurrence() { return HasRecurrence; };
561
562 unsigned size() const { return Nodes.size(); }
563
564 bool empty() const { return Nodes.empty(); }
565
566 SUnit *getNode(unsigned i) const { return Nodes[i]; };
567
568 void setRecMII(unsigned mii) { RecMII = mii; };
569
570 void setColocate(unsigned c) { Colocate = c; };
571
572 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
573
574 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
575
576 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
577
578 int getRecMII() { return RecMII; }
579
580 /// Summarize node functions for the entire node set.
582 for (SUnit *SU : *this) {
583 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
584 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
585 }
586 }
587
588 unsigned getLatency() { return Latency; }
589
590 unsigned getMaxDepth() { return MaxDepth; }
591
592 void clear() {
593 Nodes.clear();
594 RecMII = 0;
595 HasRecurrence = false;
596 MaxMOV = 0;
597 MaxDepth = 0;
598 Colocate = 0;
599 ExceedPressure = nullptr;
600 }
601
602 operator SetVector<SUnit *> &() { return Nodes; }
603
604 /// Sort the node sets by importance. First, rank them by recurrence MII,
605 /// then by mobility (least mobile done first), and finally by depth.
606 /// Each node set may contain a colocate value which is used as the first
607 /// tie breaker, if it's set.
608 bool operator>(const NodeSet &RHS) const {
609 if (RecMII == RHS.RecMII) {
610 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
611 return Colocate < RHS.Colocate;
612 if (MaxMOV == RHS.MaxMOV)
613 return MaxDepth > RHS.MaxDepth;
614 return MaxMOV < RHS.MaxMOV;
615 }
616 return RecMII > RHS.RecMII;
617 }
618
619 bool operator==(const NodeSet &RHS) const {
620 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
621 MaxDepth == RHS.MaxDepth;
622 }
623
624 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
625
626 iterator begin() { return Nodes.begin(); }
627 iterator end() { return Nodes.end(); }
628 void print(raw_ostream &os) const;
629
630#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
631 LLVM_DUMP_METHOD void dump() const;
632#endif
633};
634
635// 16 was selected based on the number of ProcResource kinds for all
636// existing Subtargets, so that SmallVector don't need to resize too often.
637static const int DefaultProcResSize = 16;
638
640private:
641 const MCSubtargetInfo *STI;
642 const MCSchedModel &SM;
643 const TargetSubtargetInfo *ST;
644 const TargetInstrInfo *TII;
646 const bool UseDFA;
647 /// DFA resources for each slot
649 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
650 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
652 /// The number of scheduled micro operations for each slot. Micro operations
653 /// are assumed to be scheduled one per cycle, starting with the cycle in
654 /// which the instruction is scheduled.
655 llvm::SmallVector<int> NumScheduledMops;
656 /// Each processor resource is associated with a so-called processor resource
657 /// mask. This vector allows to correlate processor resource IDs with
658 /// processor resource masks. There is exactly one element per each processor
659 /// resource declared by the scheduling model.
661 int InitiationInterval = 0;
662 /// The number of micro operations that can be scheduled at a cycle.
663 int IssueWidth;
664
665 int calculateResMIIDFA() const;
666 /// Check if MRT is overbooked
667 bool isOverbooked() const;
668 /// Reserve resources on MRT
669 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
670 /// Unreserve resources on MRT
671 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
672
673 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
674 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
675 /// II).
676 int positiveModulo(int Dividend, int Divisor) const {
677 assert(Divisor > 0);
678 int R = Dividend % Divisor;
679 if (R < 0)
680 R += Divisor;
681 return R;
682 }
683
684#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
685 LLVM_DUMP_METHOD void dumpMRT() const;
686#endif
687
688public:
690 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
691 DAG(DAG), UseDFA(ST->useDFAforSMS()),
692 ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
693 IssueWidth(SM.IssueWidth) {
694 initProcResourceVectors(SM, ProcResourceMasks);
695 if (IssueWidth <= 0)
696 // If IssueWidth is not specified, set a sufficiently large value
697 IssueWidth = 100;
698 if (SwpForceIssueWidth > 0)
699 IssueWidth = SwpForceIssueWidth;
700 }
701
704
705 /// Check if the resources occupied by a machine instruction are available
706 /// in the current state.
707 bool canReserveResources(SUnit &SU, int Cycle);
708
709 /// Reserve the resources occupied by a machine instruction and change the
710 /// current state to reflect that change.
711 void reserveResources(SUnit &SU, int Cycle);
712
713 int calculateResMII() const;
714
715 /// Initialize resources with the initiation interval II.
716 void init(int II);
717};
718
719/// This class represents the scheduled code. The main data structure is a
720/// map from scheduled cycle to instructions. During scheduling, the
721/// data structure explicitly represents all stages/iterations. When
722/// the algorithm finshes, the schedule is collapsed into a single stage,
723/// which represents instructions from different loop iterations.
724///
725/// The SMS algorithm allows negative values for cycles, so the first cycle
726/// in the schedule is the smallest cycle value.
728private:
729 /// Map from execution cycle to instructions.
730 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
731
732 /// Map from instruction to execution cycle.
733 std::map<SUnit *, int> InstrToCycle;
734
735 /// Keep track of the first cycle value in the schedule. It starts
736 /// as zero, but the algorithm allows negative values.
737 int FirstCycle = 0;
738
739 /// Keep track of the last cycle value in the schedule.
740 int LastCycle = 0;
741
742 /// The initiation interval (II) for the schedule.
743 int InitiationInterval = 0;
744
745 /// Target machine information.
746 const TargetSubtargetInfo &ST;
747
748 /// Virtual register information.
750
751 ResourceManager ProcItinResources;
752
753public:
755 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
756 ProcItinResources(&ST, DAG) {}
757
758 void reset() {
759 ScheduledInstrs.clear();
760 InstrToCycle.clear();
761 FirstCycle = 0;
762 LastCycle = 0;
763 InitiationInterval = 0;
764 }
765
766 /// Set the initiation interval for this schedule.
768 InitiationInterval = ii;
769 ProcItinResources.init(ii);
770 }
771
772 /// Return the initiation interval for this schedule.
773 int getInitiationInterval() const { return InitiationInterval; }
774
775 /// Return the first cycle in the completed schedule. This
776 /// can be a negative value.
777 int getFirstCycle() const { return FirstCycle; }
778
779 /// Return the last cycle in the finalized schedule.
780 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
781
782 /// Return the cycle of the earliest scheduled instruction in the dependence
783 /// chain.
785 const SwingSchedulerDDG *DDG);
786
787 /// Return the cycle of the latest scheduled instruction in the dependence
788 /// chain.
790 const SwingSchedulerDDG *DDG);
791
792 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
793 SwingSchedulerDAG *DAG);
794 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
795
796 /// Iterators for the cycle to instruction map.
800
801 /// Return true if the instruction is scheduled at the specified stage.
802 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
803 return (stageScheduled(SU) == (int)StageNum);
804 }
805
806 /// Return the stage for a scheduled instruction. Return -1 if
807 /// the instruction has not been scheduled.
808 int stageScheduled(SUnit *SU) const {
809 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
810 if (it == InstrToCycle.end())
811 return -1;
812 return (it->second - FirstCycle) / InitiationInterval;
813 }
814
815 /// Return the cycle for a scheduled instruction. This function normalizes
816 /// the first cycle to be 0.
817 unsigned cycleScheduled(SUnit *SU) const {
818 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
819 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
820 return (it->second - FirstCycle) % InitiationInterval;
821 }
822
823 /// Return the maximum stage count needed for this schedule.
824 unsigned getMaxStageCount() {
825 return (LastCycle - FirstCycle) / InitiationInterval;
826 }
827
828 /// Return the instructions that are scheduled at the specified cycle.
829 std::deque<SUnit *> &getInstructions(int cycle) {
830 return ScheduledInstrs[cycle];
831 }
832
836
837 std::deque<SUnit *>
839 const std::deque<SUnit *> &Instrs) const;
840
841 bool
846 void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
847 std::deque<SUnit *> &Insts) const;
848 bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const;
850 MachineOperand &MO) const;
851
853 const SwingSchedulerDDG *DDG) const;
854 void print(raw_ostream &os) const;
855 void dump() const;
856};
857
858} // end namespace llvm
859
860#endif // LLVM_CODEGEN_MACHINEPIPELINER_H
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
Register Reg
Register const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define P(N)
PowerPC VSX FMA Mutation
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
Value * RHS
A private abstract base class describing the concept of an individual alias analysis implementation.
Represent the analysis usage information of a pass.
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void enableCrossIterationMode()
Assume that values may come from different cycle iterations.
BitVector & reset()
Definition: BitVector.h:392
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
Generic base class for all target subtargets.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Representation of each machine instruction.
Definition: MachineInstr.h:72
MachineOperand class - Representation of each machine instruction operand.
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFunction * MF
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
const InstrItineraryData * InstrItins
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
SetVector< SUnit * >::const_iterator iterator
void print(raw_ostream &os) const
bool isExceedSU(SUnit *SU)
void insert(iterator S, iterator E)
iterator begin()
void setRecMII(unsigned mii)
void computeNodeSetInfo(SwingSchedulerDAG *SSD)
Summarize node functions for the entire node set.
unsigned getMaxDepth()
unsigned count(SUnit *SU) const
NodeSet()=default
void setColocate(unsigned c)
unsigned getLatency()
NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
bool operator>(const NodeSet &RHS) const
Sort the node sets by importance.
int compareRecMII(NodeSet &RHS)
unsigned size() const
bool operator!=(const NodeSet &RHS) const
bool insert(SUnit *SU)
LLVM_DUMP_METHOD void dump() const
bool operator==(const NodeSet &RHS) const
bool remove_if(UnaryPredicate P)
bool empty() const
void setExceedPressure(SUnit *SU)
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:99
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Scheduling dependency.
Definition: ScheduleDAG.h:51
SUnit * getSUnit() const
Definition: ScheduleDAG.h:507
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:513
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:57
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:58
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:56
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:55
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
Definition: ScheduleDAG.h:211
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
void setSUnit(SUnit *SU)
Definition: ScheduleDAG.h:510
Register getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:216
bool isBarrier() const
Tests if this is an Order dependence that is marked as a barrier.
Definition: ScheduleDAG.h:174
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
A ScheduleDAG for scheduling lists of MachineInstr.
const MachineLoopInfo * MLI
Mutate the DAG as a postpass after normal DAG building.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:729
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:588
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:586
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:590
A vector that has set insertion semantics.
Definition: SetVector.h:59
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:247
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:119
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:72
void clear()
Completely clear the SetVector.
Definition: SetVector.h:284
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:279
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:109
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool hasNewSchedule()
Return true if the loop kernel has been scheduled.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, const RegisterClassInfo &rci, unsigned II, TargetInstrInfo::PipelinerLoopInfo *PLI, AliasAnalysis *AA)
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
static bool classof(const ScheduleDAGInstrs *DAG)
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
Register getReg() const
Returns the register associated with the edge.
void setDistance(unsigned D)
Sets the distance value for the edge.
bool isBarrier() const
Returns true if the edge represents unknown scheduling barrier.
void setLatency(unsigned Latency)
Sets the latency for the edge.
SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc, bool IsValidationOnly)
Creates an edge corresponding to an edge represented by PredOrSucc and Dep in the original DAG.
bool isAntiDep() const
Returns true if the edge represents anti dependence.
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
bool isArtificial() const
Returns true if the edge represents an artificial dependence.
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
bool isOrderDep() const
Returns true if the edge represents a dependence that is not data, anti or output dependence.
unsigned getLatency() const
Returns the latency value for the edge.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
bool isValidationOnly() const
Returns true if this edge is intended to be used only for validating the schedule.
unsigned getDistance() const
Returns the distance value for the edge.
bool isOutputDep() const
Returns true if the edge represents output dependence.
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
template class LLVM_TEMPLATE_ABI opt< bool >
Definition: CommandLine.cpp:79
template class LLVM_TEMPLATE_ABI opt< int >
Definition: CommandLine.cpp:81
std::set< NodeId > NodeSet
Definition: RDFGraph.h:551
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI void initializeMachinePipelinerPass(PassRegistry &)
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
cl::opt< bool > SwpEnableCopyToPhi
static const int DefaultProcResSize
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
Represents loop-carried dependencies.
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:123
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:258
Cache the target analysis information about the loop.
SmallVector< MachineOperand, 4 > BrCond
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo