1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
19#include "llvm/CodeGen/MachineScheduler.h"
21
22namespace llvm {
23
24class SIMachineFunctionInfo;
25class SIRegisterInfo;
26class GCNSubtarget;
27class GCNSchedStage;
28
29enum class GCNSchedStageID : unsigned {
30 OccInitialSchedule = 0,
31 UnclusteredHighRPReschedule = 1,
32 ClusteredLowOccupancyReschedule = 2,
33 PreRARematerialize = 3,
34 ILPInitialSchedule = 4,
35 MemoryClauseInitialSchedule = 5
36};
37
38#ifndef NDEBUG
39raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
40#endif
41
42/// This is a minimal scheduler strategy. The main difference between this
43/// and the GenericScheduler is that GCNSchedStrategy uses different
44/// heuristics to determine excess/critical pressure sets.
45class GCNSchedStrategy : public GenericScheduler {
46protected:
47 SUnit *pickNodeBidirectional(bool &IsTopNode);
48
49 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
50 const RegPressureTracker &RPTracker,
51 SchedCandidate &Cand, bool IsBottomUp);
52
53 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
54 const RegPressureTracker &RPTracker,
55 const SIRegisterInfo *SRI, unsigned SGPRPressure,
56 unsigned VGPRPressure, bool IsBottomUp);
57
58 std::vector<unsigned> Pressure;
59
60 std::vector<unsigned> MaxPressure;
61
62 unsigned SGPRExcessLimit;
63
64 unsigned VGPRExcessLimit;
65
66 unsigned TargetOccupancy;
67
68 MachineFunction *MF;
69
70 // Scheduling stages for this strategy.
71 SmallVector<GCNSchedStageID, 4> SchedStages;
72
73 // Pointer to the current SchedStageID.
74 SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage;
75
76 // GCN RP Tracker for top-down scheduling
77 GCNDownwardRPTracker DownwardTracker;
78
79 // GCN RP Tracker for bottom-up scheduling
80 GCNUpwardRPTracker UpwardTracker;
81
82public:
83 // Set if schedule() has seen register pressure over the critical limits and
84 // had to track register pressure for actual scheduling heuristics.
85 bool HasHighPressure;
86
87 // Schedule known to have excess register pressure. Be more conservative in
88 // increasing ILP and preserving VGPRs.
89 bool KnownExcessRP = false;
90
91 // An error margin is necessary because of the poor performance of the generic
92 // RP tracker; it can be adjusted up when tuning heuristics to try to reduce
93 // register pressure more aggressively.
94 unsigned ErrorMargin = 3;
95
96 // Bias for SGPR limits under a high register pressure.
97 const unsigned HighRPSGPRBias = 7;
98
99 // Bias for VGPR limits under a high register pressure.
100 const unsigned HighRPVGPRBias = 7;
101
102 unsigned SGPRCriticalLimit;
103
104 unsigned VGPRCriticalLimit;
105
106 unsigned SGPRLimitBias = 0;
107
108 unsigned VGPRLimitBias = 0;
109
110 GCNSchedStrategy(const MachineSchedContext *C);
111
112 SUnit *pickNode(bool &IsTopNode) override;
113
114 void schedNode(SUnit *SU, bool IsTopNode) override;
115
116 void initialize(ScheduleDAGMI *DAG) override;
117
118 unsigned getTargetOccupancy() { return TargetOccupancy; }
119
120 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
121
122 GCNSchedStageID getCurrentStage();
123
124 // Advances stage. Returns true if there are remaining stages.
125 bool advanceStage();
126
127 bool hasNextStage() const;
128
129 GCNSchedStageID getNextStage() const;
130
131 GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }
132
133 GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
134};
135
136/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
137/// maximum number of waves per simd).
138class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
139public:
140 GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
141 bool IsLegacyScheduler = false);
142};
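// Illustrative sketch (not part of the original header): wiring this strategy
// into the scheduler. The factory function name is hypothetical; the
// constructors match the declarations in this file (GCNScheduleDAGMILive is
// declared further below).
//
//   static ScheduleDAGInstrs *
//   createGCNMaxOccupancyScheduler(MachineSchedContext *C) {
//     return new GCNScheduleDAGMILive(
//         C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
//   }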
143
144/// The goal of this scheduling strategy is to maximize ILP for a single wave
145/// (i.e. latency hiding).
146class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
147protected:
148 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
149 SchedBoundary *Zone) const override;
150
151public:
152 GCNMaxILPSchedStrategy(const MachineSchedContext *C);
153};
154
155/// The goal of this scheduling strategy is to maximize memory clauses for a
156/// single wave.
157class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
158protected:
159 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
160 SchedBoundary *Zone) const override;
161
162public:
163 GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
164};
165
166class ScheduleMetrics {
167 unsigned ScheduleLength;
168 unsigned BubbleCycles;
169
170public:
171 ScheduleMetrics() {}
172 ScheduleMetrics(unsigned L, unsigned BC)
173 : ScheduleLength(L), BubbleCycles(BC) {}
174 unsigned getLength() const { return ScheduleLength; }
175 unsigned getBubbles() const { return BubbleCycles; }
176 unsigned getMetric() const {
177 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
178 // The metric is zero if the amount of bubbles is less than 1%, which is too
179 // small to be meaningful. So, return 1 in that case.
180 return Metric ? Metric : 1;
181 }
182 static const unsigned ScaleFactor;
183};
184
185inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
186 dbgs() << "\n Schedule Metric (scaled by "
187 << ScheduleMetrics::ScaleFactor
188 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
189 << Sm.getLength() << " ]\n";
190 return OS;
191}
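// Worked example (illustrative): assuming ScaleFactor is 100, a 480-cycle
// schedule with 24 bubble cycles gives getMetric() = (24 * 100) / 480 = 5,
// i.e. roughly 5% of the schedule is stalls. With only 2 bubble cycles the
// quotient truncates to 0, so getMetric() returns 1 instead.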
192
193class GCNScheduleDAGMILive;
194class RegionPressureMap {
195 GCNScheduleDAGMILive *DAG;
196 // The live in/out pressure as indexed by the first or last MI in the region
197 // before scheduling.
198 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
199 // The mapping of RegionIdx to key instruction
200 DenseMap<unsigned, MachineInstr *> IdxToInstruction;
201 // Whether we are calculating LiveOuts or LiveIns
202 bool IsLiveOut;
203
204public:
205 RegionPressureMap() {}
206 RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
207 : DAG(GCNDAG), IsLiveOut(LiveOut) {}
208 // Build the Instr->LiveReg and RegionIdx->Instr maps
209 void buildLiveRegMap();
210
211 // Retrieve the LiveReg for a given RegionIdx
212 GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
213 assert(IdxToInstruction.contains(RegionIdx));
214 MachineInstr *Key = IdxToInstruction[RegionIdx];
215 return RegionLiveRegMap[Key];
216 }
217};
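// Usage sketch (illustrative): the map is built once and then queried by
// region index, e.g. for the live-out variant owned by GCNScheduleDAGMILive
// below.
//
//   RegionPressureMap LiveOuts(&DAG, /*LiveOut=*/true);
//   LiveOuts.buildLiveRegMap();
//   GCNRPTracker::LiveRegSet &LRS = LiveOuts.getLiveRegsForRegionIdx(RegionIdx);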
218
219/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
220/// boundary is inclusive, the upper boundary is exclusive.
221using RegionBoundaries =
222 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
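// Illustrative: because the upper boundary is exclusive, a region is walked as
// a half-open range (Bounds is a RegionBoundaries value):
//
//   for (MachineBasicBlock::iterator It = Bounds.first; It != Bounds.second; ++It)
//     ; // visit *It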
223
224class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
225 friend class GCNSchedStage;
226 friend class OccInitialScheduleStage;
227 friend class UnclusteredHighRPStage;
228 friend class ClusteredLowOccStage;
229 friend class PreRARematStage;
230 friend class ILPInitialScheduleStage;
231 friend class RegionPressureMap;
232
233 const GCNSubtarget &ST;
234
236
237 // Occupancy target at the beginning of the function scheduling cycle.
238 unsigned StartingOccupancy;
239
240 // Minimal real occupancy recorded for the function.
241 unsigned MinOccupancy;
242
243 // Vector of regions recorded for later rescheduling.
244 SmallVector<RegionBoundaries, 32> Regions;
245
246 // Record regions with high register pressure.
247 BitVector RegionsWithHighRP;
248
249 // Record regions with excess register pressure over the physical register
250 // limit. Register pressure in these regions usually will result in spilling.
251 BitVector RegionsWithExcessRP;
252
253 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
254 BitVector RegionsWithIGLPInstrs;
255
256 // Region live-in cache.
257 SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
258
259 // Region pressure cache.
260 SmallVector<GCNRegPressure, 32> Pressure;
261
262 // Temporary basic block live-in cache.
263 DenseMap<MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
264
265 // The map of the initial first region instruction to region live-in registers.
266 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
267
268 // Calculate the map of the initial first region instruction to region live in
269 // registers
270 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;
271
272 // Calculate the map of the initial last region instruction to region live out
273 // registers
274 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
275 getRegionLiveOutMap() const;
276
277 // The live out registers per region. These are internally stored as a map of
278 // the initial last region instruction to region live out registers, but can
279 // be retrieved with the RegionIdx by calls to getLiveRegsForRegionIdx.
280 RegionPressureMap RegionLiveOuts;
281
282 // Return current region pressure.
283 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
284
285 // Compute and cache live-ins and pressure for all regions in block.
286 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
287
288 /// If necessary, updates a region's boundaries following insertion ( \p NewMI
289 /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
290 /// For an MI removal, this must be called before the MI is actually erased
291 /// from its parent MBB.
292 void updateRegionBoundaries(RegionBoundaries &RegionBounds,
293 MachineBasicBlock::iterator MI,
294 MachineInstr *NewMI);
295
296 void runSchedStages();
297
298 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
299
300public:
301 GCNScheduleDAGMILive(MachineSchedContext *C,
302 std::unique_ptr<MachineSchedStrategy> S);
303
304 void schedule() override;
305
306 void finalizeSchedule() override;
307};
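// Illustrative sketch of the stage-driven flow, a simplification of what
// runSchedStages() does (the exact logic lives in GCNSchedStrategy.cpp):
//
//   GCNSchedStrategy &S = ...; // strategy owned by this DAG
//   while (S.advanceStage()) {
//     std::unique_ptr<GCNSchedStage> Stage = createSchedStage(S.getCurrentStage());
//     if (!Stage->initGCNSchedStage())
//       continue;
//     // ... schedule each region, guarded by initGCNRegion()/finalizeGCNRegion() ...
//     Stage->finalizeGCNSchedStage();
//   }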
308
309// GCNSchedStrategy applies multiple scheduling stages to a function.
310class GCNSchedStage {
311protected:
312 GCNScheduleDAGMILive &DAG;
313
314 GCNSchedStrategy &S;
315
316 MachineFunction &MF;
317
318 SIMachineFunctionInfo &MFI;
319
320 const GCNSubtarget &ST;
321
322 const GCNSchedStageID StageID;
323
324 // The current block being scheduled.
325 MachineBasicBlock *CurrentMBB = nullptr;
326
327 // Current region index.
328 unsigned RegionIdx = 0;
329
330 // Record the original order of instructions before scheduling.
331 std::vector<MachineInstr *> Unsched;
332
333 // RP before scheduling the current region.
334 GCNRegPressure PressureBefore;
335
336 // RP after scheduling the current region.
337 GCNRegPressure PressureAfter;
338
339 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
340
341 GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);
342
343public:
344 // Initialize state for a scheduling stage. Returns false if the current stage
345 // should be skipped.
346 virtual bool initGCNSchedStage();
347
348 // Finalize state after finishing a scheduling pass on the function.
349 virtual void finalizeGCNSchedStage();
350
351 // Setup for scheduling a region. Returns false if the current region should
352 // be skipped.
353 virtual bool initGCNRegion();
354
355 // Track whether a new region is also a new MBB.
356 void setupNewBlock();
357
358 // Finalize state after scheduling a region.
359 void finalizeGCNRegion();
360
361 // Check result of scheduling.
362 void checkScheduling();
363
364 // Computes the given schedule's virtual execution time in clocks.
365 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
366 ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
367 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
368 DenseMap<unsigned, unsigned> &ReadyCycles,
369 const TargetSchedModel &SM);
370
371 // Returns true if scheduling should be reverted.
372 virtual bool shouldRevertScheduling(unsigned WavesAfter);
373
374 // Returns true if current region has known excess pressure.
375 bool isRegionWithExcessRP() const {
376 return DAG.RegionsWithExcessRP[RegionIdx];
377 }
378
379 // The region number this stage is currently working on
380 unsigned getRegionIdx() { return RegionIdx; }
381
382 // Returns true if the new schedule may result in more spilling.
383 bool mayCauseSpilling(unsigned WavesAfter);
384
385 // Attempt to revert scheduling for this region.
386 void revertScheduling();
387
388 void advanceRegion() { RegionIdx++; }
389
390 virtual ~GCNSchedStage() = default;
391};
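// Per-region sketch (illustrative; the real driver is runSchedStages()): after
// a region is scheduled, checkScheduling() compares PressureBefore/PressureAfter
// and, if shouldRevertScheduling() returns true, revertScheduling() restores the
// original order recorded in Unsched.
//
//   if (Stage->initGCNRegion()) {
//     // ScheduleDAGMILive::schedule() runs for the region here ...
//     Stage->finalizeGCNRegion();
//   }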
392
393class OccInitialScheduleStage : public GCNSchedStage {
394public:
395 bool shouldRevertScheduling(unsigned WavesAfter) override;
396
397 OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
398 : GCNSchedStage(StageID, DAG) {}
399};
400
401class UnclusteredHighRPStage : public GCNSchedStage {
402private:
403 // Save the initial occupancy before starting this stage.
404 unsigned InitialOccupancy;
405
406public:
407 bool initGCNSchedStage() override;
408
409 void finalizeGCNSchedStage() override;
410
411 bool initGCNRegion() override;
412
413 bool shouldRevertScheduling(unsigned WavesAfter) override;
414
415 UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
416 : GCNSchedStage(StageID, DAG) {}
417};
418
419// Retry function scheduling if the resulting occupancy we found is lower than
420// the occupancy used for other scheduling passes. This will give more freedom
421// to schedule low register pressure blocks.
422class ClusteredLowOccStage : public GCNSchedStage {
423public:
424 bool initGCNSchedStage() override;
425
426 bool initGCNRegion() override;
427
428 bool shouldRevertScheduling(unsigned WavesAfter) override;
429
430 ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
431 : GCNSchedStage(StageID, DAG) {}
432};
433
434/// Attempts to reduce function spilling or, if there is no spilling, to
435/// increase function occupancy by one with respect to ArchVGPR usage by sinking
436/// trivially rematerializable instructions to their use. When the stage
437/// estimates that reducing spilling or increasing occupancy is possible, as few
438/// instructions as possible are rematerialized to reduce potential negative
439/// effects on function latency.
440class PreRARematStage : public GCNSchedStage {
441private:
442 /// Useful information about a rematerializable instruction.
443 struct RematInstruction {
444 /// Single use of the rematerializable instruction's defined register,
445 /// located in a different block.
446 MachineInstr *UseMI;
447 /// Rematerialized version of \p DefMI, set in
448 /// PreRARematStage::rematerialize. Used for reverting rematerializations.
449 MachineInstr *RematMI;
450 /// Set of regions in which the rematerializable instruction's defined
451 /// register is a live-in.
452 SmallDenseSet<unsigned, 4> LiveInRegions;
453
454 RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
455 };
456
457 /// Maps all MIs to their parent region. MI terminators are considered to be
458 /// outside the region they delimit, and as such are not stored in the map.
460 /// Parent MBB to each region, in region order.
462 /// Collects instructions to rematerialize.
463 MapVector<MachineInstr *, RematInstruction> Rematerializations;
464 /// Collects regions whose live-ins or register pressure will change due to
465 /// rematerializations.
467 /// In case we need to rollback rematerializations, save lane masks for all
468 /// rematerialized registers in all regions in which they are live-ins.
470 /// After successful stage initialization, indicates which regions should be
471 /// rescheduled.
472 BitVector RescheduleRegions;
473 /// The target occupancy the stage is trying to achieve. Empty when the
474 /// objective is spilling reduction.
475 std::optional<unsigned> TargetOcc;
476 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
477 /// Smaller than or equal to the target occupancy.
478 unsigned AchievedOcc;
479
480 /// Returns whether remat can reduce spilling or increase function occupancy
481 /// by 1 through rematerialization. If it can do either, collects instructions
482 /// in PreRARematStage::Rematerializations and sets the target occupancy in
483 /// PreRARematStage::TargetOcc.
484 bool canIncreaseOccupancyOrReduceSpill();
485
486 /// Whether the MI is trivially rematerializable and does not have any virtual
487 /// register use.
488 bool isTriviallyReMaterializable(const MachineInstr &MI);
489
490 /// Rematerializes all instructions in PreRARematStage::Rematerializations
491 /// and stores the achieved occupancy after remat in
492 /// PreRARematStage::AchievedOcc.
493 void rematerialize();
494
495 /// If remat alone did not increase occupancy to the target one, rolls back all
496 /// rematerializations and resets live-ins/RP in all regions impacted by the
497 /// stage to their pre-stage values.
498 void finalizeGCNSchedStage() override;
499
500 /// Returns true if all the uses in \p InstToRemat defined at \p
501 /// OriginalIdx are live at \p RematIdx. This only checks liveness of virtual
502 /// reg uses.
503 bool allUsesAvailableAt(const MachineInstr *InstToRemat,
504 SlotIndex OriginalIdx, SlotIndex RematIdx) const;
505
506public:
507 bool initGCNSchedStage() override;
508
509 bool initGCNRegion() override;
510
511 bool shouldRevertScheduling(unsigned WavesAfter) override;
512
513 PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
514 : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {}
515};
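// Stage flow sketch (illustrative; details are in GCNSchedStrategy.cpp): the
// stage only proceeds when canIncreaseOccupancyOrReduceSpill() finds
// candidates; rematerialize() then sinks them and records AchievedOcc, the
// regions flagged in RescheduleRegions are rescheduled, and
// finalizeGCNSchedStage() rolls everything back if the target occupancy was
// not reached.
//
//   if (canIncreaseOccupancyOrReduceSpill()) {
//     rematerialize();
//     // ... reschedule regions set in RescheduleRegions ...
//   }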
516
517class ILPInitialScheduleStage : public GCNSchedStage {
518public:
519 bool shouldRevertScheduling(unsigned WavesAfter) override;
520
521 ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
522 : GCNSchedStage(StageID, DAG) {}
523};
524
525class MemoryClauseInitialScheduleStage : public GCNSchedStage {
526public:
527 bool shouldRevertScheduling(unsigned WavesAfter) override;
528
529 MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
530 GCNScheduleDAGMILive &DAG)
531 : GCNSchedStage(StageID, DAG) {}
532};
533
534class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
535private:
536 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
537
538 bool HasIGLPInstrs = false;
539
540public:
541 void schedule() override;
542
543 void finalizeSchedule() override;
544
545 GCNPostScheduleDAGMILive(MachineSchedContext *C,
546 std::unique_ptr<MachineSchedStrategy> S,
547 bool RemoveKillFlags);
548};
549
550} // End namespace llvm
551
552#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H