LLVM 22.0.0git
MCSchedule.h
Go to the documentation of this file.
1//===-- llvm/MC/MCSchedule.h - Scheduling -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the classes used to describe a subtarget's machine model
10// for scheduling and other instruction cost heuristics.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_MC_MCSCHEDULE_H
15#define LLVM_MC_MCSCHEDULE_H
16
17#include "llvm/ADT/StringRef.h"
19#include "llvm/MC/MCInstrDesc.h"
22#include <cassert>
23#include <optional>
24
25namespace llvm {
26
27template <typename T> class ArrayRef;
28struct InstrItinerary;
29class MCSubtargetInfo;
30class MCInstrInfo;
31class MCInst;
32class MCInstrDesc;
34
35/// Define a kind of processor resource that will be modeled by the scheduler.
37 const char *Name;
38 unsigned NumUnits; // Number of resource of this kind
39 unsigned SuperIdx; // Index of the resources kind that contains this kind.
40
41 // Number of resources that may be buffered.
42 //
43 // Buffered resources (BufferSize != 0) may be consumed at some indeterminate
44 // cycle after dispatch. This should be used for out-of-order cpus when
45 // instructions that use this resource can be buffered in a reservaton
46 // station.
47 //
48 // Unbuffered resources (BufferSize == 0) always consume their resource some
49 // fixed number of cycles after dispatch. If a resource is unbuffered, then
50 // the scheduler will avoid scheduling instructions with conflicting resources
51 // in the same cycle. This is for in-order cpus, or the in-order portion of
52 // an out-of-order cpus.
54
55 // If the resource has sub-units, a pointer to the first element of an array
56 // of `NumUnits` elements containing the ProcResourceIdx of the sub units.
57 // nullptr if the resource does not have sub-units.
58 const unsigned *SubUnitsIdxBegin;
59
60 bool operator==(const MCProcResourceDesc &Other) const {
61 return NumUnits == Other.NumUnits && SuperIdx == Other.SuperIdx
62 && BufferSize == Other.BufferSize;
63 }
64};
65
66/// Identify one of the processor resource kinds consumed by a
67/// particular scheduling class for the specified number of cycles.
70 /// Cycle at which the resource will be released by an instruction,
71 /// relatively to the cycle in which the instruction is issued
72 /// (assuming no stalls inbetween).
74 /// Cycle at which the resource will be aquired by an instruction,
75 /// relatively to the cycle in which the instruction is issued
76 /// (assuming no stalls inbetween).
78
80 return ProcResourceIdx == Other.ProcResourceIdx &&
81 ReleaseAtCycle == Other.ReleaseAtCycle &&
82 AcquireAtCycle == Other.AcquireAtCycle;
83 }
84};
85
86/// Specify the latency in cpu cycles for a particular scheduling class and def
87/// index. -1 indicates an invalid latency. Heuristics would typically consider
88/// an instruction with invalid latency to have infinite latency. Also identify
89/// the WriteResources of this def. When the operand expands to a sequence of
90/// writes, this ID is the last write in the sequence.
92 int16_t Cycles;
94
96 return Cycles == Other.Cycles && WriteResourceID == Other.WriteResourceID;
97 }
98};
99
100/// Specify the number of cycles allowed after instruction issue before a
101/// particular use operand reads its registers. This effectively reduces the
102/// write's latency. Here we allow negative cycles for corner cases where
103/// latency increases. This rule only applies when the entry's WriteResource
104/// matches the write's WriteResource.
105///
106/// MCReadAdvanceEntries are sorted first by operand index (UseIdx), then by
107/// WriteResourceIdx.
109 unsigned UseIdx;
112
114 return UseIdx == Other.UseIdx && WriteResourceID == Other.WriteResourceID
115 && Cycles == Other.Cycles;
116 }
117};
118
119/// Summarize the scheduling resources required for an instruction of a
120/// particular scheduling class.
121///
122/// Defined as an aggregate struct for creating tables with initializer lists.
124 static const unsigned short InvalidNumMicroOps = (1U << 13) - 1;
125 static const unsigned short VariantNumMicroOps = InvalidNumMicroOps - 1;
126
127#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
129#endif
134 uint16_t WriteProcResIdx; // First index into WriteProcResTable.
136 uint16_t WriteLatencyIdx; // First index into WriteLatencyTable.
138 uint16_t ReadAdvanceIdx; // First index into ReadAdvanceTable.
140
141 bool isValid() const {
143 }
144 bool isVariant() const {
146 }
147};
148
149/// Specify the cost of a register definition in terms of number of physical
150/// register allocated at register renaming stage. For example, AMD Jaguar.
151/// natively supports 128-bit data types, and operations on 256-bit registers
152/// (i.e. YMM registers) are internally split into two COPs (complex operations)
153/// and each COP updates a physical register. Basically, on Jaguar, a YMM
154/// register write effectively consumes two physical registers. That means,
155/// the cost of a YMM write in the BtVer2 model is 2.
161
162/// A register file descriptor.
163///
164/// This struct allows to describe processor register files. In particular, it
165/// helps describing the size of the register file, as well as the cost of
166/// allocating a register file at register renaming stage.
167/// FIXME: this struct can be extended to provide information about the number
168/// of read/write ports to the register file. A value of zero for field
169/// 'NumPhysRegs' means: this register file has an unbounded number of physical
170/// registers.
172 const char *Name;
175 // Index of the first cost entry in MCExtraProcessorInfo::RegisterCostTable.
177 // A value of zero means: there is no limit in the number of moves that can be
178 // eliminated every cycle.
180 // Ture if this register file only knows how to optimize register moves from
181 // known zero registers.
183};
184
185/// Provide extra details about the machine processor.
186///
187/// This is a collection of "optional" processor information that is not
188/// normally used by the LLVM machine schedulers, but that can be consumed by
189/// external tools like llvm-mca to improve the quality of the peformance
190/// analysis.
192 // Actual size of the reorder buffer in hardware.
194 // Number of instructions retired per cycle.
200 unsigned LoadQueueID;
201 unsigned StoreQueueID;
202};
203
204/// Machine model for scheduling, bundling, and heuristics.
205///
206/// The machine model directly provides basic information about the
207/// microarchitecture to the scheduler in the form of properties. It also
208/// optionally refers to scheduler resource tables and itinerary
209/// tables. Scheduler resource tables model the latency and cost for each
210/// instruction type. Itinerary tables are an independent mechanism that
211/// provides a detailed reservation table describing each cycle of instruction
212/// execution. Subtargets may define any or all of the above categories of data
213/// depending on the type of CPU and selected scheduler.
214///
215/// The machine independent properties defined here are used by the scheduler as
216/// an abstract machine model. A real micro-architecture has a number of
217/// buffers, queues, and stages. Declaring that a given machine-independent
218/// abstract property corresponds to a specific physical property across all
219/// subtargets can't be done. Nonetheless, the abstract model is
220/// useful. Futhermore, subtargets typically extend this model with processor
221/// specific resources to model any hardware features that can be exploited by
222/// scheduling heuristics and aren't sufficiently represented in the abstract.
223///
224/// The abstract pipeline is built around the notion of an "issue point". This
225/// is merely a reference point for counting machine cycles. The physical
226/// machine will have pipeline stages that delay execution. The scheduler does
227/// not model those delays because they are irrelevant as long as they are
228/// consistent. Inaccuracies arise when instructions have different execution
229/// delays relative to each other, in addition to their intrinsic latency. Those
230/// special cases can be handled by TableGen constructs such as, ReadAdvance,
231/// which reduces latency when reading data, and ReleaseAtCycles, which consumes
232/// a processor resource when writing data for a number of abstract
233/// cycles.
234///
235/// TODO: One tool currently missing is the ability to add a delay to
236/// ReleaseAtCycles. That would be easy to add and would likely cover all cases
237/// currently handled by the legacy itinerary tables.
238///
239/// A note on out-of-order execution and, more generally, instruction
240/// buffers. Part of the CPU pipeline is always in-order. The issue point, which
241/// is the point of reference for counting cycles, only makes sense as an
242/// in-order part of the pipeline. Other parts of the pipeline are sometimes
243/// falling behind and sometimes catching up. It's only interesting to model
244/// those other, decoupled parts of the pipeline if they may be predictably
245/// resource constrained in a way that the scheduler can exploit.
246///
247/// The LLVM machine model distinguishes between in-order constraints and
248/// out-of-order constraints so that the target's scheduling strategy can apply
249/// appropriate heuristics. For a well-balanced CPU pipeline, out-of-order
250/// resources would not typically be treated as a hard scheduling
251/// constraint. For example, in the GenericScheduler, a delay caused by limited
252/// out-of-order resources is not directly reflected in the number of cycles
253/// that the scheduler sees between issuing an instruction and its dependent
254/// instructions. In other words, out-of-order resources don't directly increase
255/// the latency between pairs of instructions. However, they can still be used
256/// to detect potential bottlenecks across a sequence of instructions and bias
257/// the scheduling heuristics appropriately.
259 // IssueWidth is the maximum number of instructions that may be scheduled in
260 // the same per-cycle group. This is meant to be a hard in-order constraint
261 // (a.k.a. "hazard"). In the GenericScheduler strategy, no more than
262 // IssueWidth micro-ops can ever be scheduled in a particular cycle.
263 //
264 // In practice, IssueWidth is useful to model any bottleneck between the
265 // decoder (after micro-op expansion) and the out-of-order reservation
266 // stations or the decoder bandwidth itself. If the total number of
267 // reservation stations is also a bottleneck, or if any other pipeline stage
268 // has a bandwidth limitation, then that can be naturally modeled by adding an
269 // out-of-order processor resource.
270 unsigned IssueWidth;
271 static const unsigned DefaultIssueWidth = 1;
272
273 // MicroOpBufferSize is the number of micro-ops that the processor may buffer
274 // for out-of-order execution.
275 //
276 // "0" means operations that are not ready in this cycle are not considered
277 // for scheduling (they go in the pending queue). Latency is paramount. This
278 // may be more efficient if many instructions are pending in a schedule.
279 //
280 // "1" means all instructions are considered for scheduling regardless of
281 // whether they are ready in this cycle. Latency still causes issue stalls,
282 // but we balance those stalls against other heuristics.
283 //
284 // "> 1" means the processor is out-of-order. This is a machine independent
285 // estimate of highly machine specific characteristics such as the register
286 // renaming pool and reorder buffer.
288 static const unsigned DefaultMicroOpBufferSize = 0;
289
290 // LoopMicroOpBufferSize is the number of micro-ops that the processor may
291 // buffer for optimized loop execution. More generally, this represents the
292 // optimal number of micro-ops in a loop body. A loop may be partially
293 // unrolled to bring the count of micro-ops in the loop body closer to this
294 // number.
296 static const unsigned DefaultLoopMicroOpBufferSize = 0;
297
298 // LoadLatency is the expected latency of load instructions.
299 unsigned LoadLatency;
300 static const unsigned DefaultLoadLatency = 4;
301
302 // HighLatency is the expected latency of "very high latency" operations.
303 // See TargetInstrInfo::isHighLatencyDef().
304 // By default, this is set to an arbitrarily high number of cycles
305 // likely to have some impact on scheduling heuristics.
306 unsigned HighLatency;
307 static const unsigned DefaultHighLatency = 10;
308
309 // MispredictPenalty is the typical number of extra cycles the processor
310 // takes to recover from a branch misprediction.
312 static const unsigned DefaultMispredictPenalty = 10;
313
314 bool PostRAScheduler; // default value is false
315
317
318 // Tells the MachineScheduler whether or not to track resource usage
319 // using intervals via ResourceSegments (see
320 // llvm/include/llvm/CodeGen/MachineScheduler.h).
322
323 unsigned ProcID;
329 // Instruction itinerary tables used by InstrItineraryData.
330 friend class InstrItineraryData;
332
334
336
337 unsigned getProcessorID() const { return ProcID; }
338
339 /// Does this machine model include instruction-level scheduling.
340 bool hasInstrSchedModel() const { return SchedClassTable; }
341
344 "No extra information available for this model");
345 return *ExtraProcessorInfo;
346 }
347
348 /// Return true if this machine model data for all instructions with a
349 /// scheduling class (itinerary class or SchedRW list).
350 bool isComplete() const { return CompleteModel; }
351
352 /// Return true if machine supports out of order execution.
353 bool isOutOfOrder() const { return MicroOpBufferSize > 1; }
354
355 unsigned getNumProcResourceKinds() const {
357 }
358
359 const MCProcResourceDesc *getProcResource(unsigned ProcResourceIdx) const {
360 assert(hasInstrSchedModel() && "No scheduling machine model");
361
362 assert(ProcResourceIdx < NumProcResourceKinds && "bad proc resource idx");
363 return &ProcResourceTable[ProcResourceIdx];
364 }
365
366 const MCSchedClassDesc *getSchedClassDesc(unsigned SchedClassIdx) const {
367 assert(hasInstrSchedModel() && "No scheduling machine model");
368
369 assert(SchedClassIdx < NumSchedClasses && "bad scheduling class idx");
370 return &SchedClassTable[SchedClassIdx];
371 }
372
373 StringRef getSchedClassName(unsigned SchedClassIdx) const {
374#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
375 return (*SchedClassNames)[SchedClassTable[SchedClassIdx].NameOff];
376#else
377 return "<unknown>";
378#endif
379 }
380
381 /// Returns the latency value for the scheduling class.
382 LLVM_ABI static int computeInstrLatency(const MCSubtargetInfo &STI,
383 const MCSchedClassDesc &SCDesc);
384
386 unsigned SClass) const;
387
389 const MCInstrInfo &MCII,
390 const MCInst &Inst) const;
391
392 template <typename MCSubtargetInfo, typename MCInstrInfo,
393 typename InstrItineraryData, typename MCInstOrMachineInstr>
395 const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
396 const MCInstOrMachineInstr &Inst,
398 ResolveVariantSchedClass =
399 [](const MCSchedClassDesc *SCDesc) { return SCDesc; }) const;
400
401 // Returns the reciprocal throughput information from a MCSchedClassDesc.
402 LLVM_ABI static double
403 getReciprocalThroughput(const MCSubtargetInfo &STI,
404 const MCSchedClassDesc &SCDesc);
405
406 LLVM_ABI static double getReciprocalThroughput(unsigned SchedClass,
407 const InstrItineraryData &IID);
408
409 LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI,
410 const MCInstrInfo &MCII,
411 const MCInst &Inst) const;
412
413 /// Returns the maximum forwarding delay for register reads dependent on
414 /// writes of scheduling class WriteResourceIdx.
415 LLVM_ABI static unsigned
417 unsigned WriteResourceIdx = 0);
418
419 /// Returns the bypass delay cycle for the maximum latency write cycle
420 LLVM_ABI static unsigned getBypassDelayCycles(const MCSubtargetInfo &STI,
421 const MCSchedClassDesc &SCDesc);
422
423 /// Returns the default initialized model.
425};
426
427// The first three are only template'd arguments so we can get away with leaving
428// them as incomplete types below. The third is a template over
429// MCInst/MachineInstr so as to avoid a layering violation here that would make
430// the MC layer depend on CodeGen.
431template <typename MCSubtargetInfo, typename MCInstrInfo,
432 typename InstrItineraryData, typename MCInstOrMachineInstr>
434 const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
435 const MCInstOrMachineInstr &Inst,
437 ResolveVariantSchedClass) const {
438 static const int NoInformationAvailable = -1;
439 // Check if we have a scheduling model for instructions.
440 if (!hasInstrSchedModel()) {
441 // Try to fall back to the itinerary model if the scheduling model doesn't
442 // have a scheduling table. Note the default does not have a table.
443
444 llvm::StringRef CPU = STI.getCPU();
445
446 // Check if we have a CPU to get the itinerary information.
447 if (CPU.empty())
448 return NoInformationAvailable;
449
450 // Get itinerary information.
452 // Get the scheduling class of the requested instruction.
453 const MCInstrDesc &Desc = MCII.get(Inst.getOpcode());
454 unsigned SCClass = Desc.getSchedClass();
455
456 unsigned Latency = 0;
457
458 for (unsigned Idx = 0, IdxEnd = Inst.getNumOperands(); Idx != IdxEnd; ++Idx)
459 if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
460 Latency = std::max(Latency, *OperCycle);
461
462 return int(Latency);
463 }
464
465 unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
466 const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
467 SCDesc = ResolveVariantSchedClass(SCDesc);
468
469 if (!SCDesc || !SCDesc->isValid())
470 return NoInformationAvailable;
471
472 return MCSchedModel::computeInstrLatency(STI, *SCDesc);
473}
474
475} // namespace llvm
476
477#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ABI
Definition Compiler.h:213
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
Generic base class for all target subtargets.
StringRef getCPU() const
InstrItineraryData getInstrItineraryForCPU(StringRef CPU) const
Get scheduling itinerary of a CPU.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A table of densely packed, null-terminated strings indexed by offset.
Definition StringTable.h:33
An efficient, type-erasing, non-owning reference to a callable.
This is an optimization pass for GlobalISel generic memory operations.
Op::Description Desc
@ Other
Any other memory.
Definition ModRef.h:68
ArrayRef(const T &OneElt) -> ArrayRef< T >
An itinerary represents the scheduling information for an instruction.
Provide extra details about the machine processor.
Definition MCSchedule.h:191
const MCRegisterFileDesc * RegisterFiles
Definition MCSchedule.h:196
const MCRegisterCostEntry * RegisterCostTable
Definition MCSchedule.h:198
Define a kind of processor resource that will be modeled by the scheduler.
Definition MCSchedule.h:36
bool operator==(const MCProcResourceDesc &Other) const
Definition MCSchedule.h:60
const unsigned * SubUnitsIdxBegin
Definition MCSchedule.h:58
Specify the number of cycles allowed after instruction issue before a particular use operand reads it...
Definition MCSchedule.h:108
bool operator==(const MCReadAdvanceEntry &Other) const
Definition MCSchedule.h:113
Specify the cost of a register definition in terms of number of physical register allocated at regist...
Definition MCSchedule.h:156
A register file descriptor.
Definition MCSchedule.h:171
uint16_t MaxMovesEliminatedPerCycle
Definition MCSchedule.h:179
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
bool isVariant() const
Definition MCSchedule.h:144
static const unsigned short InvalidNumMicroOps
Definition MCSchedule.h:124
uint16_t NumWriteLatencyEntries
Definition MCSchedule.h:137
uint16_t NumReadAdvanceEntries
Definition MCSchedule.h:139
uint16_t NumWriteProcResEntries
Definition MCSchedule.h:135
static const unsigned short VariantNumMicroOps
Definition MCSchedule.h:125
Machine model for scheduling, bundling, and heuristics.
Definition MCSchedule.h:258
static LLVM_ABI const MCSchedModel Default
Returns the default initialized model.
Definition MCSchedule.h:424
bool isOutOfOrder() const
Return true if machine supports out of order execution.
Definition MCSchedule.h:353
bool hasExtraProcessorInfo() const
Definition MCSchedule.h:335
static LLVM_ABI unsigned getForwardingDelayCycles(ArrayRef< MCReadAdvanceEntry > Entries, unsigned WriteResourceIdx=0)
Returns the maximum forwarding delay for register reads dependent on writes of scheduling class Write...
static const unsigned DefaultLoopMicroOpBufferSize
Definition MCSchedule.h:296
const InstrItinerary * InstrItineraries
Definition MCSchedule.h:331
static const unsigned DefaultHighLatency
Definition MCSchedule.h:307
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition MCSchedule.h:366
unsigned getProcessorID() const
Definition MCSchedule.h:337
const MCExtraProcessorInfo & getExtraProcessorInfo() const
Definition MCSchedule.h:342
unsigned getNumProcResourceKinds() const
Definition MCSchedule.h:355
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition MCSchedule.h:340
static const unsigned DefaultLoadLatency
Definition MCSchedule.h:300
unsigned LoopMicroOpBufferSize
Definition MCSchedule.h:295
static LLVM_ABI int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
static const unsigned DefaultMicroOpBufferSize
Definition MCSchedule.h:288
friend class InstrItineraryData
Definition MCSchedule.h:330
const StringTable * SchedClassNames
Definition MCSchedule.h:328
const MCSchedClassDesc * SchedClassTable
Definition MCSchedule.h:325
const MCProcResourceDesc * ProcResourceTable
Definition MCSchedule.h:324
static const unsigned DefaultMispredictPenalty
Definition MCSchedule.h:312
unsigned MicroOpBufferSize
Definition MCSchedule.h:287
unsigned NumSchedClasses
Definition MCSchedule.h:327
const MCExtraProcessorInfo * ExtraProcessorInfo
Definition MCSchedule.h:333
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition MCSchedule.h:359
static LLVM_ABI unsigned getBypassDelayCycles(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the bypass delay cycle for the maximum latency write cycle.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
StringRef getSchedClassName(unsigned SchedClassIdx) const
Definition MCSchedule.h:373
static const unsigned DefaultIssueWidth
Definition MCSchedule.h:271
unsigned NumProcResourceKinds
Definition MCSchedule.h:326
bool isComplete() const
Return true if this machine model data for all instructions with a scheduling class (itinerary class ...
Definition MCSchedule.h:350
unsigned MispredictPenalty
Definition MCSchedule.h:311
Specify the latency in cpu cycles for a particular scheduling class and def index.
Definition MCSchedule.h:91
bool operator==(const MCWriteLatencyEntry &Other) const
Definition MCSchedule.h:95
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition MCSchedule.h:68
bool operator==(const MCWriteProcResEntry &Other) const
Definition MCSchedule.h:79
uint16_t AcquireAtCycle
Cycle at which the resource will be aquired by an instruction, relatively to the cycle in which the i...
Definition MCSchedule.h:77
uint16_t ReleaseAtCycle
Cycle at which the resource will be released by an instruction, relatively to the cycle in which the ...
Definition MCSchedule.h:73