LLVM 22.0.0git
AMDGPUInsertDelayAlu.cpp
Go to the documentation of this file.
1//===- AMDGPUInsertDelayAlu.cpp - Insert s_delay_alu instructions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Insert s_delay_alu instructions to avoid stalls on GFX11+.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "GCNSubtarget.h"
17#include "SIInstrInfo.h"
18#include "llvm/ADT/SetVector.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "amdgpu-insert-delay-alu"
23
24namespace {
25
26class AMDGPUInsertDelayAlu {
27public:
28 const GCNSubtarget *ST;
29 const SIInstrInfo *SII;
31
32 const TargetSchedModel *SchedModel;
33
34 // Return true if MI waits for all outstanding VALU instructions to complete.
35 static bool instructionWaitsForVALU(const MachineInstr &MI) {
36 // These instruction types wait for VA_VDST==0 before issuing.
37 const uint64_t VA_VDST_0 = SIInstrFlags::DS | SIInstrFlags::EXP |
40 if (MI.getDesc().TSFlags & VA_VDST_0)
41 return true;
42 if (MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B32 ||
43 MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64)
44 return true;
45 if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
46 AMDGPU::DepCtr::decodeFieldVaVdst(MI.getOperand(0).getImm()) == 0)
47 return true;
48 return false;
49 }
50
51 static bool instructionWaitsForSGPRWrites(const MachineInstr &MI) {
52 // These instruction types wait for VA_SDST==0 before issuing.
53 uint64_t MIFlags = MI.getDesc().TSFlags;
54 if (MIFlags & SIInstrFlags::SMRD)
55 return true;
56
57 if (MIFlags & SIInstrFlags::SALU) {
58 for (auto &Op : MI.operands()) {
59 if (Op.isReg())
60 return true;
61 }
62 }
63 return false;
64 }
65
66 // Types of delay that can be encoded in an s_delay_alu instruction.
67 enum DelayType { VALU, TRANS, SALU, OTHER };
68
69 // Get the delay type for a MachineInstr.
70 DelayType getDelayType(const MachineInstr &MI) {
72 return TRANS;
73 // WMMA XDL ops are treated the same as TRANS.
74 if (AMDGPU::isGFX1250(*ST) && SII->isXDLWMMA(MI))
75 return TRANS;
77 return VALU;
79 return SALU;
80 return OTHER;
81 }
82
83 // Information about the last instruction(s) that wrote to a particular
84 // regunit. In straight-line code there will only be one such instruction, but
85 // when control flow converges we merge the delay information from each path
86 // to represent the union of the worst-case delays of each type.
87 struct DelayInfo {
88 // One larger than the maximum number of (non-TRANS) VALU instructions we
89 // can encode in an s_delay_alu instruction.
90 static constexpr unsigned VALU_MAX = 5;
91
92 // One larger than the maximum number of TRANS instructions we can encode in
93 // an s_delay_alu instruction.
94 static constexpr unsigned TRANS_MAX = 4;
95
96 // One larger than the maximum number of SALU cycles we can encode in an
97 // s_delay_alu instruction.
98 static constexpr unsigned SALU_CYCLES_MAX = 4;
99
100 // If it was written by a (non-TRANS) VALU, remember how many clock cycles
101 // are left until it completes, and how many other (non-TRANS) VALU we have
102 // seen since it was issued.
103 uint8_t VALUCycles = 0;
104 uint8_t VALUNum = VALU_MAX;
105
106 // If it was written by a TRANS, remember how many clock cycles are left
107 // until it completes, and how many other TRANS we have seen since it was
108 // issued.
109 uint8_t TRANSCycles = 0;
110 uint8_t TRANSNum = TRANS_MAX;
111 // Also remember how many other (non-TRANS) VALU we have seen since it was
112 // issued. When an instruction depends on both a prior TRANS and a prior
113 // non-TRANS VALU, this is used to decide whether to encode a wait for just
114 // one or both of them.
115 uint8_t TRANSNumVALU = VALU_MAX;
116
117 // If it was written by an SALU, remember how many clock cycles are left
118 // until it completes.
119 uint8_t SALUCycles = 0;
120
121 DelayInfo() = default;
122
123 DelayInfo(DelayType Type, unsigned Cycles) {
124 switch (Type) {
125 default:
126 llvm_unreachable("unexpected type");
127 case VALU:
128 VALUCycles = Cycles;
129 VALUNum = 0;
130 break;
131 case TRANS:
132 TRANSCycles = Cycles;
133 TRANSNum = 0;
134 TRANSNumVALU = 0;
135 break;
136 case SALU:
137 // Guard against pseudo-instructions like SI_CALL which are marked as
138 // SALU but with a very high latency.
139 SALUCycles = std::min(Cycles, SALU_CYCLES_MAX);
140 break;
141 }
142 }
143
144 bool operator==(const DelayInfo &RHS) const {
145 return VALUCycles == RHS.VALUCycles && VALUNum == RHS.VALUNum &&
146 TRANSCycles == RHS.TRANSCycles && TRANSNum == RHS.TRANSNum &&
147 TRANSNumVALU == RHS.TRANSNumVALU && SALUCycles == RHS.SALUCycles;
148 }
149
150 bool operator!=(const DelayInfo &RHS) const { return !(*this == RHS); }
151
152 // Merge another DelayInfo into this one, to represent the union of the
153 // worst-case delays of each type.
154 void merge(const DelayInfo &RHS) {
155 VALUCycles = std::max(VALUCycles, RHS.VALUCycles);
156 VALUNum = std::min(VALUNum, RHS.VALUNum);
157 TRANSCycles = std::max(TRANSCycles, RHS.TRANSCycles);
158 TRANSNum = std::min(TRANSNum, RHS.TRANSNum);
159 TRANSNumVALU = std::min(TRANSNumVALU, RHS.TRANSNumVALU);
160 SALUCycles = std::max(SALUCycles, RHS.SALUCycles);
161 }
162
163 // Update this DelayInfo after issuing an instruction of the specified type.
164 // Cycles is the number of cycles it takes to issue the instruction. Return
165 // true if there is no longer any useful delay info.
166 bool advance(DelayType Type, unsigned Cycles) {
167 bool Erase = true;
168
169 VALUNum += (Type == VALU);
170 if (VALUNum >= VALU_MAX || VALUCycles <= Cycles) {
171 // Forget about the VALU instruction. It was too far back or has
172 // definitely completed by now.
173 VALUNum = VALU_MAX;
174 VALUCycles = 0;
175 } else {
176 VALUCycles -= Cycles;
177 Erase = false;
178 }
179
180 TRANSNum += (Type == TRANS);
181 TRANSNumVALU += (Type == VALU);
182 if (TRANSNum >= TRANS_MAX || TRANSCycles <= Cycles) {
183 // Forget about any TRANS instruction. It was too far back or has
184 // definitely completed by now.
185 TRANSNum = TRANS_MAX;
186 TRANSNumVALU = VALU_MAX;
187 TRANSCycles = 0;
188 } else {
189 TRANSCycles -= Cycles;
190 Erase = false;
191 }
192
193 if (SALUCycles <= Cycles) {
194 // Forget about any SALU instruction. It has definitely completed by
195 // now.
196 SALUCycles = 0;
197 } else {
198 SALUCycles -= Cycles;
199 Erase = false;
200 }
201
202 return Erase;
203 }
204
205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
206 void dump() const {
207 if (VALUCycles)
208 dbgs() << " VALUCycles=" << (int)VALUCycles;
209 if (VALUNum < VALU_MAX)
210 dbgs() << " VALUNum=" << (int)VALUNum;
211 if (TRANSCycles)
212 dbgs() << " TRANSCycles=" << (int)TRANSCycles;
213 if (TRANSNum < TRANS_MAX)
214 dbgs() << " TRANSNum=" << (int)TRANSNum;
215 if (TRANSNumVALU < VALU_MAX)
216 dbgs() << " TRANSNumVALU=" << (int)TRANSNumVALU;
217 if (SALUCycles)
218 dbgs() << " SALUCycles=" << (int)SALUCycles;
219 }
220#endif
221 };
222
223 // A map from regunits to the delay info for that regunit.
224 struct DelayState : DenseMap<unsigned, DelayInfo> {
225 // Merge another DelayState into this one by merging the delay info for each
226 // regunit.
227 void merge(const DelayState &RHS) {
228 for (const auto &KV : RHS) {
229 iterator It;
230 bool Inserted;
231 std::tie(It, Inserted) = insert(KV);
232 if (!Inserted)
233 It->second.merge(KV.second);
234 }
235 }
236
237 // Advance the delay info for each regunit, erasing any that are no longer
238 // useful.
239 void advance(DelayType Type, unsigned Cycles) {
240 iterator Next;
241 for (auto I = begin(), E = end(); I != E; I = Next) {
242 Next = std::next(I);
243 if (I->second.advance(Type, Cycles))
244 erase(I);
245 }
246 }
247
248 void advanceByVALUNum(unsigned VALUNum) {
249 iterator Next;
250 for (auto I = begin(), E = end(); I != E; I = Next) {
251 Next = std::next(I);
252 if (I->second.VALUNum >= VALUNum && I->second.VALUCycles > 0) {
253 erase(I);
254 }
255 }
256 }
257
258#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
259 void dump(const TargetRegisterInfo *TRI) const {
260 if (empty()) {
261 dbgs() << " empty\n";
262 return;
263 }
264
265 // Dump DelayInfo for each RegUnit in numerical order.
267 Order.reserve(size());
268 for (const_iterator I = begin(), E = end(); I != E; ++I)
269 Order.push_back(I);
270 llvm::sort(Order, [](const const_iterator &A, const const_iterator &B) {
271 return A->first < B->first;
272 });
273 for (const_iterator I : Order) {
274 dbgs() << " " << printRegUnit(I->first, TRI);
275 I->second.dump();
276 dbgs() << "\n";
277 }
278 }
279#endif
280 };
281
282 // The saved delay state at the end of each basic block.
284
285 // Emit an s_delay_alu instruction if necessary before MI.
286 MachineInstr *emitDelayAlu(MachineInstr &MI, DelayInfo Delay,
287 MachineInstr *LastDelayAlu) {
288 unsigned Imm = 0;
289
290 // Wait for a TRANS instruction.
291 if (Delay.TRANSNum < DelayInfo::TRANS_MAX)
292 Imm |= 4 + Delay.TRANSNum;
293
294 // Wait for a VALU instruction (if it's more recent than any TRANS
295 // instruction that we're also waiting for).
296 if (Delay.VALUNum < DelayInfo::VALU_MAX &&
297 Delay.VALUNum <= Delay.TRANSNumVALU) {
298 if (Imm & 0xf)
299 Imm |= Delay.VALUNum << 7;
300 else
301 Imm |= Delay.VALUNum;
302 }
303
304 // Wait for an SALU instruction.
305 if (Delay.SALUCycles) {
306 assert(Delay.SALUCycles < DelayInfo::SALU_CYCLES_MAX);
307 if (Imm & 0x780) {
308 // We have already encoded a VALU and a TRANS delay. There's no room in
309 // the encoding for an SALU delay as well, so just drop it.
310 } else if (Imm & 0xf) {
311 Imm |= (Delay.SALUCycles + 8) << 7;
312 } else {
313 Imm |= Delay.SALUCycles + 8;
314 }
315 }
316
317 // Don't emit the s_delay_alu instruction if there's nothing to wait for.
318 if (!Imm)
319 return LastDelayAlu;
320
321 // If we only need to wait for one instruction, try encoding it in the last
322 // s_delay_alu that we emitted.
323 if (!(Imm & 0x780) && LastDelayAlu) {
324 unsigned Skip = 0;
325 for (auto I = MachineBasicBlock::instr_iterator(LastDelayAlu),
327 ++I != E;) {
328 if (!I->isBundle() && !I->isMetaInstruction())
329 ++Skip;
330 }
331 if (Skip < 6) {
332 MachineOperand &Op = LastDelayAlu->getOperand(0);
333 unsigned LastImm = Op.getImm();
334 assert((LastImm & ~0xf) == 0 &&
335 "Remembered an s_delay_alu with no room for another delay!");
336 LastImm |= Imm << 7 | Skip << 4;
337 Op.setImm(LastImm);
338 return nullptr;
339 }
340 }
341
342 auto &MBB = *MI.getParent();
343 MachineInstr *DelayAlu =
344 BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_DELAY_ALU)).addImm(Imm);
345 // Remember the s_delay_alu for next time if there is still room in it to
346 // encode another delay.
347 return (Imm & 0x780) ? nullptr : DelayAlu;
348 }
349
350 bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
351 DelayState State;
352 for (auto *Pred : MBB.predecessors())
353 State.merge(BlockState[Pred]);
354
355 LLVM_DEBUG(dbgs() << " State at start of " << printMBBReference(MBB)
356 << "\n";
357 State.dump(TRI););
358
359 bool Changed = false;
360 MachineInstr *LastDelayAlu = nullptr;
361
362 MCRegUnit LastSGPRFromVALU = 0;
363 // Iterate over the contents of bundles, but don't emit any instructions
364 // inside a bundle.
365 for (auto &MI : MBB.instrs()) {
366 if (MI.isBundle() || MI.isMetaInstruction())
367 continue;
368
369 // Ignore some more instructions that do not generate any code.
370 switch (MI.getOpcode()) {
371 case AMDGPU::SI_RETURN_TO_EPILOG:
372 continue;
373 }
374
375 DelayType Type = getDelayType(MI);
376
377 if (instructionWaitsForSGPRWrites(MI)) {
378 auto It = State.find(LastSGPRFromVALU);
379 if (It != State.end()) {
380 DelayInfo Info = It->getSecond();
381 State.advanceByVALUNum(Info.VALUNum);
382 LastSGPRFromVALU = 0;
383 }
384 }
385
386 if (instructionWaitsForVALU(MI)) {
387 // Forget about all outstanding VALU delays.
388 // TODO: This is overkill since it also forgets about SALU delays.
389 State = DelayState();
390 } else if (Type != OTHER) {
391 DelayInfo Delay;
392 // TODO: Scan implicit uses too?
393 for (const auto &Op : MI.explicit_uses()) {
394 if (Op.isReg()) {
395 // One of the operands of the writelane is also the output operand.
396 // This creates the insertion of redundant delays. Hence, we have to
397 // ignore this operand.
398 if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32 && Op.isTied())
399 continue;
400 for (MCRegUnit Unit : TRI->regunits(Op.getReg())) {
401 auto It = State.find(Unit);
402 if (It != State.end()) {
403 Delay.merge(It->second);
404 State.erase(Unit);
405 }
406 }
407 }
408 }
409
410 if (SII->isVALU(MI.getOpcode())) {
411 for (const auto &Op : MI.defs()) {
412 Register Reg = Op.getReg();
413 if (AMDGPU::isSGPR(Reg, TRI)) {
414 LastSGPRFromVALU = *TRI->regunits(Reg).begin();
415 break;
416 }
417 }
418 }
419
420 if (Emit && !MI.isBundledWithPred()) {
421 // TODO: For VALU->SALU delays should we use s_delay_alu or s_nop or
422 // just ignore them?
423 LastDelayAlu = emitDelayAlu(MI, Delay, LastDelayAlu);
424 }
425 }
426
427 if (Type != OTHER) {
428 // TODO: Scan implicit defs too?
429 for (const auto &Op : MI.defs()) {
430 unsigned Latency = SchedModel->computeOperandLatency(
431 &MI, Op.getOperandNo(), nullptr, 0);
432 for (MCRegUnit Unit : TRI->regunits(Op.getReg()))
433 State[Unit] = DelayInfo(Type, Latency);
434 }
435 }
436
437 // Advance by the number of cycles it takes to issue this instruction.
438 // TODO: Use a more advanced model that accounts for instructions that
439 // take multiple cycles to issue on a particular pipeline.
440 unsigned Cycles = SIInstrInfo::getNumWaitStates(MI);
441 // TODO: In wave64 mode, double the number of cycles for VALU and VMEM
442 // instructions on the assumption that they will usually have to be issued
443 // twice?
444 State.advance(Type, Cycles);
445
446 LLVM_DEBUG(dbgs() << " State after " << MI; State.dump(TRI););
447 }
448
449 if (Emit) {
450 assert(State == BlockState[&MBB] &&
451 "Basic block state should not have changed on final pass!");
452 } else if (DelayState &BS = BlockState[&MBB]; State != BS) {
453 BS = std::move(State);
454 Changed = true;
455 }
456 return Changed;
457 }
458
459 bool run(MachineFunction &MF) {
460 LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
461 << "\n");
462
463 ST = &MF.getSubtarget<GCNSubtarget>();
464 if (!ST->hasDelayAlu())
465 return false;
466
467 SII = ST->getInstrInfo();
468 TRI = ST->getRegisterInfo();
469 SchedModel = &SII->getSchedModel();
470
471 // Calculate the delay state for each basic block, iterating until we reach
472 // a fixed point.
474 for (auto &MBB : reverse(MF))
475 WorkList.insert(&MBB);
476 while (!WorkList.empty()) {
477 auto &MBB = *WorkList.pop_back_val();
478 bool Changed = runOnMachineBasicBlock(MBB, false);
479 if (Changed)
480 WorkList.insert_range(MBB.successors());
481 }
482
483 LLVM_DEBUG(dbgs() << "Final pass over all BBs\n");
484
485 // Make one last pass over all basic blocks to emit s_delay_alu
486 // instructions.
487 bool Changed = false;
488 for (auto &MBB : MF)
489 Changed |= runOnMachineBasicBlock(MBB, true);
490 return Changed;
491 }
492};
493
494class AMDGPUInsertDelayAluLegacy : public MachineFunctionPass {
495public:
496 static char ID;
497
498 AMDGPUInsertDelayAluLegacy() : MachineFunctionPass(ID) {}
499
500 void getAnalysisUsage(AnalysisUsage &AU) const override {
501 AU.setPreservesCFG();
503 }
504
505 bool runOnMachineFunction(MachineFunction &MF) override {
506 if (skipFunction(MF.getFunction()))
507 return false;
508 AMDGPUInsertDelayAlu Impl;
509 return Impl.run(MF);
510 }
511};
512} // namespace
513
517 if (!AMDGPUInsertDelayAlu().run(MF))
518 return PreservedAnalyses::all();
520 PA.preserveSet<CFGAnalyses>();
521 return PA;
522} // end namespace llvm
523
// Definition of the static ID member that AMDGPUInsertDelayAluLegacy passes to
// the MachineFunctionPass constructor.
char AMDGPUInsertDelayAluLegacy::ID = 0;

// Expose the legacy pass ID under the llvm::AMDGPUInsertDelayAluID name.
char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAluLegacy::ID;

// Register the legacy pass: DEBUG_TYPE is the pass argument, followed by the
// pass name; the two trailing flags are the macro's cfg and analysis
// parameters, both false.
INITIALIZE_PASS(AMDGPUInsertDelayAluLegacy, DEBUG_TYPE,
 "AMDGPU Insert Delay ALU", false, false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
Interface definition for SIInstrInfo.
This file implements a set that has insertion order iteration characteristics.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
Value * RHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:188
Instructions::iterator instr_iterator
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:72
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
MachineOperand class - Representation of each machine instruction operand.
void dump() const
Definition: Pass.cpp:146
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isXDLWMMA(const MachineInstr &MI) const
static bool isSALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:440
const TargetSchedModel & getSchedModel() const
Definition: SIInstrInfo.h:1563
static bool isTRANS(const MachineInstr &MI)
Definition: SIInstrInfo.h:820
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:448
A vector that has set insertion semantics.
Definition: SetVector.h:59
void insert_range(Range &&R)
Definition: SetVector.h:193
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
value_type pop_back_val()
Definition: SetVector.h:296
void reserve(size_type N)
Definition: SmallVector.h:664
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned decodeFieldVaVdst(unsigned Encoded)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2113
LLVM_ABI Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2147
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
char & AMDGPUInsertDelayAluID
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &MFAM)