//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
    "amdgpu-spill-vgpr-to-agpr",
    cl::desc("Enable spilling VGPRs to AGPRs"),
    cl::ReallyHidden,
    cl::init(true));

// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns AMDGPU::NoRegister.
// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
// MCRegisters. This should reduce the number of iterations and avoid redundant
// checking.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
                                     const LiveRegUnits &LiveUnits,
                                     const TargetRegisterClass &RC) {
  for (MCRegister Reg : RC) {
    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
        !MRI.isReserved(Reg))
      return Reg;
  }
  return MCRegister();
}

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(
    MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
    const TargetRegisterClass &RC, bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveUnits.addReg(CSRegs[i]);

  // We are looking for a register that can be used throughout the entire
  // function, so any use is unacceptable.
  if (Unused)
    return findUnusedRegister(MRI, LiveUnits, RC);

  for (MCRegister Reg : RC) {
    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
      return Reg;
  }

  return MCRegister();
}

/// Query target location for spilling SGPRs
/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
    bool IncludeScratchCopy = true) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);

  // We need to save and restore the given SGPR.

  Register ScratchSGPR;
  // 1: Try to save the given register into an unused scratch SGPR. The
  // LiveUnits should have all the callee saved registers marked as used. For
  // certain cases we skip copy to scratch SGPR.
  if (IncludeScratchCopy)
    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);

  if (!ScratchSGPR) {
    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
                                         TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() &&
        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
                                         /*IsPrologEpilog=*/true)) {
      // 2: There's no free lane to spill, and no free register to save the
      // SGPR, so we're forced to take another VGPR to use for the spill.
      MFI->addToPrologEpilogSGPRSpills(
          SGPR, PrologEpilogSGPRSaveRestoreInfo(
                    SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));

      LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
                 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
                        << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                        << '\n';);
    } else {
      // Remove dead <FI> index
      FrameInfo.RemoveStackObject(FI);
      // 3: If all else fails, spill the register to memory.
      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
      MFI->addToPrologEpilogSGPRSpills(
          SGPR,
          PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
                        << printReg(SGPR, TRI) << '\n');
    }
  } else {
    MFI->addToPrologEpilogSGPRSpills(
        SGPR, PrologEpilogSGPRSaveRestoreInfo(
                  SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
    LiveUnits.addReg(ScratchSGPR);
    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
                      << printReg(ScratchSGPR, TRI) << '\n');
  }
}

// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function, as getFrameRegister would
// use.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LiveRegUnits &LiveUnits, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI, Register FrameReg,
                             int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveUnits.addReg(SpillReg);
  bool IsKill = !MBB.isLiveIn(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
  if (IsKill)
    LiveUnits.removeReg(SpillReg);
}

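// Reload a register from its stack slot in the epilog. Like buildPrologSpill,
// this bypasses getFrameRegister because a different frame register may be in
// use at this point.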
static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LiveRegUnits &LiveUnits, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI,
                               Register FrameReg, int64_t DwordOff = 0) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
}

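// Materialize the 64-bit global information table (GIT) pointer into
// TargetReg. The high half comes from the amdgpu-git-ptr-high attribute when
// it is set, otherwise from S_GETPC_B64; the low half is copied from the SGPR
// the GIT pointer was passed in.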
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
      .addReg(GitPtrLo);
}

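// Lazily initialize LiveUnits with the live registers at the prolog or epilog
// insertion point; this is a no-op if it has already been initialized.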
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
                          const SIMachineFunctionInfo *FuncInfo,
                          MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveUnits.empty()) {
    LiveUnits.init(TRI);
    if (IsProlog) {
      LiveUnits.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveUnits.addLiveOuts(MBB);
      LiveUnits.stepBackward(*MBBI);
    }
  }
}

namespace llvm {

// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
// BP, etc. These spills are delayed until the current function's frame is
// finalized. For a given register, the builder uses the
// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
class PrologEpilogSGPRSpillBuilder {
  MachineBasicBlock::iterator MI;
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  const GCNSubtarget &ST;
  MachineFrameInfo &MFI;
  SIMachineFunctionInfo *FuncInfo;
  const SIInstrInfo *TII;
  const SIRegisterInfo &TRI;
  Register SuperReg;
  const PrologEpilogSGPRSaveRestoreInfo SI;
  LiveRegUnits &LiveUnits;
  const DebugLoc &DL;
  Register FrameReg;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
  unsigned EltSize = 4;

  void saveToMemory(const int FI) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

    MCRegister TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);

      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

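  // Write each 32-bit piece of the SGPR into the VGPR lane that was reserved
  // for this frame index, avoiding any memory traffic.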
  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

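  // Reload the SGPR from its memory spill slot: load each dword into a
  // temporary VGPR and move it back with V_READFIRSTLANE_B32.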
  void restoreFromMemory(const int FI) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    MCRegister TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));

      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);
      MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  void restoreFromVGPRLane(const int FI) {
    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

public:
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};

} // namespace llvm

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LiveRegUnits LiveUnits;
    LiveUnits.init(*TRI);
    LiveUnits.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                          FlatScrInitHi)
          .addReg(FlatScrInitHi)
          .addImm(0);
      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

      using namespace AMDGPU::Hwreg;
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitLo)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitHi)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                        AMDGPU::FLAT_SCR_HI)
        .addReg(FlatScrInitHi)
        .addImm(0);
    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
                      AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitLo, RegState::Kill)
      .addImm(8);
  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

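// Stack sizes and offsets are counted in per-lane bytes when flat scratch is
// in use, but in per-wave bytes when going through the scratch buffer
// resource, so frame sizes are scaled by the wavefront size in the latter
// case.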
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with
  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
  // wave offset to a free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
  if (!mayReserveScratchForCWSR(MF)) {
    if (hasFP(MF)) {
      Register FPReg = MFI->getFrameOffsetReg();
      assert(FPReg != AMDGPU::FP_REG);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
    }

    if (requiresStackPointerReference(MF)) {
      Register SPReg = MFI->getStackPtrOffsetReg();
      assert(SPReg != AMDGPU::SP_REG);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
    }
  } else {
    // We need to check if we're on a compute queue - if we are, then the CWSR
    // trap handler may need to store some VGPRs on the stack. The first VGPR
    // block is saved separately, so we only need to allocate space for any
    // additional VGPR blocks used. For now, we will make sure there's enough
    // room for the theoretical maximum number of VGPRs that can be allocated.
    // FIXME: Figure out if the shader uses fewer VGPRs in practice.
    assert(hasFP(MF));
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    unsigned VGPRSize = llvm::alignTo(
        (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
         AMDGPU::IsaInfo::getVGPRAllocGranule(&ST,
                                              MFI->getDynamicVGPRBlockSize())) *
            4,
        FrameInfo.getMaxAlign());
    MFI->setScratchReservedForDynamicVGPRs(VGPRSize);

    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), FPReg)
        .addImm(AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_HW_ID2,
                                                     8, 2));
    // The MicroEngine ID is 0 for the graphics queue, and 1 or 2 for compute
    // (3 is unused, so we ignore it). Unfortunately, S_GETREG doesn't set
    // SCC, so we need to check for 0 manually.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32)).addImm(0).addReg(FPReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), FPReg).addImm(VGPRSize);
    if (requiresStackPointerReference(MF)) {
      Register SPReg = MFI->getStackPtrOffsetReg();
      assert(SPReg != AMDGPU::SP_REG);

      // If at least one of the constants can be inlined, then we can use
      // s_cselect. Otherwise, use a mov and cmovk.
      if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
          AMDGPU::isInlinableLiteral32(Offset + VGPRSize,
                                       ST.hasInv2PiInlineImm())) {
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
            .addImm(Offset + VGPRSize)
            .addImm(Offset);
      } else {
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
            .addImm(Offset + VGPRSize);
      }
    }
  }

  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto *MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // cpol
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Lo_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Hi_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  if (FuncInfo->isWholeWaveFunction()) {
    // Whole wave functions already have a copy of the original EXEC mask that
    // we can use.
    assert(IsProlog && "Epilog should look at return, not setup");
    ScratchExecCopy =
        TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
    assert(ScratchExecCopy && "Couldn't find copy of EXEC");
  } else {
    ScratchExecCopy = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  }

  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveUnits.addReg(ScratchExecCopy);

  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}

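// Emit the prolog stores for whole-wave-mode VGPRs (scratch WWM registers with
// only their inactive lanes, callee-saved WWM VGPRs in full) and for the
// delayed special SGPR spills such as FP and BP, flipping EXEC as required.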
void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  for (const Register Reg : make_first_range(WWMScratchRegs)) {
    if (!MRI.isReserved(Reg)) {
      MRI.addLiveIn(Reg);
      MBB.addLiveIn(Reg);
    }
  }
  StoreWWMRegisters(WWMScratchRegs);

  auto EnableAllLanes = [&]() {
    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
  };

  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      EnableAllLanes();
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (FuncInfo->isWholeWaveFunction()) {
    // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose, so we can remove
    // it now. If we have already saved some WWM CSR registers, then the EXEC is
    // already -1 and we don't need to do anything else. Otherwise, set EXEC to
    // -1 here.
    if (!ScratchExecCopy)
      buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
                           /*EnableInactiveLanes*/ true);
    else if (WWMCalleeSavedRegs.empty())
      EnableAllLanes();
    TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
  } else if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
    // Otherwise, FP has been moved to a temporary register and spill it
    // instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}

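// Mirror of emitCSRSpillStores for the epilog: reload the special SGPR spills
// and the whole-wave-mode VGPRs, then restore the original EXEC mask.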
void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
    // the FP value to a temporary register. The frame pointer should be
    // overwritten only at the end when all other spills are restored from
    // current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  if (FuncInfo->isWholeWaveFunction()) {
    // For whole wave functions, the EXEC is already -1 at this point.
    // Therefore, we can restore the CSR WWM registers right away.
    RestoreWWMRegisters(WWMCalleeSavedRegs);

    // The original EXEC is the first operand of the return instruction.
    const MachineInstr &Return = MBB.instr_back();
    assert(Return.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN &&
           "Unexpected return inst");
    Register OrigExec = Return.getOperand(0).getReg();

    if (!WWMScratchRegs.empty()) {
      unsigned XorOpc = ST.isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
      BuildMI(MBB, MBBI, DL, TII->get(XorOpc), TRI.getExec())
          .addReg(OrigExec)
          .addImm(-1);
      RestoreWWMRegisters(WWMScratchRegs);
    }

    // Restore original EXEC.
    unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
    return;
  }

  if (!WWMScratchRegs.empty()) {
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
  }
  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
    // are free to set one up if they need it.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;
  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

  if (RoundedSize != 0) {
    if (TRI.hasBasePointer(MF)) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
          .addReg(TRI.getBaseRegister())
          .setMIFlag(MachineInstr::FrameDestroy);
    } else if (hasFP(MF)) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
          .addReg(FramePtrReg)
          .setMIFlag(MachineInstr::FrameDestroy);
    }
  }

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers are
    // restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
            .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      MIB.setMIFlag(MachineInstr::FrameDestroy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits,
                         FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

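// Runs once frame layout is final: fold eligible VGPR spills into AGPRs, drop
// frame indices that became dead, and reserve emergency scavenging slots if
// stack objects remain.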
void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack slot.
    // If not, then the VGPR to AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead, update it with the
        // correct register value. But not sure the register value alone is
        // adequate to lower the DIExpression. It should be worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue()) {
            uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
            if (MI.getOperand(StackOperandIdx).isFI() &&
                !MFI.isFixedObjectIndex(
                    MI.getOperand(StackOperandIdx).getIndex()) &&
                SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
              MI.getOperand(StackOperandIdx)
                  .ChangeToRegister(Register(), false /*isDef*/);
            }
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs if we
  // can. Any remaining SGPR spills will go to memory, so move them back to the
  // default stack.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
    }
  }
}

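// After register allocation, try to shift the VGPR reserved for AGPR copies
// (gfx908) and the SGPR pair reserved for long branches down to lower unused
// registers, keeping the high registers free.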
void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    // On gfx908, we had initially reserved the highest available VGPR for AGPR
    // copy. Now since we are done with RA, check if there exists an unused VGPR
    // which is lower than the earlier reserved VGPR before RA. If one exists,
    // use it for AGPR copy instead of one reserved before RA.
    Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
    Register UnusedLowVGPR =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
                          TRI->getHWRegIndex(VGPRForAGPRCopy))) {
      // Reserve this newly identified VGPR (for AGPR copy)
      // reserved registers should already be frozen at this point
      // so we can avoid calling MRI.freezeReservedRegs and just use
      // MRI.reserveReg
      FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
      MRI.reserveReg(UnusedLowVGPR, TRI);
    }
  }
  // We initially reserved the highest available SGPR pair for long branches
  // now, after RA, we shift down to a lower unused one if one exists
  Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
  Register UnusedLowSGPR =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
  // If LongBranchReservedReg is null then we didn't find a long branch
  // and never reserved a register to begin with so there is nothing to
  // shift down. Then if UnusedLowSGPR is null, there isn't an available lower
  // register to use so just keep the original one we set.
  if (LongBranchReservedReg && UnusedLowSGPR) {
    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
    MRI.reserveReg(UnusedLowSGPR, TRI);
  }
}

1595// The special SGPR spills like the one needed for FP, BP or any reserved
1596// registers delayed until frame lowering.
1598 MachineFunction &MF, BitVector &SavedVGPRs,
1599 bool NeedExecCopyReservedReg) const {
1600 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1603 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1604 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1605 LiveRegUnits LiveUnits;
1606 LiveUnits.init(*TRI);
1607 // Initially mark callee saved registers as used so we will not choose them
1608 // while looking for scratch SGPRs.
1609 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1610 for (unsigned I = 0; CSRegs[I]; ++I)
1611 LiveUnits.addReg(CSRegs[I]);
1612
1613 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1614
1615 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1616 if (NeedExecCopyReservedReg ||
1617 (ReservedRegForExecCopy &&
1618 MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1619 MRI.reserveReg(ReservedRegForExecCopy, TRI);
1620 Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1621 if (UnusedScratchReg) {
1622 // If found any unused scratch SGPR, reserve the register itself for Exec
1623 // copy and there is no need for any spills in that case.
1624 MFI->setSGPRForEXECCopy(UnusedScratchReg);
1625 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1626 LiveUnits.addReg(UnusedScratchReg);
1627 } else {
1628 // Needs spill.
1629 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1630 "Re-reserving spill slot for EXEC copy register");
1631 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1632 /*IncludeScratchCopy=*/false);
1633 }
1634 } else if (ReservedRegForExecCopy) {
1635 // Reset it at this point. There are no whole-wave copies and spills
1636 // encountered.
1637 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1638 }
1639
1640 // hasFP only knows about stack objects that already exist. We're now
1641 // determining the stack slots that will be created, so we have to predict
1642 // them. Stack objects force FP usage with calls.
1643 //
1644 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1645 // don't want to report it here.
1646 //
1647 // FIXME: Is this really hasReservedCallFrame?
1648 const bool WillHaveFP =
1649 FrameInfo.hasCalls() &&
1650 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1651
1652 if (WillHaveFP || hasFP(MF)) {
1653 Register FramePtrReg = MFI->getFrameOffsetReg();
1654 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1655 "Re-reserving spill slot for FP");
1656 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1657 }
1658
1659 if (TRI->hasBasePointer(MF)) {
1660 Register BasePtrReg = TRI->getBaseRegister();
1661 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1662 "Re-reserving spill slot for BP");
1663 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1664 }
1665}
1666
1667 // Only report VGPRs to generic code.
1668 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1669 BitVector &SavedVGPRs,
1670 RegScavenger *RS) const {
1671 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1672
1673 // If this is a function with the amdgpu_cs_chain[_preserve] calling
1674 // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1675 // we don't need to save and restore anything.
1676 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1677 return;
1678
1679 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1680
1681 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1682 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1683 const SIInstrInfo *TII = ST.getInstrInfo();
1684 bool NeedExecCopyReservedReg = false;
1685
1686 MachineInstr *ReturnMI = nullptr;
1687 for (MachineBasicBlock &MBB : MF) {
1688 for (MachineInstr &MI : MBB) {
1689 // TODO: Walking through all MBBs here would be a bad heuristic. Better
1690 // handle them elsewhere.
1691 if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1692 NeedExecCopyReservedReg = true;
1693 else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1694 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1695 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1696 (MFI->isChainFunction() &&
1697 TII->isChainCallOpcode(MI.getOpcode()))) {
1698 // We expect all returns to be the same size.
1699 assert(!ReturnMI ||
1700 (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1701 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1702 ReturnMI = &MI;
1703 }
1704 }
1705 }
1706
1707 SmallVector<Register> SortedWWMVGPRs;
1708 for (Register Reg : MFI->getWWMReservedRegs()) {
1709 // The shift-back is needed only for the VGPRs used for SGPR spills, and they
1710 // are 32 bits in size. The SIPreAllocateWWMRegs pass can add tuples to the WWM
1711 // reserved registers.
1712 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1713 if (TRI->getRegSizeInBits(*RC) != 32)
1714 continue;
1715 SortedWWMVGPRs.push_back(Reg);
1716 }
1717
1718 sort(SortedWWMVGPRs, std::greater<Register>());
1719 MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
1720
1721 if (MFI->isEntryFunction())
1722 return;
1723
1724 if (MFI->isWholeWaveFunction()) {
1725 // In practice, all the VGPRs are WWM registers, and we will need to save at
1726 // least their inactive lanes. Add them to WWMReservedRegs.
1727 assert(!NeedExecCopyReservedReg &&
1728 "Whole wave functions can use the reg mapped for their i1 argument");
1729
1730 // FIXME: Be more efficient!
1731 for (MCRegister Reg : AMDGPU::VGPR_32RegClass)
1732 if (MF.getRegInfo().isPhysRegModified(Reg)) {
1733 MFI->reserveWWMRegister(Reg);
1734 MF.begin()->addLiveIn(Reg);
1735 }
1736 MF.begin()->sortUniqueLiveIns();
1737 }
1738
1739 // Remove any VGPRs used in the return value because these do not need to be saved.
1740 // This prevents CSR restore from clobbering return VGPRs.
1741 if (ReturnMI) {
1742 for (auto &Op : ReturnMI->operands()) {
1743 if (Op.isReg())
1744 SavedVGPRs.reset(Op.getReg());
1745 }
1746 }
1747
1748 // Create the stack objects for WWM registers now.
1749 for (Register Reg : MFI->getWWMReservedRegs()) {
1750 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1751 MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1752 TRI->getSpillAlign(*RC));
1753 }
1754
1755 // Ignore the SGPRs the default implementation found.
1756 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1757
1758 // Do not save AGPRs prior to GFX90A because there is no easy way to do so.
1759 // gfx908 has no AGPR loads and stores, so spilling an AGPR also
1760 // requires a temporary VGPR.
1761 if (!ST.hasGFX90AInsts())
1762 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1763
1764 determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1765
1766 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1767 // allow the default insertion to handle them.
1768 for (auto &Reg : MFI->getWWMSpills())
1769 SavedVGPRs.reset(Reg.first);
1770}
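// Example (illustrative): if the return instruction uses v0 and v1 for the
// return value, those registers were cleared from SavedVGPRs above, so the
// CSR restore emitted in the epilog cannot clobber the value being returned.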
1771
1772 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1773 BitVector &SavedRegs,
1774 RegScavenger *RS) const {
1775 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1776 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1777 if (MFI->isEntryFunction())
1778 return;
1779
1780 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1781 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1782
1783 // The SP is specifically managed and we don't want extra spills of it.
1784 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1785
1786 const BitVector AllSavedRegs = SavedRegs;
1787 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1788
1789 // We have to anticipate introducing CSR VGPR spills, or a spill of the
1790 // caller-saved VGPR reserved for SGPR spills, as we now always create a stack
1791 // entry for it even if we don't have any stack objects yet, since we require
1792 // an FP if there is both a call and a stack. We will allocate a VGPR for SGPR
1793 // spills if there are any SGPR spills, whether they are CSR spills or not.
1794 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1795 const bool WillHaveFP =
1796 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1797
1798 // FP will be specially managed like SP.
1799 if (WillHaveFP || hasFP(MF))
1800 SavedRegs.reset(MFI->getFrameOffsetReg());
1801
1802 // Return address use with the return instruction is hidden through the
1803 // SI_RETURN pseudo. Given that, and since IPRA computes actual register usage
1804 // and does not use the CSR list, clobbering of the return address by function
1805 // calls (D117243) or otherwise (D120922) is not seen by IPRA's register usage
1806 // collection. The code below ensures the return address is saved and restored
1807 // in those scenarios.
1808 const MachineRegisterInfo &MRI = MF.getRegInfo();
1809 Register RetAddrReg = TRI->getReturnAddressReg(MF);
1810 if (!MFI->isEntryFunction() &&
1811 (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1812 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1813 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1814 }
1815}
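// Example (illustrative): for a non-entry function that makes calls, both
// 32-bit halves of the return address register (typically the s[30:31] pair)
// are added to SavedRegs above, so the return address survives the calls even
// though IPRA does not see the clobber.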
1816
1817 static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
1818 const GCNSubtarget &ST,
1819 std::vector<CalleeSavedInfo> &CSI,
1820 unsigned &MinCSFrameIndex,
1821 unsigned &MaxCSFrameIndex) {
1822 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1823 MachineFrameInfo &MFI = MF.getFrameInfo();
1824 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1825
1826 assert(
1827 llvm::is_sorted(CSI,
1828 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1829 return A.getReg() < B.getReg();
1830 }) &&
1831 "Callee saved registers not sorted");
1832
1833 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1834 return !CSI.isSpilledToReg() &&
1835 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1836 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1837 };
1838
1839 auto CSEnd = CSI.end();
1840 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1841 Register Reg = CSIt->getReg();
1842 if (!CanUseBlockOps(*CSIt))
1843 continue;
1844
1845 // Find all the regs that will fit in a 32-bit mask starting at the current
1846 // reg and build said mask. It should have 1 for every register that's
1847 // included, with the current register as the least significant bit.
1848 uint32_t Mask = 1;
1849 CSEnd = std::remove_if(
1850 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1851 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1852 Mask |= 1 << (CSI.getReg() - Reg);
1853 return true;
1854 } else {
1855 return false;
1856 }
1857 });
1858
1859 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1860 Register RegBlock =
1861 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1862 if (!RegBlock) {
1863 // We couldn't find a super register for the block. This can happen if
1864 // the register we started with is too high (e.g. v232 if the maximum is
1865 // v255). We therefore try to get the last register block and figure out
1866 // the mask from there.
1867 Register LastBlockStart =
1868 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1869 RegBlock =
1870 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1871 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1872 "Couldn't find super register");
1873 int RegDelta = Reg - LastBlockStart;
1874 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1875 "Bad shift amount");
1876 Mask <<= RegDelta;
1877 }
1878
1879 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1880
1881 // The stack objects can be a bit smaller than the register block if we know
1882 // some of the high bits of Mask are 0. This may happen often with calling
1883 // conventions where the caller and callee-saved VGPRs are interleaved at
1884 // a small boundary (e.g. 8 or 16).
1885 int UnusedBits = llvm::countl_zero(Mask);
1886 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1887 int FrameIdx =
1888 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1889 /*isSpillSlot=*/true);
1890 if ((unsigned)FrameIdx < MinCSFrameIndex)
1891 MinCSFrameIndex = FrameIdx;
1892 if ((unsigned)FrameIdx > MaxCSFrameIndex)
1893 MaxCSFrameIndex = FrameIdx;
1894
1895 CSIt->setFrameIdx(FrameIdx);
1896 CSIt->setReg(RegBlock);
1897 }
1898 CSI.erase(CSEnd, CSI.end());
1899}
1900
1901 bool SIFrameLowering::assignCalleeSavedSpillSlots(
1902 MachineFunction &MF, const TargetRegisterInfo *TRI,
1903 std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
1904 unsigned &MaxCSFrameIndex) const {
1905 if (CSI.empty())
1906 return true; // Early exit if no callee saved registers are modified!
1907
1908 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1909 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1910
1911 if (UseVGPRBlocks)
1912 assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);
1913
1914 return assignCalleeSavedSpillSlots(MF, TRI, CSI) || UseVGPRBlocks;
1915}
1916
1917 bool SIFrameLowering::assignCalleeSavedSpillSlots(
1918 MachineFunction &MF, const TargetRegisterInfo *TRI,
1919 std::vector<CalleeSavedInfo> &CSI) const {
1920 if (CSI.empty())
1921 return true; // Early exit if no callee saved registers are modified!
1922
1923 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1924 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1925 const SIRegisterInfo *RI = ST.getRegisterInfo();
1926 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1927 Register BasePtrReg = RI->getBaseRegister();
1928 Register SGPRForFPSaveRestoreCopy =
1929 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1930 Register SGPRForBPSaveRestoreCopy =
1931 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1932 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1933 return false;
1934
1935 unsigned NumModifiedRegs = 0;
1936
1937 if (SGPRForFPSaveRestoreCopy)
1938 NumModifiedRegs++;
1939 if (SGPRForBPSaveRestoreCopy)
1940 NumModifiedRegs++;
1941
1942 for (auto &CS : CSI) {
1943 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1944 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1945 if (--NumModifiedRegs)
1946 break;
1947 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1948 SGPRForBPSaveRestoreCopy) {
1949 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1950 if (--NumModifiedRegs)
1951 break;
1952 }
1953 }
1954
1955 return false;
1956}
1957
1958 bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
1959 const MachineFunction &MF) const {
1960
1961 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1962 const MachineFrameInfo &MFI = MF.getFrameInfo();
1963 const SIInstrInfo *TII = ST.getInstrInfo();
1964 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1965 uint64_t MaxOffset = EstStackSize - 1;
1966
1967 // We need the emergency stack slots to be allocated in range of the
1968 // MUBUF/flat scratch immediate offset from the base register, so assign these
1969 // first at the incoming SP position.
1970 //
1971 // TODO: We could try sorting the objects to find a hole in the first bytes
1972 // rather than allocating as close to possible. This could save a lot of space
1973 // on frames with alignment requirements.
1974 if (ST.enableFlatScratch()) {
1975 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1976 SIInstrFlags::FlatScratch))
1977 return false;
1978 } else {
1979 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1980 return false;
1981 }
1982
1983 return true;
1984}
1985
1986 bool SIFrameLowering::spillCalleeSavedRegisters(
1987 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1988 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
1989 MachineFunction *MF = MBB.getParent();
1990 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1991 if (!ST.useVGPRBlockOpsForCSR())
1992 return false;
1993
1994 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1995 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1996 const SIInstrInfo *TII = ST.getInstrInfo();
1998
1999 const TargetRegisterClass *BlockRegClass =
2000 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
2001 for (const CalleeSavedInfo &CS : CSI) {
2002 Register Reg = CS.getReg();
2003 if (!BlockRegClass->contains(Reg) ||
2004 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2005 spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2006 continue;
2007 }
2008
2009 // Build a scratch block store.
2010 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2011 int FrameIndex = CS.getFrameIdx();
2012 MachinePointerInfo PtrInfo =
2013 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2014 MachineMemOperand *MMO =
2015 MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
2016 FrameInfo.getObjectSize(FrameIndex),
2017 FrameInfo.getObjectAlign(FrameIndex));
2018
2019 BuildMI(MBB, MI, MI->getDebugLoc(),
2020 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2021 .addReg(Reg, getKillRegState(false))
2022 .addFrameIndex(FrameIndex)
2023 .addReg(FuncInfo->getStackPtrOffsetReg())
2024 .addImm(0)
2025 .addImm(Mask)
2026 .addMemOperand(MMO);
2027
2028 FuncInfo->setHasSpilledVGPRs();
2029
2030 // Add the register to the liveins. This is necessary because if any of the
2031 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2032 // then the whole block will be marked as reserved and `updateLiveness` will
2033 // skip it.
2034 MBB.addLiveIn(Reg);
2035 }
2036 MBB.sortUniqueLiveIns();
2037
2038 return true;
2039}
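// Illustrative note (behavior assumed from the mask operand, not spelled out
// here): with the 0b10011 example above, SI_BLOCK_SPILL_V1024_SAVE is expected
// to write only v40, v41 and v44 of the block to the shrunken stack object.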
2040
2041 bool SIFrameLowering::restoreCalleeSavedRegisters(
2042 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2043 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2044 MachineFunction *MF = MBB.getParent();
2045 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2046 if (!ST.useVGPRBlockOpsForCSR())
2047 return false;
2048
2049 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2050 MachineFrameInfo &MFI = MF->getFrameInfo();
2051 const SIInstrInfo *TII = ST.getInstrInfo();
2052 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2053 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2054 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2055 Register Reg = CS.getReg();
2056 if (!BlockRegClass->contains(Reg) ||
2057 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2058 restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2059 continue;
2060 }
2061
2062 // Build a scratch block load.
2063 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2064 int FrameIndex = CS.getFrameIdx();
2065 MachinePointerInfo PtrInfo =
2066 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2067 MachineMemOperand *MMO = MF->getMachineMemOperand(
2068 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2069 MFI.getObjectAlign(FrameIndex));
2070
2071 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2072 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2073 .addFrameIndex(FrameIndex)
2074 .addReg(FuncInfo->getStackPtrOffsetReg())
2075 .addImm(0)
2076 .addImm(Mask)
2077 .addMemOperand(MMO);
2078 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2079
2080 // Add the register to the liveins. This is necessary because if any of the
2081 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2082 // then the whole block will be marked as reserved and `updateLiveness` will
2083 // skip it.
2084 MBB.addLiveIn(Reg);
2085 }
2086
2087 MBB.sortUniqueLiveIns();
2088 return true;
2089}
2090
2091 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
2092 MachineFunction &MF,
2093 MachineBasicBlock &MBB,
2094 MachineBasicBlock::iterator I) const {
2095 int64_t Amount = I->getOperand(0).getImm();
2096 if (Amount == 0)
2097 return MBB.erase(I);
2098
2099 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2100 const SIInstrInfo *TII = ST.getInstrInfo();
2101 const DebugLoc &DL = I->getDebugLoc();
2102 unsigned Opc = I->getOpcode();
2103 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2104 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2105
2106 if (!hasReservedCallFrame(MF)) {
2107 Amount = alignTo(Amount, getStackAlign());
2108 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2109 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2110 Register SPReg = MFI->getStackPtrOffsetReg();
2111
2112 Amount *= getScratchScaleFactor(ST);
2113 if (IsDestroy)
2114 Amount = -Amount;
2115 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2116 .addReg(SPReg)
2117 .addImm(Amount);
2118 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2119 } else if (CalleePopAmount != 0) {
2120 llvm_unreachable("is this used?");
2121 }
2122
2123 return MBB.erase(I);
2124}
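// Worked example (illustrative, assuming getScratchScaleFactor is the
// wavefront size when MUBUF scratch is used): a 64-byte call frame setup
// pseudo is aligned to the stack alignment, scaled to 64 * 64 = 4096 on a
// wave64 target, and added to the SP; the matching destroy pseudo subtracts
// the same amount.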
2125
2126/// Returns true if the frame will require a reference to the stack pointer.
2127///
2128/// This is the set of conditions common to setting up the stack pointer in a
2129/// kernel, and for using a frame pointer in a callable function.
2130///
2131/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2132/// references SP.
2133 static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
2134 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2135}
2136
2137 // The FP for kernels is always known 0, so we never really need to set up an
2138 // explicit register for it. However, DisableFramePointerElim will force us to
2139 // use a register for it.
2140 bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
2141 const MachineFrameInfo &MFI = MF.getFrameInfo();
2142
2143 // For entry & chain functions we can use an immediate offset in most cases,
2144 // so the presence of calls doesn't imply we need a distinct frame pointer.
2145 if (MFI.hasCalls() &&
2146 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
2147 !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
2148 // All offsets are unsigned, so need to be addressed in the same direction
2149 // as stack growth.
2150
2151 // FIXME: This function is pretty broken, since it can be called before the
2152 // frame layout is determined or CSR spills are inserted.
2153 return MFI.getStackSize() != 0;
2154 }
2155
2156 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2157 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2158 MF) ||
2161}
2162
2163 bool SIFrameLowering::mayReserveScratchForCWSR(
2164 const MachineFunction &MF) const {
2165 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2166 AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) &&
2167 AMDGPU::isCompute(MF.getFunction().getCallingConv());
2168 }
2169
2170// This is essentially a reduced version of hasFP for entry functions. Since the
2171// stack pointer is known 0 on entry to kernels, we never really need an FP
2172// register. We may need to initialize the stack pointer depending on the frame
2173// properties, which logically overlaps many of the cases where an ordinary
2174// function would require an FP.
2175// Also used for chain functions. While not technically entry functions, chain
2176// functions may need to set up a stack pointer in some situations.
2177 bool SIFrameLowering::requiresStackPointerReference(
2178 const MachineFunction &MF) const {
2179 // Callable functions always require a stack pointer reference.
2182 "only expected to call this for entry points and chain functions");
2183
2184 const MachineFrameInfo &MFI = MF.getFrameInfo();
2185
2186 // Entry points ordinarily don't need to initialize SP. We have to set it up
2187 // for callees if there are any. Also note tail calls are impossible/don't
2188 // make any sense for kernels.
2189 if (MFI.hasCalls())
2190 return true;
2191
2192 // We still need to initialize the SP if we're doing anything weird that
2193 // references the SP, like variable sized stack objects.
2194 return frameTriviallyRequiresSP(MFI);
2195}