1//===----------------------- SIFrameLowering.cpp --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "SIFrameLowering.h"
10#include "AMDGPU.h"
11#include "GCNSubtarget.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "frame-info"
22
24 "amdgpu-spill-vgpr-to-agpr",
25 cl::desc("Enable spilling VGPRs to AGPRs"),
27 cl::init(true));
28
29// Find a register matching \p RC from \p LiveUnits which is unused and
30// available throughout the function. On failure, returns AMDGPU::NoRegister.
31// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
32// MCRegisters. This should reduce the number of iterations and avoid redundant
33// checking.
34static MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
35                                     const LiveRegUnits &LiveUnits,
36 const TargetRegisterClass &RC) {
37 for (MCRegister Reg : RC) {
38 if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
39 !MRI.isReserved(Reg))
40 return Reg;
41 }
42 return MCRegister();
43}
44
45// Find a scratch register that we can use in the prologue. We avoid using
46// callee-save registers since they may appear to be free when this is called
47// from canUseAsPrologue (during shrink wrapping), but then no longer be free
48// when this is called from emitPrologue.
49static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
50                                                    LiveRegUnits &LiveUnits,
51                                                    const TargetRegisterClass &RC, bool Unused = false) {
52 // Mark callee saved registers as used so we will not choose them.
53 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
54 for (unsigned i = 0; CSRegs[i]; ++i)
55 LiveUnits.addReg(CSRegs[i]);
56
57 // We are looking for a register that can be used throughout the entire
58 // function, so any use is unacceptable.
59 if (Unused)
60 return findUnusedRegister(MRI, LiveUnits, RC);
61
62 for (MCRegister Reg : RC) {
63 if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
64 return Reg;
65 }
66
67 return MCRegister();
68}
69
70/// Query target location for spilling SGPRs
71/// \p IncludeScratchCopy : Also look for free scratch SGPRs
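/// In decreasing order of preference, the register is saved by copying it to an
/// unused scratch SGPR, by spilling it to a free lane of a physical VGPR, or,
/// failing both, by spilling it to a stack slot in memory.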
72static void getVGPRSpillLaneOrTempRegister(
73    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
74 const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
75 bool IncludeScratchCopy = true) {
77 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
78
79 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
80 const SIRegisterInfo *TRI = ST.getRegisterInfo();
81 unsigned Size = TRI->getSpillSize(RC);
82 Align Alignment = TRI->getSpillAlign(RC);
83
84 // We need to save and restore the given SGPR.
85
86 Register ScratchSGPR;
87 // 1: Try to save the given register into an unused scratch SGPR. The
88 // LiveUnits should have all the callee saved registers marked as used. For
89 // certain cases we skip copy to scratch SGPR.
90 if (IncludeScratchCopy)
91 ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
92
93 if (!ScratchSGPR) {
94 int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
96
97 if (TRI->spillSGPRToVGPR() &&
98 MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
99 /*IsPrologEpilog=*/true)) {
100      // 2: There was no free scratch SGPR to copy into, so spill the SGPR to a
101      // lane of a physical VGPR instead.
105
106 LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
107 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
108 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
109 << '\n';);
110 } else {
111      // Remove dead <FI> index
112      FrameInfo.RemoveStackObject(FI);
113 // 3: If all else fails, spill the register to memory.
114 FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
116 SGPR,
118 LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
119 << printReg(SGPR, TRI) << '\n');
120 }
121 } else {
125 LiveUnits.addReg(ScratchSGPR);
126 LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
127 << printReg(ScratchSGPR, TRI) << '\n');
128 }
129}
130
131// We need to emit these stack operations specially because they use a
132// different frame register than the one getFrameRegister would return for
133// the rest of the function.
134static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
135 const SIMachineFunctionInfo &FuncInfo,
136 LiveRegUnits &LiveUnits, MachineFunction &MF,
139 Register SpillReg, int FI, Register FrameReg,
140 int64_t DwordOff = 0) {
141 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
142 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
143
144 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
147 PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
148 FrameInfo.getObjectAlign(FI));
149 LiveUnits.addReg(SpillReg);
150 bool IsKill = !MBB.isLiveIn(SpillReg);
151 TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
152 DwordOff, MMO, nullptr, &LiveUnits);
153 if (IsKill)
154 LiveUnits.removeReg(SpillReg);
155}
156
157static void buildEpilogRestore(const GCNSubtarget &ST,
158 const SIRegisterInfo &TRI,
159 const SIMachineFunctionInfo &FuncInfo,
160 LiveRegUnits &LiveUnits, MachineFunction &MF,
163 const DebugLoc &DL, Register SpillReg, int FI,
164 Register FrameReg, int64_t DwordOff = 0) {
165 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
166 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
167
168 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
171 PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
172 FrameInfo.getObjectAlign(FI));
173 TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
174 DwordOff, MMO, nullptr, &LiveUnits);
175}
176
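// Materialize the 64-bit pointer to the global information table (GIT): the low
// half comes from the SGPR provided by the driver, and the high half comes
// either from the amdgpu-git-ptr-high attribute or from the current PC.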
177static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
178                        const DebugLoc &DL, const SIInstrInfo *TII,
179 Register TargetReg) {
180 MachineFunction *MF = MBB.getParent();
182 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
183 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
184 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
185 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
186
187 if (MFI->getGITPtrHigh() != 0xffffffff) {
188 BuildMI(MBB, I, DL, SMovB32, TargetHi)
189 .addImm(MFI->getGITPtrHigh())
190 .addReg(TargetReg, RegState::ImplicitDefine);
191 } else {
192 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
193 BuildMI(MBB, I, DL, GetPC64, TargetReg);
194 }
195 Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
196 MF->getRegInfo().addLiveIn(GitPtrLo);
197 MBB.addLiveIn(GitPtrLo);
198 BuildMI(MBB, I, DL, SMovB32, TargetLo)
199 .addReg(GitPtrLo);
200}
201
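// Lazily initialize LiveUnits: for a prologue, seed it with the block live-ins;
// for an epilogue, seed it with the block live-outs and step backwards over the
// instruction at the insertion point.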
202static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
203 const SIMachineFunctionInfo *FuncInfo,
205 MachineBasicBlock::iterator MBBI, bool IsProlog) {
206 if (LiveUnits.empty()) {
207 LiveUnits.init(TRI);
208 if (IsProlog) {
209 LiveUnits.addLiveIns(MBB);
210 } else {
211 // In epilog.
212 LiveUnits.addLiveOuts(MBB);
213 LiveUnits.stepBackward(*MBBI);
214 }
215 }
216}
217
218namespace llvm {
219
220// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
221// BP, etc. These spills are delayed until the current function's frame is
222// finalized. For a given register, the builder uses the
223// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
227 MachineFunction &MF;
228 const GCNSubtarget &ST;
229 MachineFrameInfo &MFI;
230 SIMachineFunctionInfo *FuncInfo;
231 const SIInstrInfo *TII;
232 const SIRegisterInfo &TRI;
233 Register SuperReg;
235 LiveRegUnits &LiveUnits;
236 const DebugLoc &DL;
237 Register FrameReg;
238 ArrayRef<int16_t> SplitParts;
239 unsigned NumSubRegs;
240 unsigned EltSize = 4;
241
242 void saveToMemory(const int FI) const {
243 MachineRegisterInfo &MRI = MF.getRegInfo();
244 assert(!MFI.isDeadObjectIndex(FI));
245
246 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
247
249 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
250 if (!TmpVGPR)
251 report_fatal_error("failed to find free scratch register");
252
253 for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
254 Register SubReg = NumSubRegs == 1
255 ? SuperReg
256 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
257 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
258 .addReg(SubReg);
259
260 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
261 FI, FrameReg, DwordOff);
262 DwordOff += 4;
263 }
264 }
265
266 void saveToVGPRLane(const int FI) const {
267 assert(!MFI.isDeadObjectIndex(FI));
268
269 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
271 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
272 assert(Spill.size() == NumSubRegs);
273
274 for (unsigned I = 0; I < NumSubRegs; ++I) {
275 Register SubReg = NumSubRegs == 1
276 ? SuperReg
277 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
278 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
279 Spill[I].VGPR)
280 .addReg(SubReg)
281 .addImm(Spill[I].Lane)
282 .addReg(Spill[I].VGPR, RegState::Undef);
283 }
284 }
285
286 void copyToScratchSGPR(Register DstReg) const {
287 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
288 .addReg(SuperReg)
290 }
291
292 void restoreFromMemory(const int FI) {
293 MachineRegisterInfo &MRI = MF.getRegInfo();
294
295 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
297 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
298 if (!TmpVGPR)
299 report_fatal_error("failed to find free scratch register");
300
301 for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
302 Register SubReg = NumSubRegs == 1
303 ? SuperReg
304 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
305
306 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
307 TmpVGPR, FI, FrameReg, DwordOff);
308 MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
309 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
310 .addReg(TmpVGPR, RegState::Kill);
311 DwordOff += 4;
312 }
313 }
314
315 void restoreFromVGPRLane(const int FI) {
316 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
318 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
319 assert(Spill.size() == NumSubRegs);
320
321 for (unsigned I = 0; I < NumSubRegs; ++I) {
322 Register SubReg = NumSubRegs == 1
323 ? SuperReg
324 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
325 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
326 .addReg(Spill[I].VGPR)
327 .addImm(Spill[I].Lane);
328 }
329 }
330
331 void copyFromScratchSGPR(Register SrcReg) const {
332 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
333 .addReg(SrcReg)
335 }
336
337public:
342 const DebugLoc &DL, const SIInstrInfo *TII,
343 const SIRegisterInfo &TRI,
344 LiveRegUnits &LiveUnits, Register FrameReg)
345 : MI(MI), MBB(MBB), MF(*MBB.getParent()),
346 ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
347 FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
348 SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
349 FrameReg(FrameReg) {
350 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
351 SplitParts = TRI.getRegSplitParts(RC, EltSize);
352 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
353
354 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
355 }
356
357 void save() {
358 switch (SI.getKind()) {
360 return saveToMemory(SI.getIndex());
362 return saveToVGPRLane(SI.getIndex());
364 return copyToScratchSGPR(SI.getReg());
365 }
366 }
367
368 void restore() {
369 switch (SI.getKind()) {
371 return restoreFromMemory(SI.getIndex());
373 return restoreFromVGPRLane(SI.getIndex());
375 return copyFromScratchSGPR(SI.getReg());
376 }
377 }
378};
379
380} // namespace llvm
381
382// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
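// On subtargets where flat scratch is a pointer (GFX9+), the base is the
// ABI-provided flat scratch init value plus this wave's scratch offset and is
// written to FLAT_SCR (directly, or via S_SETREG on GFX10+); older targets
// instead program the scratch size and a 256-byte-unit offset.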
383void SIFrameLowering::emitEntryFunctionFlatScratchInit(
385 const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
386 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
387 const SIInstrInfo *TII = ST.getInstrInfo();
388 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
389 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
390
391 // We don't need this if we only have spills since there is no user facing
392 // scratch.
393
394 // TODO: If we know we don't have flat instructions earlier, we can omit
395 // this from the input registers.
396 //
397 // TODO: We only need to know if we access scratch space through a flat
398 // pointer. Because we only detect if flat instructions are used at all,
399 // this will be used more often than necessary on VI.
400
401 Register FlatScrInitLo;
402 Register FlatScrInitHi;
403
404 if (ST.isAmdPalOS()) {
405 // Extract the scratch offset from the descriptor in the GIT
406 LiveRegUnits LiveUnits;
407 LiveUnits.init(*TRI);
408 LiveUnits.addLiveIns(MBB);
409
410 // Find unused reg to load flat scratch init into
411 MachineRegisterInfo &MRI = MF.getRegInfo();
412 Register FlatScrInit = AMDGPU::NoRegister;
413 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
414 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
415 AllSGPR64s = AllSGPR64s.slice(
416 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
417 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
418 for (MCPhysReg Reg : AllSGPR64s) {
419 if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
420 MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
421 FlatScrInit = Reg;
422 break;
423 }
424 }
425 assert(FlatScrInit && "Failed to find free register for scratch init");
426
427 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
428 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
429
430 buildGitPtr(MBB, I, DL, TII, FlatScrInit);
431
432 // We now have the GIT ptr - now get the scratch descriptor from the entry
433 // at offset 0 (or offset 16 for a compute shader).
434 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
435 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
436 auto *MMO = MF.getMachineMemOperand(
437 PtrInfo,
440 8, Align(4));
441 unsigned Offset =
443 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
444 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
445 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
446 .addReg(FlatScrInit)
447 .addImm(EncodedOffset) // offset
448 .addImm(0) // cpol
449 .addMemOperand(MMO);
450
451 // Mask the offset in [47:0] of the descriptor
452 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
453 auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
454 .addReg(FlatScrInitHi)
455 .addImm(0xffff);
456 And->getOperand(3).setIsDead(); // Mark SCC as dead.
457 } else {
458 Register FlatScratchInitReg =
460 assert(FlatScratchInitReg);
461
462 MachineRegisterInfo &MRI = MF.getRegInfo();
463 MRI.addLiveIn(FlatScratchInitReg);
464 MBB.addLiveIn(FlatScratchInitReg);
465
466 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
467 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
468 }
469
470 // Do a 64-bit pointer add.
471 if (ST.flatScratchIsPointer()) {
472 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
473 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
474 .addReg(FlatScrInitLo)
475 .addReg(ScratchWaveOffsetReg);
476 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
477 FlatScrInitHi)
478 .addReg(FlatScrInitHi)
479 .addImm(0);
480 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
481
482 using namespace AMDGPU::Hwreg;
483 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
484 .addReg(FlatScrInitLo)
485 .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
486 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
487 .addReg(FlatScrInitHi)
488 .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
489 return;
490 }
491
492 // For GFX9.
493 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
494 .addReg(FlatScrInitLo)
495 .addReg(ScratchWaveOffsetReg);
496 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
497 AMDGPU::FLAT_SCR_HI)
498 .addReg(FlatScrInitHi)
499 .addImm(0);
500 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
501
502 return;
503 }
504
505 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
506
507 // Copy the size in bytes.
508 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
509 .addReg(FlatScrInitHi, RegState::Kill);
510
511 // Add wave offset in bytes to private base offset.
512 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
513 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
514 .addReg(FlatScrInitLo)
515 .addReg(ScratchWaveOffsetReg);
516
517 // Convert offset to 256-byte units.
518 auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
519 AMDGPU::FLAT_SCR_HI)
520 .addReg(FlatScrInitLo, RegState::Kill)
521 .addImm(8);
522 LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
523}
524
525// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
526// memory. They should have been removed by now.
527static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
528  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
529 I != E; ++I) {
530 if (!MFI.isDeadObjectIndex(I))
531 return false;
532 }
533
534 return true;
535}
536
537// Shift down registers reserved for the scratch RSRC.
538Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
539 MachineFunction &MF) const {
540
541 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
542 const SIInstrInfo *TII = ST.getInstrInfo();
543 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
544 MachineRegisterInfo &MRI = MF.getRegInfo();
545 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
546
547 assert(MFI->isEntryFunction());
548
549 Register ScratchRsrcReg = MFI->getScratchRSrcReg();
550
551 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
553 return Register();
554
555 if (ST.hasSGPRInitBug() ||
556 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
557 return ScratchRsrcReg;
558
559 // We reserved the last registers for this. Shift it down to the end of those
560 // which were actually used.
561 //
562 // FIXME: It might be safer to use a pseudoregister before replacement.
563
564 // FIXME: We should be able to eliminate unused input registers. We only
565 // cannot do this for the resources required for scratch access. For now we
566 // skip over user SGPRs and may leave unused holes.
567
568 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
569 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
570 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
571
572 // Skip the last N reserved elements because they should have already been
573 // reserved for VCC etc.
574 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
575 for (MCPhysReg Reg : AllSGPR128s) {
576 // Pick the first unallocated one. Make sure we don't clobber the other
577 // reserved input we needed. Also for PAL, make sure we don't clobber
578 // the GIT pointer passed in SGPR0 or SGPR8.
579 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
580 (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
581 MRI.replaceRegWith(ScratchRsrcReg, Reg);
583 MRI.reserveReg(Reg, TRI);
584 return Reg;
585 }
586 }
587
588 return ScratchRsrcReg;
589}
590
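// Stack sizes and offsets are tracked in per-lane bytes. With flat scratch the
// SP/FP are also per-lane, so no scaling is needed; with MUBUF scratch they
// index the wave's swizzled scratch, so per-lane sizes are scaled by the
// wavefront size (e.g. a 16-byte frame advances SP by 1024 bytes on wave64).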
591static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
592 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
593}
594
596 MachineBasicBlock &MBB) const {
597 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
598
599 // FIXME: If we only have SGPR spills, we won't actually be using scratch
600 // memory since these spill to VGPRs. We should be cleaning up these unused
601 // SGPR spill frame indices somewhere.
602
603 // FIXME: We still have implicit uses on SGPR spill instructions in case they
604 // need to spill to vector memory. It's likely that will not happen, but at
605 // this point it appears we need the setup. This part of the prolog should be
606 // emitted after frame indices are eliminated.
607
608 // FIXME: Remove all of the isPhysRegUsed checks
609
611 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
612 const SIInstrInfo *TII = ST.getInstrInfo();
613 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
615 const Function &F = MF.getFunction();
616 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
617
618 assert(MFI->isEntryFunction());
619
620 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
622
623 // We need to do the replacement of the private segment buffer register even
624 // if there are no stack objects. There could be stores to undef or a
625 // constant without an associated object.
626 //
627 // This will return `Register()` in cases where there are no actual
628 // uses of the SRSRC.
629 Register ScratchRsrcReg;
630 if (!ST.enableFlatScratch())
631 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
632
633 // Make the selected register live throughout the function.
634 if (ScratchRsrcReg) {
635 for (MachineBasicBlock &OtherBB : MF) {
636 if (&OtherBB != &MBB) {
637 OtherBB.addLiveIn(ScratchRsrcReg);
638 }
639 }
640 }
641
642 // Now that we have fixed the reserved SRSRC we need to locate the
643 // (potentially) preloaded SRSRC.
644 Register PreloadedScratchRsrcReg;
645 if (ST.isAmdHsaOrMesa(F)) {
646 PreloadedScratchRsrcReg =
648 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
649 // We added live-ins during argument lowering, but since they were not
650 // used they were deleted. We're adding the uses now, so add them back.
651 MRI.addLiveIn(PreloadedScratchRsrcReg);
652 MBB.addLiveIn(PreloadedScratchRsrcReg);
653 }
654 }
655
656 // Debug location must be unknown since the first debug location is used to
657 // determine the end of the prologue.
658 DebugLoc DL;
660
661  // We found the SRSRC first because it needs four registers and has an
662  // alignment requirement. If the SRSRC that we found clobbers the scratch
663  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
664  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
665  // free SGPR.
666 Register ScratchWaveOffsetReg;
667 if (PreloadedScratchWaveOffsetReg &&
668 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
669 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
670 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
671 AllSGPRs = AllSGPRs.slice(
672 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
673 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
674 for (MCPhysReg Reg : AllSGPRs) {
675 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
676 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
677 ScratchWaveOffsetReg = Reg;
678 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
679 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
680 break;
681 }
682 }
683
684 // FIXME: We can spill incoming arguments and restore at the end of the
685 // prolog.
686 if (!ScratchWaveOffsetReg)
688 "could not find temporary scratch offset register in prolog");
689 } else {
690 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
691 }
692 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
693
694 unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
695 if (!mayReserveScratchForCWSR(MF)) {
696 if (hasFP(MF)) {
698 assert(FPReg != AMDGPU::FP_REG);
699 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
700 }
701
704 assert(SPReg != AMDGPU::SP_REG);
705 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
706 }
707 } else {
708 // We need to check if we're on a compute queue - if we are, then the CWSR
709 // trap handler may need to store some VGPRs on the stack. The first VGPR
710 // block is saved separately, so we only need to allocate space for any
711 // additional VGPR blocks used. For now, we will make sure there's enough
712 // room for the theoretical maximum number of VGPRs that can be allocated.
713 // FIXME: Figure out if the shader uses fewer VGPRs in practice.
714 assert(hasFP(MF));
716 assert(FPReg != AMDGPU::FP_REG);
717 unsigned VGPRSize = llvm::alignTo(
718 (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
720 MFI->getDynamicVGPRBlockSize())) *
721 4,
722 FrameInfo.getMaxAlign());
724
725 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), FPReg)
728 // The MicroEngine ID is 0 for the graphics queue, and 1 or 2 for compute
729 // (3 is unused, so we ignore it). Unfortunately, S_GETREG doesn't set
730 // SCC, so we need to check for 0 manually.
731 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32)).addImm(0).addReg(FPReg);
732 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), FPReg).addImm(VGPRSize);
735 assert(SPReg != AMDGPU::SP_REG);
736
737 // If at least one of the constants can be inlined, then we can use
738 // s_cselect. Otherwise, use a mov and cmovk.
739 if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
741 ST.hasInv2PiInlineImm())) {
742 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
743 .addImm(Offset + VGPRSize)
744 .addImm(Offset);
745 } else {
746 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
747 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
748 .addImm(Offset + VGPRSize);
749 }
750 }
751 }
752
753 bool NeedsFlatScratchInit =
755 (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
756 (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
757
758 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
759 PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
760 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
761 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
762 }
763
764 if (NeedsFlatScratchInit) {
765 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
766 }
767
768 if (ScratchRsrcReg) {
769 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
770 PreloadedScratchRsrcReg,
771 ScratchRsrcReg, ScratchWaveOffsetReg);
772 }
773}
774
775// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
776void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
778 const DebugLoc &DL, Register PreloadedScratchRsrcReg,
779 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
780
781 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
782 const SIInstrInfo *TII = ST.getInstrInfo();
783 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
785 const Function &Fn = MF.getFunction();
786
787 if (ST.isAmdPalOS()) {
788 // The pointer to the GIT is formed from the offset passed in and either
789 // the amdgpu-git-ptr-high function attribute or the top part of the PC
790 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
791 Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
792
793 buildGitPtr(MBB, I, DL, TII, Rsrc01);
794
795 // We now have the GIT ptr - now get the scratch descriptor from the entry
796 // at offset 0 (or offset 16 for a compute shader).
798 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
799 auto *MMO = MF.getMachineMemOperand(
800 PtrInfo,
803 16, Align(4));
804 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
805 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
806 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
807 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
808 .addReg(Rsrc01)
809 .addImm(EncodedOffset) // offset
810 .addImm(0) // cpol
811 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
812 .addMemOperand(MMO);
813
814    // The driver will always set the SRD for wave 64 (bits 118:117 of the
815    // descriptor / bits 22:21 of the third sub-reg will be 0b11).
816    // If the shader is actually wave32, we have to modify the const_index_stride
817    // field of the descriptor's 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
818    // reason the driver does this is that there can be cases where it presents
819    // 2 shaders with different wave sizes (e.g. VsFs).
820 // TODO: convert to using SCRATCH instructions or multiple SRD buffers
821 if (ST.isWave32()) {
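      // The driver hands us bits 22:21 as 0b11 (wave64); clearing bit 21 leaves
      // 0b10, the wave32 (stride=32) encoding.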
822 const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
823 BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
824 .addImm(21)
825 .addReg(Rsrc03);
826 }
827 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
828 assert(!ST.isAmdHsaOrMesa(Fn));
829 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
830
831 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
832 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
833
834 // Use relocations to get the pointer, and setup the other bits manually.
835 uint64_t Rsrc23 = TII->getScratchRsrcWords23();
836
838 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
839
841 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
842
843 BuildMI(MBB, I, DL, Mov64, Rsrc01)
845 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
846 } else {
847 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
848
849 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
850 auto *MMO = MF.getMachineMemOperand(
851 PtrInfo,
854 8, Align(4));
855 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
857 .addImm(0) // offset
858 .addImm(0) // cpol
859 .addMemOperand(MMO)
860 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
861
864 }
865 } else {
866 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
867 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
868
869 BuildMI(MBB, I, DL, SMovB32, Rsrc0)
870 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
871 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
872
873 BuildMI(MBB, I, DL, SMovB32, Rsrc1)
874 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
875 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
876 }
877
878 BuildMI(MBB, I, DL, SMovB32, Rsrc2)
879 .addImm(Lo_32(Rsrc23))
880 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
881
882 BuildMI(MBB, I, DL, SMovB32, Rsrc3)
883 .addImm(Hi_32(Rsrc23))
884 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
885 } else if (ST.isAmdHsaOrMesa(Fn)) {
886 assert(PreloadedScratchRsrcReg);
887
888 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
889 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
890 .addReg(PreloadedScratchRsrcReg, RegState::Kill);
891 }
892 }
893
894 // Add the scratch wave offset into the scratch RSRC.
895 //
896 // We only want to update the first 48 bits, which is the base address
897 // pointer, without touching the adjacent 16 bits of flags. We know this add
898 // cannot carry-out from bit 47, otherwise the scratch allocation would be
899 // impossible to fit in the 48-bit global address space.
900 //
901 // TODO: Evaluate if it is better to just construct an SRD using the flat
902 // scratch init and some constants rather than update the one we are passed.
903 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
904 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
905
906 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
907 // the kernel body via inreg arguments.
908 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
909 .addReg(ScratchRsrcSub0)
910 .addReg(ScratchWaveOffsetReg)
911 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
912 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
913 .addReg(ScratchRsrcSub1)
914 .addImm(0)
915 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
916 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
917}
918
920 switch (ID) {
924 return true;
927 return false;
928 }
929 llvm_unreachable("Invalid TargetStackID::Value");
930}
931
932// Activate only the inactive lanes when \p EnableInactiveLanes is true.
933// Otherwise, activate all lanes. It returns the saved exec.
935 MachineFunction &MF,
938 const DebugLoc &DL, bool IsProlog,
939 bool EnableInactiveLanes) {
940 Register ScratchExecCopy;
942 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
943 const SIInstrInfo *TII = ST.getInstrInfo();
944 const SIRegisterInfo &TRI = TII->getRegisterInfo();
946
947 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
948
949 if (FuncInfo->isWholeWaveFunction()) {
950 // Whole wave functions already have a copy of the original EXEC mask that
951 // we can use.
952 assert(IsProlog && "Epilog should look at return, not setup");
953 ScratchExecCopy =
954 TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
955 assert(ScratchExecCopy && "Couldn't find copy of EXEC");
956 } else {
957 ScratchExecCopy = findScratchNonCalleeSaveRegister(
958 MRI, LiveUnits, *TRI.getWaveMaskRegClass());
959 }
960
961 if (!ScratchExecCopy)
962 report_fatal_error("failed to find free scratch register");
963
964 LiveUnits.addReg(ScratchExecCopy);
965
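  // With an all-ones operand, S_OR_SAVEEXEC saves EXEC and then enables every
  // lane, while S_XOR_SAVEEXEC inverts EXEC so that only the previously
  // inactive lanes become active.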
966 const unsigned SaveExecOpc =
967 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
968 : AMDGPU::S_OR_SAVEEXEC_B32)
969 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
970 : AMDGPU::S_OR_SAVEEXEC_B64);
971 auto SaveExec =
972 BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
973 SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
974
975 return ScratchExecCopy;
976}
977
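// Emit the prologue stores for callee-saved state: whole-wave-mode VGPRs
// (scratch WWM registers in the inactive lanes, callee-saved WWM VGPRs in all
// lanes), followed by the delayed prolog/epilog SGPR saves such as FP and BP.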
981 Register FrameReg, Register FramePtrRegScratchCopy) const {
983 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
984 const SIInstrInfo *TII = ST.getInstrInfo();
985 const SIRegisterInfo &TRI = TII->getRegisterInfo();
987
988 // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
989 // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
990 // might end up flipping the EXEC bits twice.
991 Register ScratchExecCopy;
992 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
993 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
994 if (!WWMScratchRegs.empty())
995 ScratchExecCopy =
996 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
997 /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
998
999 auto StoreWWMRegisters =
1001 for (const auto &Reg : WWMRegs) {
1002 Register VGPR = Reg.first;
1003 int FI = Reg.second;
1004 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1005 VGPR, FI, FrameReg);
1006 }
1007 };
1008
1009 for (const Register Reg : make_first_range(WWMScratchRegs)) {
1010 if (!MRI.isReserved(Reg)) {
1011 MRI.addLiveIn(Reg);
1012 MBB.addLiveIn(Reg);
1013 }
1014 }
1015 StoreWWMRegisters(WWMScratchRegs);
1016
1017 auto EnableAllLanes = [&]() {
1018 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1019 BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
1020 };
1021
1022 if (!WWMCalleeSavedRegs.empty()) {
1023 if (ScratchExecCopy) {
1024 EnableAllLanes();
1025 } else {
1026 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1027 /*IsProlog*/ true,
1028 /*EnableInactiveLanes*/ false);
1029 }
1030 }
1031
1032 StoreWWMRegisters(WWMCalleeSavedRegs);
1033 if (FuncInfo->isWholeWaveFunction()) {
1034 // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose, so we can remove
1035 // it now. If we have already saved some WWM CSR registers, then the EXEC is
1036 // already -1 and we don't need to do anything else. Otherwise, set EXEC to
1037 // -1 here.
1038 if (!ScratchExecCopy)
1039 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
1040 /*EnableInactiveLanes*/ true);
1041 else if (WWMCalleeSavedRegs.empty())
1042 EnableAllLanes();
1043 TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1044 } else if (ScratchExecCopy) {
1045 // FIXME: Split block and make terminator.
1046 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1047 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
1048 .addReg(ScratchExecCopy, RegState::Kill);
1049 LiveUnits.addReg(ScratchExecCopy);
1050 }
1051
1052 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1053
1054 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1055    // Special handling for the FP spill:
1056    // Skip if FP is saved to a scratch SGPR; that save has already been emitted.
1057    // Otherwise, FP has been moved to a temporary register, so spill that
1058    // register instead.
1059 Register Reg =
1060 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1061 if (!Reg)
1062 continue;
1063
1064 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1065 LiveUnits, FrameReg);
1066 SB.save();
1067 }
1068
1069 // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
1070 // such scratch registers live throughout the function.
1071 SmallVector<Register, 1> ScratchSGPRs;
1072 FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
1073 if (!ScratchSGPRs.empty()) {
1074 for (MachineBasicBlock &MBB : MF) {
1075 for (MCPhysReg Reg : ScratchSGPRs)
1076 MBB.addLiveIn(Reg);
1077
1078 MBB.sortUniqueLiveIns();
1079 }
1080 if (!LiveUnits.empty()) {
1081 for (MCPhysReg Reg : ScratchSGPRs)
1082 LiveUnits.addReg(Reg);
1083 }
1084 }
1085}
1086
1090 Register FrameReg, Register FramePtrRegScratchCopy) const {
1091 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1092 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1093 const SIInstrInfo *TII = ST.getInstrInfo();
1094 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1095 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1096
1097 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1098    // Special handling for the FP restore:
1099    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
1100    // the FP value into a temporary register. The frame pointer should be
1101    // overwritten only at the end, once all other spills have been restored
1102    // from the current frame.
1103 Register Reg =
1104 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1105 if (!Reg)
1106 continue;
1107
1108 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1109 LiveUnits, FrameReg);
1110 SB.restore();
1111 }
1112
1113 // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
1114 // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
1115 // this, we might end up flipping the EXEC bits twice.
1116 Register ScratchExecCopy;
1117 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1118 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
1119 auto RestoreWWMRegisters =
1121 for (const auto &Reg : WWMRegs) {
1122 Register VGPR = Reg.first;
1123 int FI = Reg.second;
1124 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1125 VGPR, FI, FrameReg);
1126 }
1127 };
1128
1129 if (FuncInfo->isWholeWaveFunction()) {
1130 // For whole wave functions, the EXEC is already -1 at this point.
1131 // Therefore, we can restore the CSR WWM registers right away.
1132 RestoreWWMRegisters(WWMCalleeSavedRegs);
1133
1134 // The original EXEC is the first operand of the return instruction.
1135 MachineInstr &Return = MBB.instr_back();
1136 unsigned Opcode = Return.getOpcode();
1137 switch (Opcode) {
1138 case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
1139 Opcode = AMDGPU::SI_RETURN;
1140 break;
1141 case AMDGPU::SI_TCRETURN_GFX_WholeWave:
1142 Opcode = AMDGPU::SI_TCRETURN_GFX;
1143 break;
1144 default:
1145 llvm_unreachable("Unexpected return inst");
1146 }
1147 Register OrigExec = Return.getOperand(0).getReg();
1148
1149 if (!WWMScratchRegs.empty()) {
1150 unsigned XorOpc = ST.isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
1151 BuildMI(MBB, MBBI, DL, TII->get(XorOpc), TRI.getExec())
1152 .addReg(OrigExec)
1153 .addImm(-1);
1154 RestoreWWMRegisters(WWMScratchRegs);
1155 }
1156
1157 // Restore original EXEC.
1158 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1159 BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
1160
1161 // Drop the first operand and update the opcode.
1162 Return.removeOperand(0);
1163 Return.setDesc(TII->get(Opcode));
1164
1165 return;
1166 }
1167
1168 if (!WWMScratchRegs.empty()) {
1169 ScratchExecCopy =
1170 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1171 /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
1172 }
1173 RestoreWWMRegisters(WWMScratchRegs);
1174 if (!WWMCalleeSavedRegs.empty()) {
1175 if (ScratchExecCopy) {
1176 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1177 BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
1178 } else {
1179 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1180 /*IsProlog*/ false,
1181 /*EnableInactiveLanes*/ false);
1182 }
1183 }
1184
1185 RestoreWWMRegisters(WWMCalleeSavedRegs);
1186 if (ScratchExecCopy) {
1187 // FIXME: Split block and make terminator.
1188 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1189 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
1190 .addReg(ScratchExecCopy, RegState::Kill);
1191 }
1192}
1193
1195 MachineBasicBlock &MBB) const {
1197 if (FuncInfo->isEntryFunction()) {
1199 return;
1200 }
1201
1202 MachineFrameInfo &MFI = MF.getFrameInfo();
1203 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1204 const SIInstrInfo *TII = ST.getInstrInfo();
1205 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1207
1208 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1209 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1210 Register BasePtrReg =
1211 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
1212 LiveRegUnits LiveUnits;
1213
1215 // DebugLoc must be unknown since the first instruction with DebugLoc is used
1216 // to determine the end of the prologue.
1217 DebugLoc DL;
1218
1219 if (FuncInfo->isChainFunction()) {
1220 // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
1221 // are free to set one up if they need it.
1222 bool UseSP = requiresStackPointerReference(MF);
1223 if (UseSP) {
1224 assert(StackPtrReg != AMDGPU::SP_REG);
1225
1226 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
1228 }
1229 }
1230
1231 bool HasFP = false;
1232 bool HasBP = false;
1233 uint32_t NumBytes = MFI.getStackSize();
1234 uint32_t RoundedSize = NumBytes;
1235
1236 if (TRI.hasStackRealignment(MF))
1237 HasFP = true;
1238
1239 Register FramePtrRegScratchCopy;
1240 if (!HasFP && !hasFP(MF)) {
1241 // Emit the CSR spill stores with SP base register.
1242 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
1243 FuncInfo->isChainFunction() ? Register() : StackPtrReg,
1244 FramePtrRegScratchCopy);
1245 } else {
1246 // CSR spill stores will use FP as base register.
1247 Register SGPRForFPSaveRestoreCopy =
1248 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1249
1250 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
1251 if (SGPRForFPSaveRestoreCopy) {
1252 // Copy FP to the scratch register now and emit the CFI entry. It avoids
1253 // the extra FP copy needed in the other two cases when FP is spilled to
1254 // memory or to a VGPR lane.
1256 FramePtrReg,
1257 FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
1258 DL, TII, TRI, LiveUnits, FramePtrReg);
1259 SB.save();
1260 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1261 } else {
1262 // Copy FP into a new scratch register so that its previous value can be
1263 // spilled after setting up the new frame.
1264 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1265 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1266 if (!FramePtrRegScratchCopy)
1267 report_fatal_error("failed to find free scratch register");
1268
1269 LiveUnits.addReg(FramePtrRegScratchCopy);
1270 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
1271 .addReg(FramePtrReg);
1272 }
1273 }
1274
1275 if (HasFP) {
1276 const unsigned Alignment = MFI.getMaxAlign().value();
1277
1278 RoundedSize += Alignment;
1279 if (LiveUnits.empty()) {
1280 LiveUnits.init(TRI);
1281 LiveUnits.addLiveIns(MBB);
1282 }
1283
1284 // s_add_i32 s33, s32, NumBytes
1285 // s_and_b32 s33, s33, 0b111...0000
1286 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
1287 .addReg(StackPtrReg)
1288 .addImm((Alignment - 1) * getScratchScaleFactor(ST))
1290 auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
1291 .addReg(FramePtrReg, RegState::Kill)
1292 .addImm(-Alignment * getScratchScaleFactor(ST))
1294 And->getOperand(3).setIsDead(); // Mark SCC as dead.
1295 FuncInfo->setIsStackRealigned(true);
1296 } else if ((HasFP = hasFP(MF))) {
1297 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1298 .addReg(StackPtrReg)
1300 }
1301
1302 // If FP is used, emit the CSR spills with FP base register.
1303 if (HasFP) {
1304 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1305 FramePtrRegScratchCopy);
1306 if (FramePtrRegScratchCopy)
1307 LiveUnits.removeReg(FramePtrRegScratchCopy);
1308 }
1309
1310 // If we need a base pointer, set it up here. It's whatever the value of
1311 // the stack pointer is at this point. Any variable size objects will be
1312 // allocated after this, so we can still use the base pointer to reference
1313 // the incoming arguments.
1314 if ((HasBP = TRI.hasBasePointer(MF))) {
1315 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1316 .addReg(StackPtrReg)
1318 }
1319
1320 if (HasFP && RoundedSize != 0) {
1321 auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1322 .addReg(StackPtrReg)
1323 .addImm(RoundedSize * getScratchScaleFactor(ST))
1325 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1326 }
1327
1328 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1329 (void)FPSaved;
1330 assert((!HasFP || FPSaved) &&
1331 "Needed to save FP but didn't save it anywhere");
1332
1333 // If we allow spilling to AGPRs we may have saved FP but then spill
1334 // everything into AGPRs instead of the stack.
1335 assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
1336 "Saved FP but didn't need it");
1337
1338 bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
1339 (void)BPSaved;
1340 assert((!HasBP || BPSaved) &&
1341 "Needed to save BP but didn't save it anywhere");
1342
1343 assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
1344}
1345
1347 MachineBasicBlock &MBB) const {
1348 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1349 if (FuncInfo->isEntryFunction())
1350 return;
1351
1352 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1353 const SIInstrInfo *TII = ST.getInstrInfo();
1354 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1356 LiveRegUnits LiveUnits;
1357 // Get the insert location for the epilogue. If there were no terminators in
1358 // the block, get the last instruction.
1360 DebugLoc DL;
1361 if (!MBB.empty()) {
1362 MBBI = MBB.getLastNonDebugInstr();
1363 if (MBBI != MBB.end())
1364 DL = MBBI->getDebugLoc();
1365
1366 MBBI = MBB.getFirstTerminator();
1367 }
1368
1369 const MachineFrameInfo &MFI = MF.getFrameInfo();
1370 uint32_t NumBytes = MFI.getStackSize();
1371 uint32_t RoundedSize = FuncInfo->isStackRealigned()
1372 ? NumBytes + MFI.getMaxAlign().value()
1373 : NumBytes;
1374 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1375 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1376 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1377
1378 if (RoundedSize != 0) {
1379 if (TRI.hasBasePointer(MF)) {
1380 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1381 .addReg(TRI.getBaseRegister())
1383 } else if (hasFP(MF)) {
1384 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1385 .addReg(FramePtrReg)
1387 }
1388 }
1389
1390 Register FramePtrRegScratchCopy;
1391 Register SGPRForFPSaveRestoreCopy =
1392 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1393 if (FPSaved) {
1394    // CSR spill restores should use FP as the base register. If
1395    // SGPRForFPSaveRestoreCopy is not set, restore the previous value of FP
1396    // into a new scratch register and copy it to FP later, once the other
1397    // registers have been restored from the current stack frame.
1398 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1399 if (SGPRForFPSaveRestoreCopy) {
1400 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1401 } else {
1402 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1403 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1404 if (!FramePtrRegScratchCopy)
1405 report_fatal_error("failed to find free scratch register");
1406
1407 LiveUnits.addReg(FramePtrRegScratchCopy);
1408 }
1409
1410 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1411 FramePtrRegScratchCopy);
1412 }
1413
1414 if (FPSaved) {
1415 // Insert the copy to restore FP.
1416 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1417 : FramePtrRegScratchCopy;
1419 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1420 .addReg(SrcReg);
1421 if (SGPRForFPSaveRestoreCopy)
1423 } else {
1424 // Insert the CSR spill restores with SP as the base register.
1425 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits,
1426 FuncInfo->isChainFunction() ? Register() : StackPtrReg,
1427 FramePtrRegScratchCopy);
1428 }
1429}
1430
1431#ifndef NDEBUG
1433 const MachineFrameInfo &MFI = MF.getFrameInfo();
1434 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1435 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1436 I != E; ++I) {
1437 if (!MFI.isDeadObjectIndex(I) &&
1440 return false;
1441 }
1442 }
1443
1444 return true;
1445}
1446#endif
1447
1449 int FI,
1450 Register &FrameReg) const {
1451 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1452
1453 FrameReg = RI->getFrameRegister(MF);
1455}
1456
1458 MachineFunction &MF,
1459 RegScavenger *RS) const {
1460 MachineFrameInfo &MFI = MF.getFrameInfo();
1461
1462 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1463 const SIInstrInfo *TII = ST.getInstrInfo();
1464 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1467
1468 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1469                               && EnableSpillVGPRToAGPR;
1470
1471 if (SpillVGPRToAGPR) {
1472 // To track the spill frame indices handled in this pass.
1473 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1474 BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1475
1476 bool SeenDbgInstr = false;
1477
1478 for (MachineBasicBlock &MBB : MF) {
1480 int FrameIndex;
1481 if (MI.isDebugInstr())
1482 SeenDbgInstr = true;
1483
1484 if (TII->isVGPRSpill(MI)) {
1485 // Try to eliminate stack used by VGPR spills before frame
1486 // finalization.
1487 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1488 AMDGPU::OpName::vaddr);
1489 int FI = MI.getOperand(FIOp).getIndex();
1490 Register VReg =
1491 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1492 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1493 TRI->isAGPR(MRI, VReg))) {
1494 assert(RS != nullptr);
1496 RS->backward(std::next(MI.getIterator()));
1497 TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1498 SpillFIs.set(FI);
1499 continue;
1500 }
1501 } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
1502 TII->isLoadFromStackSlot(MI, FrameIndex))
1503 if (!MFI.isFixedObjectIndex(FrameIndex))
1504 NonVGPRSpillFIs.set(FrameIndex);
1505 }
1506 }
1507
1508 // Stack slot coloring may assign different objects to the same stack slot.
1509 // If not, then the VGPR to AGPR spill slot is dead.
1510 for (unsigned FI : SpillFIs.set_bits())
1511 if (!NonVGPRSpillFIs.test(FI))
1512 FuncInfo->setVGPRToAGPRSpillDead(FI);
1513
1514 for (MachineBasicBlock &MBB : MF) {
1515 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1516 MBB.addLiveIn(Reg);
1517
1518 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1519 MBB.addLiveIn(Reg);
1520
1521 MBB.sortUniqueLiveIns();
1522
1523 if (!SpillFIs.empty() && SeenDbgInstr) {
1524        // FIXME: The dead frame indices are replaced with a null register from
1525        // the debug value instructions. We should instead update them with the
1526        // correct register value, but the register value alone may not suffice.
1527 for (MachineInstr &MI : MBB) {
1528 if (MI.isDebugValue()) {
1529 uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
1530 if (MI.getOperand(StackOperandIdx).isFI() &&
1531 !MFI.isFixedObjectIndex(
1532 MI.getOperand(StackOperandIdx).getIndex()) &&
1533 SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
1534 MI.getOperand(StackOperandIdx)
1535 .ChangeToRegister(Register(), false /*isDef*/);
1536 }
1537 }
1538 }
1539 }
1540 }
1541 }
1542
1543 // At this point we've already allocated all spilled SGPRs to VGPRs if we
1544 // can. Any remaining SGPR spills will go to memory, so move them back to the
1545 // default stack.
1546 bool HaveSGPRToVMemSpill =
1547 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1549 "SGPR spill should have been removed in SILowerSGPRSpills");
1550
1551 // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1552 // but currently hasNonSpillStackObjects is set only from source
1553 // allocas. Stack temps produced from legalization are not counted currently.
1554 if (!allStackObjectsAreDead(MFI)) {
1555 assert(RS && "RegScavenger required if spilling");
1556
1557 // Add an emergency spill slot
1558 RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1559
1560 // If we are spilling SGPRs to memory with a large frame, we may need a
1561 // second VGPR emergency frame index.
1562 if (HaveSGPRToVMemSpill &&
1565 }
1566 }
1567}
1568
1570 MachineFunction &MF, RegScavenger *RS) const {
1571 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1572 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1575
1576 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1577    // On gfx908, we initially reserved the highest available VGPR for the AGPR
1578    // copy. Now that RA is done, check whether there is an unused VGPR that is
1579    // lower than the one reserved earlier. If one exists, use it for the AGPR
1580    // copy instead.
1581 Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
1582 Register UnusedLowVGPR =
1583 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1584 if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
1585 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1586 // Reserve this newly identified VGPR (for AGPR copy)
1587 // reserved registers should already be frozen at this point
1588 // so we can avoid calling MRI.freezeReservedRegs and just use
1589 // MRI.reserveReg
1590 FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1591 MRI.reserveReg(UnusedLowVGPR, TRI);
1592 }
1593 }
1594  // We initially reserved the highest available SGPR pair for long branches.
1595  // Now, after RA, we shift down to a lower unused one if one exists.
1596 Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
1597 Register UnusedLowSGPR =
1598 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
1599  // If LongBranchReservedReg is null, we didn't find a long branch and never
1600  // reserved a register to begin with, so there is nothing to shift down.
1601  // Likewise, if UnusedLowSGPR is null, there is no available lower register
1602  // to use, so just keep the original one we set.
1603 if (LongBranchReservedReg && UnusedLowSGPR) {
1604 FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
1605 MRI.reserveReg(UnusedLowSGPR, TRI);
1606 }
1607}
1608
1609// The special SGPR spills, like the ones needed for FP, BP, or any reserved
1610// registers, are delayed until frame lowering.
1612 MachineFunction &MF, BitVector &SavedVGPRs,
1613 bool NeedExecCopyReservedReg) const {
1614 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1617 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1618 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1619 LiveRegUnits LiveUnits;
1620 LiveUnits.init(*TRI);
1621 // Initially mark callee saved registers as used so we will not choose them
1622 // while looking for scratch SGPRs.
1623 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1624 for (unsigned I = 0; CSRegs[I]; ++I)
1625 LiveUnits.addReg(CSRegs[I]);
1626
1627 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1628
1629 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1630 if (NeedExecCopyReservedReg ||
1631 (ReservedRegForExecCopy &&
1632 MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1633 MRI.reserveReg(ReservedRegForExecCopy, TRI);
1634 Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1635 if (UnusedScratchReg) {
1636 // If found any unused scratch SGPR, reserve the register itself for Exec
1637 // copy and there is no need for any spills in that case.
1638 MFI->setSGPRForEXECCopy(UnusedScratchReg);
1639 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1640 LiveUnits.addReg(UnusedScratchReg);
1641 } else {
1642 // Needs spill.
1643 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1644 "Re-reserving spill slot for EXEC copy register");
1645 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1646 /*IncludeScratchCopy=*/false);
1647 }
1648 } else if (ReservedRegForExecCopy) {
1649 // Reset it at this point. No whole-wave copies or spills were
1650 // encountered.
1651 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1652 }
1653
1654 // hasFP only knows about stack objects that already exist. We're now
1655 // determining the stack slots that will be created, so we have to predict
1656 // them. Stack objects force FP usage with calls.
1657 //
1658 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1659 // don't want to report it here.
1660 //
1661 // FIXME: Is this really hasReservedCallFrame?
1662 const bool WillHaveFP =
1663 FrameInfo.hasCalls() &&
1664 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1665
1666 if (WillHaveFP || hasFP(MF)) {
1667 Register FramePtrReg = MFI->getFrameOffsetReg();
1668 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1669 "Re-reserving spill slot for FP");
1670 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1671 }
1672
1673 if (TRI->hasBasePointer(MF)) {
1674 Register BasePtrReg = TRI->getBaseRegister();
1675 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1676 "Re-reserving spill slot for BP");
1677 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1678 }
1679}
1680
1681// Only report VGPRs to generic code.
1682void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1683 BitVector &SavedVGPRs,
1684 RegScavenger *RS) const {
1685 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1686
1687 // If this is a function with the amdgpu_cs_chain[_preserve] calling
1688 // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1689 // we don't need to save and restore anything.
1690 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1691 return;
1692
1693 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1694
1695 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1696 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1697 const SIInstrInfo *TII = ST.getInstrInfo();
1698 bool NeedExecCopyReservedReg = false;
1699
1700 MachineInstr *ReturnMI = nullptr;
1701 for (MachineBasicBlock &MBB : MF) {
1702 for (MachineInstr &MI : MBB) {
1703 // TODO: Walking through all MBBs here would be a bad heuristic. Better
1704 // handle them elsewhere.
1705 if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1706 NeedExecCopyReservedReg = true;
1707 else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1708 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1709 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1710 (MFI->isChainFunction() &&
1711 TII->isChainCallOpcode(MI.getOpcode()))) {
1712 // We expect all returns to be the same size.
1713 assert(!ReturnMI ||
1714 (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1715 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1716 ReturnMI = &MI;
1717 }
1718 }
1719 }
1720
1721 SmallVector<Register> SortedWWMVGPRs;
1722 for (Register Reg : MFI->getWWMReservedRegs()) {
1723 // The shift-back is needed only for the VGPRs used for SGPR spills, which
1724 // are 32 bits in size. The SIPreAllocateWWMRegs pass can add tuples to the
1725 // WWM reserved registers.
1726 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1727 if (TRI->getRegSizeInBits(*RC) != 32)
1728 continue;
1729 SortedWWMVGPRs.push_back(Reg);
1730 }
1731
1732 sort(SortedWWMVGPRs, std::greater<Register>());
1733 MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
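// Repacking these 32-bit WWM VGPRs into the lowest available range presumably
// keeps the set of VGPRs that must be saved and counted against the function's
// VGPR budget as compact as possible.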
1734
1735 if (MFI->isEntryFunction())
1736 return;
1737
1738 if (MFI->isWholeWaveFunction()) {
1739 // In practice, all the VGPRs are WWM registers, and we will need to save at
1740 // least their inactive lanes. Add them to WWMReservedRegs.
1741 assert(!NeedExecCopyReservedReg &&
1742 "Whole wave functions can use the reg mapped for their i1 argument");
1743
1744 // FIXME: Be more efficient!
1745 unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
1746 for (MCRegister Reg :
1747 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1748 if (MF.getRegInfo().isPhysRegModified(Reg)) {
1749 MFI->reserveWWMRegister(Reg);
1750 MF.begin()->addLiveIn(Reg);
1751 }
1752 MF.begin()->sortUniqueLiveIns();
1753 }
1754
1755 // Remove any VGPRs used in the return value because these do not need to be saved.
1756 // This prevents CSR restore from clobbering return VGPRs.
1757 if (ReturnMI) {
1758 for (auto &Op : ReturnMI->operands()) {
1759 if (Op.isReg())
1760 SavedVGPRs.reset(Op.getReg());
1761 }
1762 }
1763
1764 // Create the stack objects for WWM registers now.
1765 for (Register Reg : MFI->getWWMReservedRegs()) {
1766 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1767 MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1768 TRI->getSpillAlign(*RC));
1769 }
1770
1771 // Ignore the SGPRs the default implementation found.
1772 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1773
1774 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1775 // On gfx908 there are no direct AGPR loads and stores, so spilling an AGPR
1776 // also requires a temporary VGPR.
1777 if (!ST.hasGFX90AInsts())
1778 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1779
1780 determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1781
1782 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1783 // allow the default insertion to handle them.
1784 for (auto &Reg : MFI->getWWMSpills())
1785 SavedVGPRs.reset(Reg.first);
1786}
1787
1788void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1789 BitVector &SavedRegs,
1790 RegScavenger *RS) const {
1791 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1792 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1793 if (MFI->isEntryFunction())
1794 return;
1795
1796 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1797 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1798
1799 // The SP is specifically managed and we don't want extra spills of it.
1800 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1801
1802 const BitVector AllSavedRegs = SavedRegs;
1803 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1804
1805 // We have to anticipate introducing CSR VGPR spills, or a spill of the
1806 // caller-saved VGPR reserved for SGPR spills, since we now always create a
1807 // stack entry for it even if there are no other stack objects, because a FP
1808 // is required whenever there is a call and a stack. A VGPR is allocated for
1809 // SGPR spills whenever there are any SGPR spills, whether CSR or otherwise.
1810 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1811 const bool WillHaveFP =
1812 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1813
1814 // FP will be specially managed like SP.
1815 if (WillHaveFP || hasFP(MF))
1816 SavedRegs.reset(MFI->getFrameOffsetReg());
1817
1818 // The return address use in the return instruction is hidden by the
1819 // SI_RETURN pseudo. Given that, and since IPRA computes actual register
1820 // usage and does not use the CSR list, the clobbering of the return address
1821 // by function calls (D117243) or otherwise (D120922) is not seen by IPRA's
1822 // register usage collection. Setting these bits ensures the return address
1823 // is saved and restored in those scenarios.
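// (On this target the return address lives in a 64-bit SGPR pair; the sub0 and
// sub1 bits set below cover its two 32-bit halves.)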
1824 const MachineRegisterInfo &MRI = MF.getRegInfo();
1825 Register RetAddrReg = TRI->getReturnAddressReg(MF);
1826 if (!MFI->isEntryFunction() &&
1827 (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1828 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1829 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1830 }
1831}
1832
1833static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
1834 const GCNSubtarget &ST,
1835 std::vector<CalleeSavedInfo> &CSI,
1836 unsigned &MinCSFrameIndex,
1837 unsigned &MaxCSFrameIndex) {
1838 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1839 MachineFrameInfo &MFI = MF.getFrameInfo();
1840 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1841
1842 assert(
1843 llvm::is_sorted(CSI,
1844 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1845 return A.getReg() < B.getReg();
1846 }) &&
1847 "Callee saved registers not sorted");
1848
1849 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1850 return !CSI.isSpilledToReg() &&
1851 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1852 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1853 };
1854
1855 auto CSEnd = CSI.end();
1856 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1857 Register Reg = CSIt->getReg();
1858 if (!CanUseBlockOps(*CSIt))
1859 continue;
1860
1861 // Find all the regs that will fit in a 32-bit mask starting at the current
1862 // reg and build said mask. It should have 1 for every register that's
1863 // included, with the current register as the least significant bit.
1864 uint32_t Mask = 1;
1865 CSEnd = std::remove_if(
1866 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1867 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1868 Mask |= 1 << (CSI.getReg() - Reg);
1869 return true;
1870 } else {
1871 return false;
1872 }
1873 });
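// For example, if the eligible callee-saved registers are v40, v41, and v43,
// the scan starting at v40 yields Mask = 0b1011 (bit 0 = v40, bit 1 = v41,
// bit 3 = v43) and removes v41 and v43 from the remaining list.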
1874
1875 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1876 Register RegBlock =
1877 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1878 if (!RegBlock) {
1879 // We couldn't find a super register for the block. This can happen if
1880 // the register we started with is too high (e.g. v232 if the maximum is
1881 // v255). We therefore try to get the last register block and figure out
1882 // the mask from there.
1883 Register LastBlockStart =
1884 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1885 RegBlock =
1886 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1887 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1888 "Couldn't find super register");
1889 int RegDelta = Reg - LastBlockStart;
1890 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1891 "Bad shift amount");
1892 Mask <<= RegDelta;
1893 }
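// For example, with 256 addressable VGPRs a group starting at v232 has no
// 32-register block rooted at v232, so the block starting at v224 is used
// instead and the mask is shifted left by RegDelta = 8 so each bit still
// lines up with its register within that block.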
1894
1895 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1896
1897 // The stack objects can be a bit smaller than the register block if we know
1898 // some of the high bits of Mask are 0. This may happen often with calling
1899 // conventions where the caller and callee-saved VGPRs are interleaved at
1900 // a small boundary (e.g. 8 or 16).
1901 int UnusedBits = llvm::countl_zero(Mask);
1902 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1903 int FrameIdx =
1904 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1905 /*isSpillSlot=*/true);
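// For example, assuming the 1024-bit block register class used with
// SI_BLOCK_SPILL_V1024_SAVE later in this file (32 registers, 128 bytes per
// lane), a mask of 0x00FF has 24 unused high bits and the slot shrinks to
// 128 - 24 * 4 = 32 bytes.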
1906 if ((unsigned)FrameIdx < MinCSFrameIndex)
1907 MinCSFrameIndex = FrameIdx;
1908 if ((unsigned)FrameIdx > MaxCSFrameIndex)
1909 MaxCSFrameIndex = FrameIdx;
1910
1911 CSIt->setFrameIdx(FrameIdx);
1912 CSIt->setReg(RegBlock);
1913 }
1914 CSI.erase(CSEnd, CSI.end());
1915}
1916
1917bool SIFrameLowering::assignCalleeSavedSpillSlots(
1918 MachineFunction &MF, const TargetRegisterInfo *TRI,
1919 std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
1920 unsigned &MaxCSFrameIndex) const {
1921 if (CSI.empty())
1922 return true; // Early exit if no callee saved registers are modified!
1923
1924 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1925 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1926
1927 if (UseVGPRBlocks)
1928 assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);
1929
1930 return assignCalleeSavedSpillSlots(MF, TRI, CSI) || UseVGPRBlocks;
1931}
1932
1935 std::vector<CalleeSavedInfo> &CSI) const {
1936 if (CSI.empty())
1937 return true; // Early exit if no callee saved registers are modified!
1938
1939 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1940 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1941 const SIRegisterInfo *RI = ST.getRegisterInfo();
1942 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1943 Register BasePtrReg = RI->getBaseRegister();
1944 Register SGPRForFPSaveRestoreCopy =
1945 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1946 Register SGPRForBPSaveRestoreCopy =
1947 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1948 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1949 return false;
1950
1951 unsigned NumModifiedRegs = 0;
1952
1953 if (SGPRForFPSaveRestoreCopy)
1954 NumModifiedRegs++;
1955 if (SGPRForBPSaveRestoreCopy)
1956 NumModifiedRegs++;
1957
1958 for (auto &CS : CSI) {
1959 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1960 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1961 if (--NumModifiedRegs)
1962 break;
1963 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1964 SGPRForBPSaveRestoreCopy) {
1965 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1966 if (--NumModifiedRegs)
1967 break;
1968 }
1969 }
1970
1971 return false;
1972}
1973
1974bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
1975 const MachineFunction &MF) const {
1976
1977 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1978 const MachineFrameInfo &MFI = MF.getFrameInfo();
1979 const SIInstrInfo *TII = ST.getInstrInfo();
1980 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1981 uint64_t MaxOffset = EstStackSize - 1;
1982
1983 // We need the emergency stack slots to be allocated in range of the
1984 // MUBUF/flat scratch immediate offset from the base register, so assign these
1985 // first at the incoming SP position.
1986 //
1987 // TODO: We could try sorting the objects to find a hole in the first bytes
1988 // rather than allocating as close as possible. This could save a lot of
1989 // space on frames with alignment requirements.
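// For reference: the MUBUF unsigned immediate offset field is typically 12
// bits (0..4095 bytes), while flat scratch has its own, subtarget-dependent
// signed offset limit; both legality checks are queried below.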
1990 if (ST.enableFlatScratch()) {
1991 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1992 SIInstrFlags::FlatScratch))
1993 return false;
1994 } else {
1995 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1996 return false;
1997 }
1998
1999 return true;
2000}
2001
2002bool SIFrameLowering::spillCalleeSavedRegisters(
2003 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2004 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2005 MachineFunction *MF = MBB.getParent();
2006 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2007 if (!ST.useVGPRBlockOpsForCSR())
2008 return false;
2009
2010 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2011 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2012 const SIInstrInfo *TII = ST.getInstrInfo();
2014
2015 const TargetRegisterClass *BlockRegClass =
2016 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
2017 for (const CalleeSavedInfo &CS : CSI) {
2018 Register Reg = CS.getReg();
2019 if (!BlockRegClass->contains(Reg) ||
2020 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2021 spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2022 continue;
2023 }
2024
2025 // Build a scratch block store.
2026 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2027 int FrameIndex = CS.getFrameIdx();
2028 MachinePointerInfo PtrInfo =
2029 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2030 MachineMemOperand *MMO =
2031 MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
2032 FrameInfo.getObjectSize(FrameIndex),
2033 FrameInfo.getObjectAlign(FrameIndex));
2034
2035 BuildMI(MBB, MI, MI->getDebugLoc(),
2036 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2037 .addReg(Reg, getKillRegState(false))
2038 .addFrameIndex(FrameIndex)
2039 .addReg(FuncInfo->getStackPtrOffsetReg())
2040 .addImm(0)
2041 .addImm(Mask)
2042 .addMemOperand(MMO);
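// The mask operand tells the expansion of the block-spill pseudo which of the
// 32 registers in the block actually need to be written, so presumably only
// the masked registers are stored into the (possibly shrunken) stack slot.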
2043
2044 FuncInfo->setHasSpilledVGPRs();
2045
2046 // Add the register to the liveins. This is necessary because if any of the
2047 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2048 // then the whole block will be marked as reserved and `updateLiveness` will
2049 // skip it.
2050 MBB.addLiveIn(Reg);
2051 }
2052 MBB.sortUniqueLiveIns();
2053
2054 return true;
2055}
2056
2057bool SIFrameLowering::restoreCalleeSavedRegisters(
2058 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2059 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2060 MachineFunction *MF = MBB.getParent();
2061 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2062 if (!ST.useVGPRBlockOpsForCSR())
2063 return false;
2064
2065 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2066 MachineFrameInfo &MFI = MF->getFrameInfo();
2067 const SIInstrInfo *TII = ST.getInstrInfo();
2068 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2069 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2070 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2071 Register Reg = CS.getReg();
2072 if (!BlockRegClass->contains(Reg) ||
2073 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2074 restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2075 continue;
2076 }
2077
2078 // Build a scratch block load.
2079 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2080 int FrameIndex = CS.getFrameIdx();
2081 MachinePointerInfo PtrInfo =
2082 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2083 MachineMemOperand *MMO = MF->getMachineMemOperand(
2084 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2085 MFI.getObjectAlign(FrameIndex));
2086
2087 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2088 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2089 .addFrameIndex(FrameIndex)
2090 .addReg(FuncInfo->getStackPtrOffsetReg())
2091 .addImm(0)
2092 .addImm(Mask)
2093 .addMemOperand(MMO);
2094 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2095
2096 // Add the register to the liveins. This is necessary because if any of the
2097 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2098 // then the whole block will be marked as reserved and `updateLiveness` will
2099 // skip it.
2100 MBB.addLiveIn(Reg);
2101 }
2102
2103 MBB.sortUniqueLiveIns();
2104 return true;
2105}
2106
2107MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
2108 MachineFunction &MF,
2109 MachineBasicBlock &MBB,
2110 MachineBasicBlock::iterator I) const {
2111 int64_t Amount = I->getOperand(0).getImm();
2112 if (Amount == 0)
2113 return MBB.erase(I);
2114
2115 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2116 const SIInstrInfo *TII = ST.getInstrInfo();
2117 const DebugLoc &DL = I->getDebugLoc();
2118 unsigned Opc = I->getOpcode();
2119 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2120 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2121
2122 if (!hasReservedCallFrame(MF)) {
2123 Amount = alignTo(Amount, getStackAlign());
2124 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2127
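// SP for scratch holds a swizzled, per-lane byte offset, so the per-lane
// adjustment is scaled by getScratchScaleFactor (presumably the wave size for
// MUBUF scratch and 1 with flat scratch); e.g. a 16-byte adjustment under
// wave64 then moves SP by 1024.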
2128 Amount *= getScratchScaleFactor(ST);
2129 if (IsDestroy)
2130 Amount = -Amount;
2131 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2132 .addReg(SPReg)
2133 .addImm(Amount);
2134 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2135 } else if (CalleePopAmount != 0) {
2136 llvm_unreachable("is this used?");
2137 }
2138
2139 return MBB.erase(I);
2140}
2141
2142/// Returns true if the frame will require a reference to the stack pointer.
2143///
2144/// This is the set of conditions common to setting up the stack pointer in a
2145/// kernel, and for using a frame pointer in a callable function.
2146///
2147/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2148/// references SP.
2149static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
2150 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2151}
2152
2153// The FP for kernels is always known to be 0, so we never really need to set
2154// up an explicit register for it. However, DisableFramePointerElim will force
2155// us to use a register for it.
2156bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
2157 const MachineFrameInfo &MFI = MF.getFrameInfo();
2158
2159 // For entry & chain functions we can use an immediate offset in most cases,
2160 // so the presence of calls doesn't imply we need a distinct frame pointer.
2161 if (MFI.hasCalls() &&
2162 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
2163 !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
2164 // All offsets are unsigned, so need to be addressed in the same direction
2165 // as stack growth.
2166
2167 // FIXME: This function is pretty broken, since it can be called before the
2168 // frame layout is determined or CSR spills are inserted.
2169 return MFI.getStackSize() != 0;
2170 }
2171
2172 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2173 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2174 MF) ||
2175 mayReserveScratchForCWSR(MF) ||
2176 MF.getTarget().Options.DisableFramePointerElim(MF);
2177}
2178
2179bool SIFrameLowering::mayReserveScratchForCWSR(
2180 const MachineFunction &MF) const {
2181 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2184}
2185
2186// This is essentially a reduced version of hasFP for entry functions. Since the
2187// stack pointer is known 0 on entry to kernels, we never really need an FP
2188// register. We may need to initialize the stack pointer depending on the frame
2189// properties, which logically overlaps many of the cases where an ordinary
2190// function would require an FP.
2191// Also used for chain functions. While not technically entry functions, chain
2192// functions may need to set up a stack pointer in some situations.
2193bool SIFrameLowering::requiresStackPointerReference(
2194 const MachineFunction &MF) const {
2195 // Callable functions always require a stack pointer reference.
2198 "only expected to call this for entry points and chain functions");
2199
2200 const MachineFrameInfo &MFI = MF.getFrameInfo();
2201
2202 // Entry points ordinarily don't need to initialize SP. We have to set it up
2203 // for callees if there are any. Also note tail calls are impossible/don't
2204 // make any sense for kernels.
2205 if (MFI.hasCalls())
2206 return true;
2207
2208 // We still need to initialize the SP if we're doing anything weird that
2209 // references the SP, like variable sized stack objects.
2210 return frameTriviallyRequiresSP(MFI);
2211}