21#define DEBUG_TYPE "frame-info"
24 "amdgpu-spill-vgpr-to-agpr",
25 cl::desc(
"Enable spilling VGPRs to AGPRs"),
38 if (!
MRI.isPhysRegUsed(Reg) && LiveUnits.
available(Reg) &&
54 for (
unsigned i = 0; CSRegs[i]; ++i)
55 LiveUnits.
addReg(CSRegs[i]);
75 bool IncludeScratchCopy =
true) {
81 unsigned Size =
TRI->getSpillSize(RC);
82 Align Alignment =
TRI->getSpillAlign(RC);
90 if (IncludeScratchCopy)
94 int FI = FrameInfo.CreateStackObject(
Size, Alignment,
true,
nullptr,
97 if (
TRI->spillSGPRToVGPR() &&
104 SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
114 FI = FrameInfo.CreateSpillStackObject(
Size, Alignment);
124 SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
125 LiveUnits.
addReg(ScratchSGPR);
140 int64_t DwordOff = 0) {
141 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
142 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
149 LiveUnits.
addReg(SpillReg);
151 TRI.buildSpillLoadStore(
MBB,
I,
DL,
Opc, FI, SpillReg, IsKill, FrameReg,
152 DwordOff, MMO,
nullptr, &LiveUnits);
164 Register FrameReg, int64_t DwordOff = 0) {
165 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
166 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
173 TRI.buildSpillLoadStore(
MBB,
I,
DL,
Opc, FI, SpillReg,
false, FrameReg,
174 DwordOff, MMO,
nullptr, &LiveUnits);
184 Register TargetLo =
TRI->getSubReg(TargetReg, AMDGPU::sub0);
185 Register TargetHi =
TRI->getSubReg(TargetReg, AMDGPU::sub1);
192 const MCInstrDesc &GetPC64 =
TII->get(AMDGPU::S_GETPC_B64_pseudo);
206 if (LiveUnits.
empty()) {
240 unsigned EltSize = 4;
242 void saveToMemory(
const int FI)
const {
249 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
253 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
261 FI, FrameReg, DwordOff);
266 void saveToVGPRLane(
const int FI)
const {
272 assert(Spill.size() == NumSubRegs);
274 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
286 void copyToScratchSGPR(
Register DstReg)
const {
292 void restoreFromMemory(
const int FI) {
297 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
301 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
307 TmpVGPR, FI, FrameReg, DwordOff);
308 MRI.constrainRegClass(
SubReg, &AMDGPU::SReg_32_XM0RegClass);
315 void restoreFromVGPRLane(
const int FI) {
319 assert(Spill.size() == NumSubRegs);
321 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
331 void copyFromScratchSGPR(
Register SrcReg)
const {
346 ST(MF.getSubtarget<
GCNSubtarget>()), MFI(MF.getFrameInfo()),
348 SuperReg(Reg), SI(SI), LiveUnits(LiveUnits),
DL(
DL),
351 SplitParts =
TRI.getRegSplitParts(RC, EltSize);
352 NumSubRegs = SplitParts.
empty() ? 1 : SplitParts.
size();
354 assert(SuperReg != AMDGPU::M0 &&
"m0 should never spill");
358 switch (SI.getKind()) {
360 return saveToMemory(SI.getIndex());
362 return saveToVGPRLane(SI.getIndex());
364 return copyToScratchSGPR(SI.getReg());
369 switch (SI.getKind()) {
371 return restoreFromMemory(SI.getIndex());
373 return restoreFromVGPRLane(SI.getIndex());
375 return copyFromScratchSGPR(SI.getReg());
383void SIFrameLowering::emitEntryFunctionFlatScratchInit(
404 if (
ST.isAmdPalOS()) {
412 Register FlatScrInit = AMDGPU::NoRegister;
415 AllSGPR64s = AllSGPR64s.
slice(
416 std::min(
static_cast<unsigned>(AllSGPR64s.
size()), NumPreloaded));
420 MRI.isAllocatable(Reg) && !
TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
425 assert(FlatScrInit &&
"Failed to find free register for scratch init");
427 FlatScrInitLo =
TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
428 FlatScrInitHi =
TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
435 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
456 And->getOperand(3).setIsDead();
460 assert(FlatScratchInitReg);
463 MRI.addLiveIn(FlatScratchInitReg);
466 FlatScrInitLo =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
467 FlatScrInitHi =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
471 if (
ST.flatScratchIsPointer()) {
475 .
addReg(ScratchWaveOffsetReg);
482 using namespace AMDGPU::Hwreg;
485 .
addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
488 .
addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
495 .
addReg(ScratchWaveOffsetReg);
515 .
addReg(ScratchWaveOffsetReg);
538Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
551 if (!ScratchRsrcReg || (!
MRI.isPhysRegUsed(ScratchRsrcReg) &&
555 if (
ST.hasSGPRInitBug() ||
556 ScratchRsrcReg !=
TRI->reservedPrivateSegmentBufferReg(MF))
557 return ScratchRsrcReg;
570 AllSGPR128s = AllSGPR128s.
slice(std::min(
static_cast<unsigned>(AllSGPR128s.
size()), NumPreloaded));
579 if (!
MRI.isPhysRegUsed(Reg) &&
MRI.isAllocatable(Reg) &&
580 (!GITPtrLoReg || !
TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
581 MRI.replaceRegWith(ScratchRsrcReg, Reg);
588 return ScratchRsrcReg;
592 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
597 assert(&MF.
front() == &
MBB &&
"Shrink-wrapping not yet supported");
630 if (!ST.enableFlatScratch())
631 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
634 if (ScratchRsrcReg) {
636 if (&OtherBB != &
MBB) {
637 OtherBB.addLiveIn(ScratchRsrcReg);
645 if (ST.isAmdHsaOrMesa(
F)) {
646 PreloadedScratchRsrcReg =
648 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
651 MRI.addLiveIn(PreloadedScratchRsrcReg);
667 if (PreloadedScratchWaveOffsetReg &&
668 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
671 AllSGPRs = AllSGPRs.
slice(
672 std::min(
static_cast<unsigned>(AllSGPRs.
size()), NumPreloaded));
675 if (!
MRI.isPhysRegUsed(Reg) &&
MRI.isAllocatable(Reg) &&
676 !
TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
677 ScratchWaveOffsetReg = Reg;
686 if (!ScratchWaveOffsetReg)
688 "could not find temporary scratch offset register in prolog");
690 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
692 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
741 ST.hasInv2PiInlineImm())) {
753 bool NeedsFlatScratchInit =
755 (
MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.
hasCalls() ||
758 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
759 PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
760 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
764 if (NeedsFlatScratchInit) {
765 emitEntryFunctionFlatScratchInit(MF,
MBB,
I,
DL, ScratchWaveOffsetReg);
768 if (ScratchRsrcReg) {
769 emitEntryFunctionScratchRsrcRegSetup(MF,
MBB,
I,
DL,
770 PreloadedScratchRsrcReg,
771 ScratchRsrcReg, ScratchWaveOffsetReg);
776void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
787 if (ST.isAmdPalOS()) {
790 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
791 Register Rsrc03 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
798 const MCInstrDesc &LoadDwordX4 =
TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
827 }
else if (
ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
831 Register Rsrc2 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
832 Register Rsrc3 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
838 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
847 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
866 Register Rsrc0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
867 Register Rsrc1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
885 }
else if (
ST.isAmdHsaOrMesa(Fn)) {
886 assert(PreloadedScratchRsrcReg);
888 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
903 Register ScratchRsrcSub0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
904 Register ScratchRsrcSub1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
910 .
addReg(ScratchWaveOffsetReg)
912 auto Addc =
BuildMI(
MBB,
I,
DL,
TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
939 bool EnableInactiveLanes) {
952 assert(IsProlog &&
"Epilog should look at return, not setup");
954 TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
955 assert(ScratchExecCopy &&
"Couldn't find copy of EXEC");
958 MRI, LiveUnits, *
TRI.getWaveMaskRegClass());
961 if (!ScratchExecCopy)
964 LiveUnits.
addReg(ScratchExecCopy);
966 const unsigned SaveExecOpc =
967 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
968 : AMDGPU::S_OR_SAVEEXEC_B32)
969 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
970 : AMDGPU::S_OR_SAVEEXEC_B64);
975 return ScratchExecCopy;
994 if (!WWMScratchRegs.
empty())
999 auto StoreWWMRegisters =
1001 for (
const auto &Reg : WWMRegs) {
1003 int FI = Reg.second;
1005 VGPR, FI, FrameReg);
1010 if (!
MRI.isReserved(Reg)) {
1015 StoreWWMRegisters(WWMScratchRegs);
1017 auto EnableAllLanes = [&]() {
1018 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1022 if (!WWMCalleeSavedRegs.
empty()) {
1023 if (ScratchExecCopy) {
1032 StoreWWMRegisters(WWMCalleeSavedRegs);
1038 if (!ScratchExecCopy)
1041 else if (WWMCalleeSavedRegs.
empty())
1043 TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1044 }
else if (ScratchExecCopy) {
1046 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1049 LiveUnits.
addReg(ScratchExecCopy);
1060 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1065 LiveUnits, FrameReg);
1073 if (!ScratchSGPRs.
empty()) {
1080 if (!LiveUnits.
empty()) {
1104 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1109 LiveUnits, FrameReg);
1119 auto RestoreWWMRegisters =
1121 for (
const auto &Reg : WWMRegs) {
1123 int FI = Reg.second;
1125 VGPR, FI, FrameReg);
1132 RestoreWWMRegisters(WWMCalleeSavedRegs);
1136 assert(Return.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN &&
1137 "Unexpected return inst");
1138 Register OrigExec = Return.getOperand(0).getReg();
1140 if (!WWMScratchRegs.
empty()) {
1141 unsigned XorOpc = ST.isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
1145 RestoreWWMRegisters(WWMScratchRegs);
1149 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1154 if (!WWMScratchRegs.
empty()) {
1159 RestoreWWMRegisters(WWMScratchRegs);
1160 if (!WWMCalleeSavedRegs.
empty()) {
1161 if (ScratchExecCopy) {
1162 unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1171 RestoreWWMRegisters(WWMCalleeSavedRegs);
1172 if (ScratchExecCopy) {
1174 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1210 assert(StackPtrReg != AMDGPU::SP_REG);
1222 if (
TRI.hasStackRealignment(MF))
1226 if (!HasFP && !
hasFP(MF)) {
1230 FramePtrRegScratchCopy);
1233 Register SGPRForFPSaveRestoreCopy =
1237 if (SGPRForFPSaveRestoreCopy) {
1244 DL,
TII,
TRI, LiveUnits, FramePtrReg);
1246 LiveUnits.
addReg(SGPRForFPSaveRestoreCopy);
1251 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1252 if (!FramePtrRegScratchCopy)
1255 LiveUnits.
addReg(FramePtrRegScratchCopy);
1264 RoundedSize += Alignment;
1265 if (LiveUnits.
empty()) {
1280 And->getOperand(3).setIsDead();
1282 }
else if ((HasFP =
hasFP(MF))) {
1291 FramePtrRegScratchCopy);
1292 if (FramePtrRegScratchCopy)
1293 LiveUnits.
removeReg(FramePtrRegScratchCopy);
1300 if ((HasBP =
TRI.hasBasePointer(MF))) {
1306 if (HasFP && RoundedSize != 0) {
1311 Add->getOperand(3).setIsDead();
1316 assert((!HasFP || FPSaved) &&
1317 "Needed to save FP but didn't save it anywhere");
1322 "Saved FP but didn't need it");
1326 assert((!HasBP || BPSaved) &&
1327 "Needed to save BP but didn't save it anywhere");
1329 assert((HasBP || !BPSaved) &&
"Saved BP but didn't need it");
1350 DL =
MBBI->getDebugLoc();
1364 if (RoundedSize != 0) {
1365 if (
TRI.hasBasePointer(MF)) {
1369 }
else if (
hasFP(MF)) {
1377 Register SGPRForFPSaveRestoreCopy =
1385 if (SGPRForFPSaveRestoreCopy) {
1386 LiveUnits.
addReg(SGPRForFPSaveRestoreCopy);
1389 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1390 if (!FramePtrRegScratchCopy)
1393 LiveUnits.
addReg(FramePtrRegScratchCopy);
1397 FramePtrRegScratchCopy);
1402 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1403 : FramePtrRegScratchCopy;
1407 if (SGPRForFPSaveRestoreCopy)
1413 FramePtrRegScratchCopy);
1454 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->
hasSpilledVGPRs()
1457 if (SpillVGPRToAGPR) {
1462 bool SeenDbgInstr =
false;
1467 if (
MI.isDebugInstr())
1468 SeenDbgInstr =
true;
1470 if (
TII->isVGPRSpill(
MI)) {
1473 unsigned FIOp = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1474 AMDGPU::OpName::vaddr);
1475 int FI =
MI.getOperand(FIOp).getIndex();
1477 TII->getNamedOperand(
MI, AMDGPU::OpName::vdata)->getReg();
1479 TRI->isAGPR(
MRI, VReg))) {
1483 TRI->eliminateFrameIndex(
MI, 0, FIOp, RS);
1490 NonVGPRSpillFIs.
set(FrameIndex);
1496 for (
unsigned FI : SpillFIs.
set_bits())
1497 if (!NonVGPRSpillFIs.
test(FI))
1509 if (!SpillFIs.
empty() && SeenDbgInstr) {
1514 if (
MI.isDebugValue()) {
1515 uint32_t StackOperandIdx =
MI.isDebugValueList() ? 2 : 0;
1516 if (
MI.getOperand(StackOperandIdx).isFI() &&
1518 MI.getOperand(StackOperandIdx).getIndex()) &&
1519 SpillFIs[
MI.getOperand(StackOperandIdx).getIndex()]) {
1520 MI.getOperand(StackOperandIdx)
1521 .ChangeToRegister(
Register(),
false );
1532 bool HaveSGPRToVMemSpill =
1535 "SGPR spill should have been removed in SILowerSGPRSpills");
1541 assert(RS &&
"RegScavenger required if spilling");
1548 if (HaveSGPRToVMemSpill &&
1562 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1569 TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
1570 if (UnusedLowVGPR && (
TRI->getHWRegIndex(UnusedLowVGPR) <
1571 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1577 MRI.reserveReg(UnusedLowVGPR,
TRI);
1584 TRI->findUnusedRegister(
MRI, &AMDGPU::SGPR_64RegClass, MF);
1589 if (LongBranchReservedReg && UnusedLowSGPR) {
1591 MRI.reserveReg(UnusedLowSGPR,
TRI);
1599 bool NeedExecCopyReservedReg)
const {
1610 for (
unsigned I = 0; CSRegs[
I]; ++
I)
1616 if (NeedExecCopyReservedReg ||
1617 (ReservedRegForExecCopy &&
1618 MRI.isPhysRegUsed(ReservedRegForExecCopy,
true))) {
1619 MRI.reserveReg(ReservedRegForExecCopy,
TRI);
1621 if (UnusedScratchReg) {
1625 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1626 LiveUnits.
addReg(UnusedScratchReg);
1630 "Re-reserving spill slot for EXEC copy register");
1634 }
else if (ReservedRegForExecCopy) {
1648 const bool WillHaveFP =
1652 if (WillHaveFP ||
hasFP(MF)) {
1655 "Re-reserving spill slot for FP");
1659 if (
TRI->hasBasePointer(MF)) {
1662 "Re-reserving spill slot for BP");
1684 bool NeedExecCopyReservedReg =
false;
1691 if (
TII->isWWMRegSpillOpcode(
MI.getOpcode()))
1692 NeedExecCopyReservedReg =
true;
1693 else if (
MI.getOpcode() == AMDGPU::SI_RETURN ||
1694 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1695 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1697 TII->isChainCallOpcode(
MI.getOpcode()))) {
1700 (
count_if(
MI.operands(), [](
auto Op) { return Op.isReg(); }) ==
1713 if (
TRI->getRegSizeInBits(*RC) != 32)
1718 sort(SortedWWMVGPRs, std::greater<Register>());
1727 assert(!NeedExecCopyReservedReg &&
1728 "Whole wave functions can use the reg mapped for their i1 argument");
1731 for (
MCRegister Reg : AMDGPU::VGPR_32RegClass)
1734 MF.
begin()->addLiveIn(Reg);
1736 MF.
begin()->sortUniqueLiveIns();
1744 SavedVGPRs.
reset(
Op.getReg());
1752 TRI->getSpillAlign(*RC));
1761 if (!ST.hasGFX90AInsts())
1769 SavedVGPRs.
reset(Reg.first);
1786 const BitVector AllSavedRegs = SavedRegs;
1795 const bool WillHaveFP =
1799 if (WillHaveFP ||
hasFP(MF))
1809 Register RetAddrReg =
TRI->getReturnAddressReg(MF);
1811 (FrameInfo.
hasCalls() ||
MRI.isPhysRegModified(RetAddrReg))) {
1812 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1813 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1819 std::vector<CalleeSavedInfo> &CSI,
1820 unsigned &MinCSFrameIndex,
1821 unsigned &MaxCSFrameIndex) {
1829 return A.getReg() <
B.getReg();
1831 "Callee saved registers not sorted");
1834 return !CSI.isSpilledToReg() &&
1835 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1839 auto CSEnd = CSI.end();
1840 for (
auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1842 if (!CanUseBlockOps(*CSIt))
1849 CSEnd = std::remove_if(
1851 if (CanUseBlockOps(CSI) && CSI.
getReg() < Reg + 32) {
1852 Mask |= 1 << (CSI.getReg() - Reg);
1861 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1868 AMDGPU::VGPR0 +
alignDown(Reg - AMDGPU::VGPR0, 32);
1870 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1871 assert(RegBlock &&
TRI->isSubRegister(RegBlock, Reg) &&
1872 "Couldn't find super register");
1873 int RegDelta = Reg - LastBlockStart;
1875 "Bad shift amount");
1886 unsigned BlockSize =
TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1888 MFI.CreateStackObject(
BlockSize,
TRI->getSpillAlign(*BlockRegClass),
1890 if ((
unsigned)FrameIdx < MinCSFrameIndex)
1891 MinCSFrameIndex = FrameIdx;
1892 if ((
unsigned)FrameIdx > MaxCSFrameIndex)
1893 MaxCSFrameIndex = FrameIdx;
1895 CSIt->setFrameIdx(FrameIdx);
1896 CSIt->setReg(RegBlock);
1898 CSI.erase(CSEnd, CSI.end());
1903 std::vector<CalleeSavedInfo> &CSI,
unsigned &MinCSFrameIndex,
1904 unsigned &MaxCSFrameIndex)
const {
1909 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1919 std::vector<CalleeSavedInfo> &CSI)
const {
1927 Register BasePtrReg = RI->getBaseRegister();
1928 Register SGPRForFPSaveRestoreCopy =
1930 Register SGPRForBPSaveRestoreCopy =
1932 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1935 unsigned NumModifiedRegs = 0;
1937 if (SGPRForFPSaveRestoreCopy)
1939 if (SGPRForBPSaveRestoreCopy)
1942 for (
auto &CS : CSI) {
1943 if (CS.getReg() == FramePtrReg.
asMCReg() && SGPRForFPSaveRestoreCopy) {
1944 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1945 if (--NumModifiedRegs)
1947 }
else if (CS.getReg() == BasePtrReg.
asMCReg() &&
1948 SGPRForBPSaveRestoreCopy) {
1949 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1950 if (--NumModifiedRegs)
1964 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1965 uint64_t MaxOffset = EstStackSize - 1;
1974 if (ST.enableFlatScratch()) {
1979 if (
TII->isLegalMUBUFImmOffset(MaxOffset))
1991 if (!ST.useVGPRBlockOpsForCSR())
2003 if (!BlockRegClass->contains(Reg) ||
2011 int FrameIndex = CS.getFrameIdx();
2020 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2046 if (!ST.useVGPRBlockOpsForCSR())
2056 if (!BlockRegClass->
contains(Reg) ||
2064 int FrameIndex = CS.getFrameIdx();
2069 MFI.getObjectAlign(FrameIndex));
2072 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2095 int64_t Amount =
I->getOperand(0).getImm();
2102 unsigned Opc =
I->getOpcode();
2103 bool IsDestroy =
Opc ==
TII->getCallFrameDestroyOpcode();
2104 uint64_t CalleePopAmount = IsDestroy ?
I->getOperand(1).getImm() : 0;
2108 assert(isUInt<32>(Amount) &&
"exceeded stack address space size");
2118 Add->getOperand(3).setIsDead();
2119 }
else if (CalleePopAmount != 0) {
2182 "only expected to call this for entry points and chain functions");
unsigned const MachineRegisterInfo * MRI
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB, bool HasCall=false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const GCNSubtarget &ST, std::vector< CalleeSavedInfo > &CSI, unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex)
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
static const int BlockSize
bool isChainFunction() const
bool isEntryFunction() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
bool test(unsigned Idx) const
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
bool any() const
any - Returns true if any bit is set.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
iterator_range< const_set_bits_iterator > set_bits() const
bool empty() const
empty - Tests whether there are no bits in this bitvector.
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
MCRegister getReg() const
This class represents an Operation in the Expression.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasImplicitBufferPtr() const
bool hasFlatScratchInit() const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
If the specified machine instruction is a direct store to a stack slot, return the virtual or physica...
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
bool empty() const
Returns true if the set is empty.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MachineInstr & instr_back()
LLVM_ABI void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
bool hasTailCall() const
Returns true if the function contains a tail call.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
LLVM_ABI bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg)
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
void backward()
Update internal register state and move MBB iterator backwards.
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool mayReserveScratchForCWSR(const MachineFunction &MF) const
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
bool hasFPImpl(const MachineFunction &MF) const override
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of store instructions via storeRegToStackSlot().
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex operands are eliminated, but after the frame is finalized.
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy pseudo instructions (but only if the Target is using them).
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of load instructions via loadRegFromStackSlot().
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void setSGPRForEXECCopy(Register Reg)
unsigned getNumPreloadedSGPRs() const
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask)
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register getLongBranchReservedReg() const
unsigned getDynamicVGPRBlockSize() const
bool hasSpilledVGPRs() const
void setVGPRToAGPRSpillDead(int FrameIndex)
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
bool isStackRealigned() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Register getSGPRForEXECCopy() const
bool isWWMReservedRegister(Register Reg) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Register getFrameOffsetReg() const
void setLongBranchReservedReg(Register Reg)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
Register getImplicitBufferPtrUserSGPR() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
unsigned getGITPtrHigh() const
bool hasSpilledSGPRs() const
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
void setScratchRSrcReg(Register Reg)
void reserveWWMRegister(Register Reg)
Register getFrameRegister(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClassForBlockOp(const MachineFunction &MF) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
This class consists of common code factored out of the SmallVector class to reduce code duplication based on element size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
int64_t getFixed() const
Returns the fixed component of the stack.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegister - Default implementation for spilling a single callee saved register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disabled for the given machine function.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.