#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"

static cl::opt<bool> EnableSpillSGPRToVGPR(
    "amdgpu-spill-sgpr-to-vgpr",
    cl::desc("Enable spilling SGPRs to VGPRs"),
    cl::ReallyHidden, cl::init(true));

std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
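// SubRegFromChannelTableWidthMap appears to map a sub-register width, measured
// in 32-bit channels, to a row of SubRegFromChannelTable; a zero entry marks
// widths that have no covering sub-register index.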
                                 const Twine &ErrMsg) {
                         MI->getOperand(0).isKill(), Index, RS) {}
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
    assert(SuperReg != AMDGPU::EXEC && "exec should never spill");
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
               "unhandled SGPR spill to memory");
        I->getOperand(2).setIsDead();
        I->getOperand(2).setIsDead();
               "unhandled SGPR spill to memory");
      ST.getAMDGPUDwarfFlavour(),
  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");

  RegPressureIgnoredUnits.resize(getNumRegUnits());
  for (auto Reg : AMDGPU::VGPR_16RegClass) {
    RegPressureIgnoredUnits.set(*regunits(Reg).begin());
  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);
      std::vector<int16_t> &Vec = RegSplitParts[Size / 16 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);
      unsigned MaxNumParts = 1024 / Size;
      Vec.resize(MaxNumParts);
  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = getSubRegIdxSize(Idx) / 32;
      unsigned Offset = getSubRegIdxOffset(Idx) / 32;
      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());
      SubRegFromChannelTable[TableIdx][Offset] = Idx;

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
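  // RegSplitParts and SubRegFromChannelTable are static, shared by every
  // SIRegisterInfo instance, so they are built lazily exactly once;
  // llvm::call_once guards the initialization against concurrent construction.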
             : CSR_AMDGPU_SaveList;
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;
    return CSR_AMDGPU_CS_ChainPreserve_SaveList;
  static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
  return &NoCalleeSavedReg;
             : CSR_AMDGPU_RegMask;
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;
  return AMDGPU_AllVGPRs_RegMask;
  return CSR_AMDGPU_NoRegs_RegMask;
  return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
  if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
    return &AMDGPU::AV_32RegClass;
  if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
    return &AMDGPU::AV_64RegClass;
  if (RC == &AMDGPU::VReg_64_Align2RegClass ||
      RC == &AMDGPU::AReg_64_Align2RegClass)
    return &AMDGPU::AV_64_Align2RegClass;
  if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
    return &AMDGPU::AV_96RegClass;
  if (RC == &AMDGPU::VReg_96_Align2RegClass ||
      RC == &AMDGPU::AReg_96_Align2RegClass)
    return &AMDGPU::AV_96_Align2RegClass;
  if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
    return &AMDGPU::AV_128RegClass;
  if (RC == &AMDGPU::VReg_128_Align2RegClass ||
      RC == &AMDGPU::AReg_128_Align2RegClass)
    return &AMDGPU::AV_128_Align2RegClass;
  if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
    return &AMDGPU::AV_160RegClass;
  if (RC == &AMDGPU::VReg_160_Align2RegClass ||
      RC == &AMDGPU::AReg_160_Align2RegClass)
    return &AMDGPU::AV_160_Align2RegClass;
  if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
    return &AMDGPU::AV_192RegClass;
  if (RC == &AMDGPU::VReg_192_Align2RegClass ||
      RC == &AMDGPU::AReg_192_Align2RegClass)
    return &AMDGPU::AV_192_Align2RegClass;
  if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
    return &AMDGPU::AV_256RegClass;
  if (RC == &AMDGPU::VReg_256_Align2RegClass ||
      RC == &AMDGPU::AReg_256_Align2RegClass)
    return &AMDGPU::AV_256_Align2RegClass;
  if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
    return &AMDGPU::AV_512RegClass;
  if (RC == &AMDGPU::VReg_512_Align2RegClass ||
      RC == &AMDGPU::AReg_512_Align2RegClass)
    return &AMDGPU::AV_512_Align2RegClass;
  if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
    return &AMDGPU::AV_1024RegClass;
  if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
      RC == &AMDGPU::AReg_1024_Align2RegClass)
    return &AMDGPU::AV_1024_Align2RegClass;
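  // This chain maps a pure VGPR or pure AGPR class to the combined AV class of
  // the same width (and matching _Align2 variant), so a value that may live in
  // either register file can be constrained with a single class.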
  return AMDGPU_AllVGPRs_RegMask;
  return AMDGPU_AllAGPRs_RegMask;
  return AMDGPU_AllVectorRegs_RegMask;
  return AMDGPU_AllAllocatableSRegs_RegMask;
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
                                                   const unsigned Align,
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
  reserveRegisterTuples(Reserved, AMDGPU::M0);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_LO);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_HI);
  reserveRegisterTuples(Reserved, AMDGPU::ASYNCcnt);
  reserveRegisterTuples(Reserved, AMDGPU::TENSORcnt);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
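  // Everything reserved above is a special hardware or ABI register (exec,
  // flat scratch, M0, the SRC_* pseudo-sources, XNACK_MASK, LDS_DIRECT, the
  // trap temporaries, and the null register); reserveRegisterTuples also marks
  // the register tuples overlapping each of them, so no part of these
  // registers is ever handed out by the allocator.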
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
  if (LongBranchReservedReg)
    reserveRegisterTuples(Reserved, LongBranchReservedReg);
    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));
    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
    reserveRegisterTuples(Reserved, ExecCopyReg);
    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumVGPRs)
    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumAGPRs)
  if (!NonWWMRegMask.empty()) {
    for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;
         RegI < RegE; ++RegI) {
      if (NonWWMRegMask.test(RegI))
        reserveRegisterTuples(Reserved, RegI);
    reserveRegisterTuples(Reserved, Reg);
    reserveRegisterTuples(Reserved, Reg);
    reserveRegisterTuples(Reserved, Reg);
  if (Info->isBottomOfStack())
  if (Info->isEntryFunction()) {
  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32: {
    int OtherIdx = Idx == 1 ? 2 : 1;
  case AMDGPU::V_ADD_CO_U32_e64: {
    int OtherIdx = Idx == 2 ? 3 : 2;
  assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr) ||
          (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                             AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");
  return Src1.isImm() || (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
                                                     Src1.getReg()));
  return Src0.isImm() || (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
                                                     Src0.getReg()));
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32: {
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32:
    return MI->getOperand(3).isDead();
  case AMDGPU::V_ADD_CO_U32_e64:
    return MI->getOperand(1).isDead();
  return !TII->isLegalMUBUFImmOffset(FullOffset);
    DL = Ins->getDebugLoc();
                        : AMDGPU::V_MOV_B32_e32;
                        : &AMDGPU::VGPR_32RegClass);
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
                        : &AMDGPU::VGPR_32RegClass);
  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
  switch (MI.getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32: {
    if (!ImmOp->isImm()) {
      TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);
    if (TotalOffset == 0) {
      MI.setDesc(TII->get(AMDGPU::COPY));
      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
      MI.getOperand(1).ChangeToRegister(BaseReg, false);
    ImmOp->setImm(TotalOffset);
      MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);
      MI.getOperand(2).ChangeToRegister(BaseReg, false);
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64: {
    int Src0Idx = MI.getNumExplicitDefs();
    if (!ImmOp->isImm()) {
      TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);
    if (TotalOffset == 0) {
      MI.setDesc(TII->get(AMDGPU::COPY));
      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
        MI.removeOperand(I);
      MI.getOperand(1).ChangeToRegister(BaseReg, false);
    ImmOp->setImm(TotalOffset);
  bool IsFlat = TII->isFLATScratch(MI);
  bool SeenFI = false;
      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
           "offset should be legal");
  assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:
  return TII->isLegalMUBUFImmOffset(NewOffset);
    return &AMDGPU::VGPR_32RegClass;
  if (RC == &AMDGPU::SCC_CLASSRegClass)
  unsigned Op = MI.getOpcode();
  case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
  case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
        (uint64_t)TII->getNamedOperand(MI, AMDGPU::OpName::mask)->getImm());
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_AV32_SAVE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_V32_SAVE:
  case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
  case AMDGPU::SI_SPILL_V16_SAVE:
  case AMDGPU::SI_SPILL_V16_RESTORE:
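  // The spill pseudos are grouped by total register width; this switch simply
  // maps each group to the number of 32-bit sub-registers it covers (32 for
  // the *_1024 group down to 1 for the 32-bit and WWM groups).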
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
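  // The helpers above translate MUBUF spill opcodes between the OFFEN form
  // (offset taken from a VGPR) and the OFFSET form (immediate-only offset) and
  // back; opcodes without a counterpart fall through to a default that yields
  // -1, which callers check before rewriting the instruction.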
                                             int Index, unsigned Lane,
                                             unsigned ValueReg, bool IsKill) {
  if (Reg == AMDGPU::NoRegister)
  bool IsStore = MI->mayStore();
  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  bool IsVGPR = TRI->isVGPR(MRI, Reg);
  if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
  unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;
  bool IsStore = MI->mayStore();
  unsigned Opc = MI->getOpcode();
  int LoadStoreOp =
      IsStore ? getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
                                                 AMDGPU::OpName::vdata_in);
    NewMI.add(*VDataIn);
                                          unsigned LoadStoreOp,
  bool IsStore = TII->get(LoadStoreOp).mayStore();
  if (TII->isBlockLoadStore(LoadStoreOp))
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
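  // getFlatScratchSpillOpcode selects the SCRATCH_*_SADDR load or store whose
  // width (1, 2, 3 or 4 dwords) matches the element size being transferred;
  // block loads/stores are detected up front via isBlockLoadStore and handled
  // separately.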
                                         unsigned LoadStoreOp, int Index,
                                         Register ValueReg, bool IsKill,
  assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);
  bool IsBlock = TII->isBlockLoadStore(LoadStoreOp);
  bool CanClobberSCC = false;
  bool Scavenged = false;
  unsigned EltSize = IsBlock               ? RegWidth
                     : (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u)
                                           : 4u;
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;
  int64_t MaterializedOffset = Offset;
  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  int64_t ScratchOffsetRegDelta = 0;
  if (IsFlat && EltSize > 4) {
    Desc = &TII->get(LoadStoreOp);
         "unexpected VGPR spill offset");
  bool UseVGPROffset = false;
  if (IsFlat && SGPRBase) {
  bool IsOffsetLegal =
             : TII->isLegalMUBUFImmOffset(MaxOffset);
      CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
    } else if (LiveUnits) {
      CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
      for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
    if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
      UseVGPROffset = true;
        for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
            TmpOffsetVGPR = Reg;
    } else if (!SOffset && CanClobberSCC) {
      if (!ScratchOffsetReg)
      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;
    if (!IsFlat && !UseVGPROffset)
    if (!UseVGPROffset && !SOffset)
    if (UseVGPROffset) {
      MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
    } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
              .addReg(ScratchOffsetReg)
      Add->getOperand(3).setIsDead();
  if (IsFlat && SOffset == AMDGPU::NoRegister) {
    assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
           "Unexpected vaddr for flat scratch with a FI operand");
    if (UseVGPROffset) {
      assert(!TII->isBlockLoadStore(LoadStoreOp) && "Block ops don't have ST");
    Desc = &TII->get(LoadStoreOp);
  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {
      Desc = &TII->get(LoadStoreOp);
    if (!IsFlat && UseVGPROffset) {
      Desc = &TII->get(NewLoadStoreOp);
    if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
      MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
    unsigned NumRegs = EltSize / 4;
    unsigned SOffsetRegState = 0;
    const bool IsLastSubReg = i + 1 == e;
    const bool IsFirstSubReg = i == 0;
    bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
    bool NeedSuperRegImpOperand = e > 1;
    unsigned RemEltSize = EltSize;
      for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
               LaneE = RegOffset / 4;
           Lane >= LaneE; --Lane) {
        bool IsSubReg = e > 1 || EltSize > 4;
        if (!MIB.getInstr())
        if (NeedSuperRegDef ||
            (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
          NeedSuperRegDef = false;
        if ((IsSubReg || NeedSuperRegImpOperand) &&
            (IsFirstSubReg || IsLastSubReg)) {
          NeedSuperRegImpOperand = true;
          unsigned State = SrcDstRegState;
          if (!IsLastSubReg || (Lane != LaneE))
            State &= ~RegState::Kill;
          if (!IsFirstSubReg || (Lane != LaneS))
            State &= ~RegState::Define;
    if (RemEltSize != EltSize) {
      assert(IsFlat && EltSize > 4);
      unsigned NumRegs = RemEltSize / 4;
    unsigned FinalReg = SubReg;
      if (!TmpIntermediateVGPR) {
                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpIntermediateVGPR)
        if (NeedSuperRegDef)
        if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
      SubReg = TmpIntermediateVGPR;
    } else if (UseVGPROffset) {
      if (!TmpOffsetVGPR) {
    if (UseVGPROffset) {
    if (SOffset == AMDGPU::NoRegister) {
      if (UseVGPROffset && ScratchOffsetReg) {
        MIB.addReg(ScratchOffsetReg);
      MIB.addReg(SOffset, SOffsetRegState);
    MIB.addImm(Offset + RegOffset);
    MIB.addMemOperand(NewMMO);
    if (!IsAGPR && NeedSuperRegDef)
    if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
    if (NeedSuperRegImpOperand &&
        (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef)))
  if (!IsStore && MI != MBB.end() && MI->isReturn() &&
    MIB->tieOperands(0, MIB->getNumOperands() - 1);
  if (!IsStore && TII->isBlockLoadStore(LoadStoreOp))
  if (ScratchOffsetRegDelta != 0) {
        .addImm(-ScratchOffsetRegDelta);
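  // buildSpillLoadStore walks the value EltSize bytes at a time (plus an
  // optional narrower remainder for flat scratch), emits one scratch access
  // per element, and adjusts kill/define flags on the covering super-register
  // as it goes. If the scratch offset register had to be bumped to make the
  // immediate offset legal, the delta is subtracted back out here at the end.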
  Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);
  for (unsigned RegOffset = 1; RegOffset < 32; ++RegOffset)
    if (!(Mask & (1 << RegOffset)) &&
        isCalleeSavedPhysReg(BaseVGPR + RegOffset, *MF))
                                 bool IsKill) const {
        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
                              bool SpillToPhysVGPRLane) const {
  assert(!MI->getOperand(0).isUndef() &&
         "undef spill should have been deleted earlier");
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
           "Num of SGPRs spilled should be less than or equal to num of "
    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
      bool IsFirstSubreg = i == 0;
      bool UseKill = SB.IsKill && IsLastSubreg;
              SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
      if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
      for (unsigned i = Offset * PVD.PerVGPR,
                SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
          unsigned SuperKillState = 0;
    MI->eraseFromParent();
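  // An SGPR spill is expanded here either into SI_SPILL_S32_TO_VGPR writes
  // into reserved VGPR lanes (the path enabled by -amdgpu-spill-sgpr-to-vgpr)
  // or, when no lane mapping exists, staged through a temporary VGPR and
  // written out to scratch memory; the original spill pseudo is erased once
  // the expansion is done.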
                                bool SpillToPhysVGPRLane) const {
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
              SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
      for (unsigned i = Offset * PVD.PerVGPR,
        bool LastSubReg = (i + 1 == e);
                SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
  MI->eraseFromParent();
    for (unsigned i = Offset * PVD.PerVGPR,
      unsigned SuperKillState = 0;
    MI = RestoreMBB.end();
    for (unsigned i = Offset * PVD.PerVGPR,
      MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
      bool LastSubReg = (i + 1 == e);
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
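  // With OnlyToVGPR set (the 'true' argument above), spillSGPR and restoreSGPR
  // may only use VGPR lanes; if no lane mapping exists for the frame index
  // they report failure rather than falling back to a memory spill.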
                                        int SPAdj, unsigned FIOperandNum,
  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
         "unreserved scratch RSRC register");
  int Index = MI->getOperand(FIOperandNum).getIndex();
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
  case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: {
        .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V16_SAVE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV32_SAVE:
  case AMDGPU::SI_SPILL_WWM_V32_SAVE:
  case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
                                         AMDGPU::OpName::vdata);
      MI->eraseFromParent();
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
    if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_SAVE) {
      Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;
      Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE
                ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR
                : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
    if (IsWWMRegSpill) {
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);
    MI->eraseFromParent();
  case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: {
        .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
  case AMDGPU::SI_SPILL_V16_RESTORE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE:
  case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
                                         AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
    if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) {
      Opc = AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16;
      Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE
                ? AMDGPU::SCRATCH_LOAD_BLOCK_SADDR
                : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
    if (IsWWMRegSpill) {
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);
    MI->eraseFromParent();
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e64: {
    unsigned NumDefs = MI->getNumExplicitDefs();
    unsigned Src0Idx = NumDefs;
    bool HasClamp = false;
    switch (MI->getOpcode()) {
    case AMDGPU::V_ADD_U32_e32:
    case AMDGPU::V_ADD_U32_e64:
      HasClamp = MI->getOperand(3).getImm();
    case AMDGPU::V_ADD_CO_U32_e32:
      VCCOp = &MI->getOperand(3);
    case AMDGPU::V_ADD_CO_U32_e64:
      VCCOp = &MI->getOperand(1);
      HasClamp = MI->getOperand(4).getImm();
    bool DeadVCC = !VCCOp || VCCOp->isDead();
    unsigned OtherOpIdx =
        FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
    unsigned Src1Idx = Src0Idx + 1;
    Register MaterializedReg = FrameReg;
    int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (OtherOp->isImm()) {
          OtherOp->setImm(TotalOffset);
              AMDGPU::VGPR_32RegClass, MI, false, 0);
          MaterializedReg = ScavengedVGPR;
      if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
            !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
          if (!ScavengedVGPR) {
                AMDGPU::VGPR_32RegClass, MI, false,
          assert(ScavengedVGPR != DstReg);
          MaterializedReg = ScavengedVGPR;
          AddI32.add(MI->getOperand(1));
        unsigned MaterializedRegFlags =
        if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
              .addReg(MaterializedReg, MaterializedRegFlags);
              .addReg(MaterializedReg, MaterializedRegFlags)
        if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
            MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
        if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
          AddI32.setOperandDead(3);
        MaterializedReg = DstReg;
      } else if (Offset != 0) {
        assert(!MaterializedReg);
      if (DeadVCC && !HasClamp) {
        if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {
          MI->eraseFromParent();
          MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
          MI->removeOperand(FIOperandNum);
          unsigned NumOps = MI->getNumOperands();
          for (unsigned I = NumOps - 2; I >= NumDefs + 1; --I)
            MI->removeOperand(I);
            MI->removeOperand(1);
    if (!TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {
    for (unsigned SrcIdx : {FIOperandNum, OtherOpIdx}) {
      if (!TII->isOperandLegal(*MI, SrcIdx)) {
        if (!ScavengedVGPR) {
              AMDGPU::VGPR_32RegClass, MI, false,
        assert(ScavengedVGPR != DstReg);
        Src.ChangeToRegister(ScavengedVGPR, false);
        Src.setIsKill(true);
    if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {
      if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {
      MI->eraseFromParent();
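    // This case folds a frame index that feeds a v_add directly into the add:
    // the frame offset is combined with any immediate operand, the frame
    // register (or a scavenged VGPR holding it) is substituted for the frame
    // index, and when the result degenerates to "dst = base + 0" the add is
    // rewritten into a copy/move or deleted, provided VCC and clamp are dead.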
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_ADD_U32: {
    unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
    Register MaterializedReg = FrameReg;
    bool DeadSCC = MI->getOperand(3).isDead();
      MaterializedReg = TmpReg;
    int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (OtherOp.isImm()) {
        if (MaterializedReg)
      } else if (MaterializedReg) {
        if (!TmpReg && MaterializedReg == FrameReg) {
        MaterializedReg = DstReg;
    if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
      MI->removeOperand(3);
      MI->removeOperand(OtherOpIdx);
      MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
    } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {
      MI->removeOperand(3);
      MI->removeOperand(FIOperandNum);
          TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
    int64_t Offset = FrameInfo.getObjectOffset(Index);
    if (TII->isFLATScratch(*MI)) {
          (int16_t)FIOperandNum ==
              AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr));
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
        unsigned Opc = MI->getOpcode();
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
        bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
                        MI->getOperand(VDstIn).isTied();
          MI->untieRegOperand(VDstIn);
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
              AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
              AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
          assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
          MI->tieOperands(NewVDst, NewVDstIn);
        MI->setDesc(TII->get(NewOpc));
      if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
      bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
      if (!Offset && FrameReg && UseSGPR) {
          UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
      if ((!FrameReg || !Offset) && TmpReg) {
        unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
          MIB.addReg(FrameReg);
      bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
                         !MI->definesRegister(AMDGPU::SCC, nullptr);
            MI, false, 0, !UseSGPR);
      if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
        assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
      if (TmpSReg == FrameReg) {
            !MI->registerDefIsDead(AMDGPU::SCC, nullptr)) {
    bool IsMUBUF = TII->isMUBUF(*MI);
      bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
                     !MI->definesRegister(AMDGPU::SCC, nullptr);
              ? &AMDGPU::SReg_32RegClass
              : &AMDGPU::VGPR_32RegClass;
      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                    MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
                    MI->getOpcode() == AMDGPU::S_MOV_B32;
          IsCopy ? MI->getOperand(0).getReg()
      int64_t Offset = FrameInfo.getObjectOffset(Index);
          IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
        if (IsSALU && LiveSCC) {
        if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
        if (IsSALU && !LiveSCC)
          Shift.getInstr()->getOperand(3).setIsDead();
        if (IsSALU && LiveSCC) {
              &AMDGPU::SReg_32_XM0RegClass);
          NewDest = ResultReg;
          ResultReg = NewDest;
        if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
          const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
                 "Need to reuse carry out register");
            ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
            ConstOffsetReg = MIB.getReg(1);
        if (!MIB || IsSALU) {
          Register TmpScaledReg = IsCopy && IsSALU
                AMDGPU::SReg_32_XM0RegClass, MI,
          Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
                AMDGPU::VGPR_32RegClass, MI, false, 0, true);
            if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
              if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
                     "offset is unsafe for v_mad_u32_u24");
                bool IsInlinableLiteral =
                if (!IsInlinableLiteral) {
            if (!IsInlinableLiteral) {
                  &AMDGPU::SReg_32_XM0RegClass);
              NewDest = ResultReg;
                  AMDGPU::SReg_32_XM0RegClass, *Add, false, 0,
            ResultReg = NewDest;
            ResultReg = TmpResultReg;
          if (!TmpScaledReg.isValid()) {
        MI->eraseFromParent();
           static_cast<int>(FIOperandNum) ==
               AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr));
    auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
    assert((SOffset.isImm() && SOffset.getImm() == 0));
    if (FrameReg != AMDGPU::NoRegister)
      SOffset.ChangeToRegister(FrameReg, false);
    int64_t Offset = FrameInfo.getObjectOffset(Index);
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
    int64_t NewOffset = OldImm + Offset;
    if (TII->isLegalMUBUFImmOffset(NewOffset) &&
      MI->eraseFromParent();
  if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
    return &AMDGPU::VReg_64RegClass;
    return &AMDGPU::VReg_96RegClass;
    return &AMDGPU::VReg_128RegClass;
    return &AMDGPU::VReg_160RegClass;
    return &AMDGPU::VReg_192RegClass;
    return &AMDGPU::VReg_224RegClass;
    return &AMDGPU::VReg_256RegClass;
    return &AMDGPU::VReg_288RegClass;
    return &AMDGPU::VReg_320RegClass;
    return &AMDGPU::VReg_352RegClass;
    return &AMDGPU::VReg_384RegClass;
    return &AMDGPU::VReg_512RegClass;
    return &AMDGPU::VReg_1024RegClass;
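  // The getAny*/getAligned*ClassForBitWidth helpers below all follow the same
  // pattern: map a bit width (32..1024) to the VGPR, AGPR, AV, or SGPR register
  // class of exactly that size, with the *_Align2 variants used on subtargets
  // that require even-aligned VGPR tuples.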
    return &AMDGPU::VReg_64_Align2RegClass;
    return &AMDGPU::VReg_96_Align2RegClass;
    return &AMDGPU::VReg_128_Align2RegClass;
    return &AMDGPU::VReg_160_Align2RegClass;
    return &AMDGPU::VReg_192_Align2RegClass;
    return &AMDGPU::VReg_224_Align2RegClass;
    return &AMDGPU::VReg_256_Align2RegClass;
    return &AMDGPU::VReg_288_Align2RegClass;
    return &AMDGPU::VReg_320_Align2RegClass;
    return &AMDGPU::VReg_352_Align2RegClass;
    return &AMDGPU::VReg_384_Align2RegClass;
    return &AMDGPU::VReg_512_Align2RegClass;
    return &AMDGPU::VReg_1024_Align2RegClass;
    return &AMDGPU::VReg_1RegClass;
    return &AMDGPU::VGPR_16RegClass;
    return &AMDGPU::VGPR_32RegClass;
    return &AMDGPU::AReg_64RegClass;
    return &AMDGPU::AReg_96RegClass;
    return &AMDGPU::AReg_128RegClass;
    return &AMDGPU::AReg_160RegClass;
    return &AMDGPU::AReg_192RegClass;
    return &AMDGPU::AReg_224RegClass;
    return &AMDGPU::AReg_256RegClass;
    return &AMDGPU::AReg_288RegClass;
    return &AMDGPU::AReg_320RegClass;
    return &AMDGPU::AReg_352RegClass;
    return &AMDGPU::AReg_384RegClass;
    return &AMDGPU::AReg_512RegClass;
    return &AMDGPU::AReg_1024RegClass;
    return &AMDGPU::AReg_64_Align2RegClass;
    return &AMDGPU::AReg_96_Align2RegClass;
    return &AMDGPU::AReg_128_Align2RegClass;
    return &AMDGPU::AReg_160_Align2RegClass;
    return &AMDGPU::AReg_192_Align2RegClass;
    return &AMDGPU::AReg_224_Align2RegClass;
    return &AMDGPU::AReg_256_Align2RegClass;
    return &AMDGPU::AReg_288_Align2RegClass;
    return &AMDGPU::AReg_320_Align2RegClass;
    return &AMDGPU::AReg_352_Align2RegClass;
    return &AMDGPU::AReg_384_Align2RegClass;
    return &AMDGPU::AReg_512_Align2RegClass;
    return &AMDGPU::AReg_1024_Align2RegClass;
    return &AMDGPU::AGPR_LO16RegClass;
    return &AMDGPU::AGPR_32RegClass;
    return &AMDGPU::AV_64RegClass;
    return &AMDGPU::AV_96RegClass;
    return &AMDGPU::AV_128RegClass;
    return &AMDGPU::AV_160RegClass;
    return &AMDGPU::AV_192RegClass;
    return &AMDGPU::AV_224RegClass;
    return &AMDGPU::AV_256RegClass;
    return &AMDGPU::AV_288RegClass;
    return &AMDGPU::AV_320RegClass;
    return &AMDGPU::AV_352RegClass;
    return &AMDGPU::AV_384RegClass;
    return &AMDGPU::AV_512RegClass;
    return &AMDGPU::AV_1024RegClass;
    return &AMDGPU::AV_64_Align2RegClass;
    return &AMDGPU::AV_96_Align2RegClass;
    return &AMDGPU::AV_128_Align2RegClass;
    return &AMDGPU::AV_160_Align2RegClass;
    return &AMDGPU::AV_192_Align2RegClass;
    return &AMDGPU::AV_224_Align2RegClass;
    return &AMDGPU::AV_256_Align2RegClass;
    return &AMDGPU::AV_288_Align2RegClass;
    return &AMDGPU::AV_320_Align2RegClass;
    return &AMDGPU::AV_352_Align2RegClass;
    return &AMDGPU::AV_384_Align2RegClass;
    return &AMDGPU::AV_512_Align2RegClass;
    return &AMDGPU::AV_1024_Align2RegClass;
    return &AMDGPU::AV_32RegClass;
    return &AMDGPU::SReg_32RegClass;
    return &AMDGPU::SReg_64RegClass;
    return &AMDGPU::SGPR_96RegClass;
    return &AMDGPU::SGPR_128RegClass;
    return &AMDGPU::SGPR_160RegClass;
    return &AMDGPU::SGPR_192RegClass;
    return &AMDGPU::SGPR_224RegClass;
    return &AMDGPU::SGPR_256RegClass;
    return &AMDGPU::SGPR_288RegClass;
    return &AMDGPU::SGPR_320RegClass;
    return &AMDGPU::SGPR_352RegClass;
    return &AMDGPU::SGPR_384RegClass;
    return &AMDGPU::SGPR_512RegClass;
    return &AMDGPU::SGPR_1024RegClass;
  if (Reg.isVirtual())
    RC = MRI.getRegClass(Reg);
    RC = getPhysRegBaseClass(Reg);
  unsigned Size = getRegSizeInBits(*SRC);
  assert(VRC && "Invalid register class size");
  unsigned Size = getRegSizeInBits(*SRC);
  assert(ARC && "Invalid register class size");
  unsigned Size = getRegSizeInBits(*VRC);
    return &AMDGPU::SGPR_32RegClass;
  assert(SRC && "Invalid register class size");
                                         unsigned SubIdx) const {
      getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
  return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
  if (ReserveHighestRegister) {
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
                                  unsigned EltSize) const {
  assert(RegBitWidth >= 32 && RegBitWidth <= 1024 && EltSize >= 2);
  const unsigned RegHalves = RegBitWidth / 16;
  const unsigned EltHalves = EltSize / 2;
  assert(RegSplitParts.size() + 1 >= EltHalves);
  const std::vector<int16_t> &Parts = RegSplitParts[EltHalves - 1];
  const unsigned NumParts = RegHalves / EltHalves;
  return ArrayRef(Parts.data(), NumParts);
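  // getRegSplitParts returns the sub-register indices that cover a register of
  // RegBitWidth bits when it is split into EltSize-byte pieces; both quantities
  // are expressed in 16-bit halves so 16-bit sub-registers are representable,
  // and the precomputed RegSplitParts table supplies the indices themselves.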
  return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
  return getSubRegisterClass(SrcRC, MO.getSubReg());
  unsigned SrcSize = getRegSizeInBits(*SrcRC);
  unsigned DstSize = getRegSizeInBits(*DstRC);
  unsigned NewSize = getRegSizeInBits(*NewRC);
  if (SrcSize <= 32 || DstSize <= 32)
  return NewSize <= DstSize || NewSize <= SrcSize;
  switch (RC->getID()) {
    return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
                                            unsigned Idx) const {
  if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
      Idx == AMDGPU::RegisterPressureSets::AGPR_32)
  if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
  static const int Empty[] = { -1 };
  if (RegPressureIgnoredUnits[RegUnit])
  return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
  std::pair<unsigned, Register> Hint = MRI.getRegAllocationHint(VirtReg);
  switch (Hint.first) {
          getMatchingSuperReg(Paired, AMDGPU::lo16, &AMDGPU::VGPR_32RegClass);
    } else if (VRM && VRM->hasPhys(Paired)) {
      PairedPhys = getMatchingSuperReg(VRM->getPhys(Paired), AMDGPU::lo16,
                                       &AMDGPU::VGPR_32RegClass);
      PairedPhys = TRI->getSubReg(Paired, AMDGPU::lo16);
    } else if (VRM && VRM->hasPhys(Paired)) {
      PairedPhys = TRI->getSubReg(VRM->getPhys(Paired), AMDGPU::lo16);
    if (AMDGPU::VGPR_16RegClass.contains(PhysReg) &&
        !MRI.isReserved(PhysReg))
  return AMDGPU::SGPR30_SGPR31;
  switch (RB.getID()) {
  case AMDGPU::VGPRRegBankID:
  case AMDGPU::VCCRegBankID:
  case AMDGPU::SGPRRegBankID:
  case AMDGPU::AGPRRegBankID:
  if (const RegisterBank *RB = dyn_cast<const RegisterBank *>(RCOrRB))
  if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RCOrRB))
    return getAllocatableClass(RC);
  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
  return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                   : &AMDGPU::VReg_64RegClass;
  switch ((int)RCID) {
  case AMDGPU::SReg_1RegClassID:
  case AMDGPU::SReg_1_XEXECRegClassID:
    return AMDGPUGenRegisterInfo::getRegClass(RCID);
  if (Reg.isVirtual()) {
            : MRI.getMaxLaneMaskForVReg(Reg);
        if ((S.LaneMask & SubLanes) == SubLanes) {
          V = S.getVNInfoAt(UseIdx);
    for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
  if (!Def || !MDT.dominates(Def, &Use))
  assert(Def->modifiesRegister(Reg, this));
  assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
                            AMDGPU::SReg_32RegClass,
                            AMDGPU::AGPR_32RegClass } ) {
    if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
  if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
                                            &AMDGPU::VGPR_32RegClass)) {
  return AMDGPU::NoRegister;
  unsigned Size = getRegSizeInBits(*RC);
    return std::min(128u, getSubRegIdxSize(SubReg));
    return std::min(32u, getSubRegIdxSize(SubReg));
                                            bool IncludeCalls) const {
    if (MRI.isPhysRegUsed(Reg, !IncludeCalls))
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
Analysis containing CSE Info
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, const SIInstrInfo *TII)
static void emitUnsupportedError(const Function &Fn, const MachineInstr &MI, const Twine &ErrMsg)
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static bool isFIPlusImmOrVGPR(const SIRegisterInfo &TRI, const MachineInstr &MI)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static const char * getRegisterName(MCRegister Reg)
bool isBottomOfStack() const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool test(unsigned Idx) const
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
bool empty() const
empty - Tests whether there are no bits in this bitvector.
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
const SIRegisterInfo * getRegisterInfo() const override
const SIFrameLowering * getFrameLowering() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool hasFlatScratchSTMode() const
unsigned getMaxWaveScratchSize() const
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
MachineDominatorTree & getDomTree()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
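As a brief illustration of the LiveRegUnits query above, here is a minimal sketch (TRI, MBB, and the choice of VGPR0 are assumptions for the example, not taken from this file) that checks whether a physical register is free at the end of a block:

    // #include "llvm/CodeGen/LiveRegUnits.h"
    llvm::LiveRegUnits Units(TRI);   // TRI: const TargetRegisterInfo &
    Units.addLiveOuts(MBB);          // seed with the block's live-out registers
    if (Units.available(AMDGPU::VGPR0)) {
      // No part of VGPR0 is live here, so it could serve as a temporary.
    }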
Describe properties that are true of each instruction in the target description file.
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check that the provided unsigned value is a valid MCRegister.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
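A minimal sketch of building a MachineMemOperand for a stack slot with this allocator and MachinePointerInfo::getFixedStack (listed further below); MF and FI are assumed to be an existing MachineFunction and frame index, and the 32-bit memory type is an arbitrary choice for illustration:

    MachineFrameInfo &FrameInfo = MF.getFrameInfo();
    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, LLT::scalar(32),
        FrameInfo.getObjectAlign(FI));   // alignment of the stack object itself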
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
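The builder methods above are normally chained off BuildMI (listed later in this index). A hedged sketch, assuming MBB, MI, DL, TII, DstReg, SrcReg, and IsKill already exist in the surrounding code:

    // #include "llvm/CodeGen/MachineInstrBuilder.h"
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstReg)
        .addReg(SrcReg, getKillRegState(IsKill));   // kill flag folded into the use operand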
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
LLVM_ABI void setIsRenamable(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
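Frame-index elimination typically rewrites one of these operands in place. A sketch under the assumption that MI is a MachineInstr whose operand FIOperandNum holds the frame index and NewOffset is the already-resolved immediate (both hypothetical names):

    MachineOperand &FIOp = MI.getOperand(FIOperandNum);
    if (FIOp.isFI())
      FIOp.ChangeToImmediate(NewOffset);   // fold the resolved offset directly into the operand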
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return true if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
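A hedged usage sketch of this scavenging entry point; RS and MI are assumed to be a live RegScavenger and the instruction that needs a temporary:

    // #include "llvm/CodeGen/RegisterScavenging.h"
    Register TmpVGPR = RS->scavengeRegisterBackwards(
        AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false, /*SPAdj=*/0);
    // TmpVGPR is now safe to clobber at MI; the scavenger may have inserted a spill/restore.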
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
static bool isVOP3(const MachineInstr &MI)
static bool isFLATScratch(const MachineInstr &MI)
static bool isMUBUF(const MachineInstr &MI)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getLongBranchReservedReg() const
unsigned getDynamicVGPRBlockSize() const
Register getStackPtrOffsetReg() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
Register getSGPRForEXECCopy() const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
Register getVGPRForAGPRCopy() const
Register getFrameOffsetReg() const
BitVector getNonWWMRegMask() const
bool checkFlag(Register Reg, uint8_t Flag) const
void addToSpilledVGPRs(unsigned num)
const ReservedRegSet & getWWMReservedRegs() const
void addToSpilledSGPRs(unsigned num)
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
static bool isChainScratchRegister(Register VGPR)
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
bool getRegAllocationHints(Register VirtReg, ArrayRef< MCPhysReg > Order, SmallVectorImpl< MCPhysReg > &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
SmallVector< StringLiteral > getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI, const TargetRegisterClass &RC, bool IncludeCalls=true) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this will only succeed if this manages to find a free VGPR lane to spill.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
bool isValid() const
Returns true if this is a valid index.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by register allocat...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
const uint8_t TSFlags
Configurable target specific flags.
ArrayRef< MCPhysReg > getRegisters() const
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same...
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal cal...
virtual bool getRegAllocationHints(Register VirtReg, ArrayRef< MCPhysReg > Order, SmallVectorImpl< MCPhysReg > &Hints, const MachineFunction &MF, const VirtRegMap *VRM=nullptr, const LiveRegMatrix *Matrix=nullptr) const
Get a list of 'hint' registers that the register allocator should try first when allocating a physica...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
A Use represents the edge between a Value definition and its users.
VNInfo - Value Number Information.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values reserved for floating-point constants.
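As an illustration of the inline-literal queries, a small sketch; Imm and ST are assumed names for a candidate immediate and the GCNSubtarget:

    int32_t Imm = 64;   // 64 is the largest inlinable integer constant
    bool Inline = AMDGPU::isInlinableLiteral32(Imm, ST.hasInv2PiInlineImm());
    // If Inline is false, the value must be materialized as a literal constant instead.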
@ OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_INLINE_AC_LAST
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
int popcount(T Value) noexcept
Count the number of set bits in a value.
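For example, popcount is what turns a lane mask into a lane count; a trivial sketch:

    // #include "llvm/ADT/bit.h"
    unsigned Mask = 0xF0F0u;
    int NumSetLanes = llvm::popcount(Mask);   // 8 bits set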
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.
auto reverse(ContainerTy &&C)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
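The arithmetic helpers alignDown and divideCeil above, in a minimal sketch (values chosen only for illustration):

    // #include "llvm/Support/MathExtras.h"
    unsigned Lo = llvm::alignDown(19u, 8u);   // 16: largest multiple of 8 that is <= 19
    unsigned N  = llvm::divideCeil(7u, 2u);   // 4:  ceil(7 / 2)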
unsigned getDefRegState(bool B)
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
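A minimal sketch of the once-initialization pattern; the flag and lambda body are illustrative:

    // #include "llvm/Support/Threading.h"
    static llvm::once_flag TableInitFlag;
    llvm::call_once(TableInitFlag, [] {
      // One-time, thread-safe initialization of a shared table goes here.
    });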
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
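For instance, offsetting a pointer weakens its known alignment; a one-line sketch:

    // #include "llvm/Support/Alignment.h"
    llvm::Align A = llvm::commonAlignment(llvm::Align(16), /*Offset=*/8);   // yields Align(8)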
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
The llvm::once_flag structure.