84#include "llvm/IR/IntrinsicsAMDGPU.h"
86#define GET_TARGET_REGBANK_IMPL
87#include "AMDGPUGenRegisterBank.inc"
90#include "AMDGPUGenRegisterBankInfo.def"
93using namespace MIPatternMatch;
// --- Fragment of the ApplyRegBankMapping observer helper. NOTE(review):
// --- this chunk is a sampled extract; the enclosing class declaration and
// --- member declarations are not visible here.
// Constructor: installs this object as the builder's change observer so
// instructions/registers created during expansion get NewBank applied.
109 :
B(B), RBI(RBI_),
MRI(MRI_), NewBank(RB) {
// Only one observer may be active on the builder at a time.
110 assert(!B.isObservingChanges());
111 B.setChangeObserver(*
this);
// Destructor: detaches the observer, leaving the builder clean.
114 ~ApplyRegBankMapping()
override {
118 B.stopObservingChanges();
123 const unsigned Opc =
MI.getOpcode();
124 if (
Opc == AMDGPU::G_ANYEXT ||
Opc == AMDGPU::G_ZEXT ||
125 Opc == AMDGPU::G_SEXT) {
132 if (SrcBank == &AMDGPU::VCCRegBank) {
136 assert(NewBank == &AMDGPU::VGPRRegBank);
140 B.setInsertPt(*
MI.getParent(),
MI);
142 auto True = B.buildConstant(
S32,
Opc == AMDGPU::G_SEXT ? -1 : 1);
143 auto False = B.buildConstant(
S32, 0);
144 B.buildSelect(DstReg, SrcReg, True, False);
145 MRI.setRegBank(True.getReg(0), *NewBank);
146 MRI.setRegBank(False.getReg(0), *NewBank);
147 MI.eraseFromParent();
150 assert(!
MRI.getRegClassOrRegBank(DstReg));
151 MRI.setRegBank(DstReg, *NewBank);
156 if (
Opc == AMDGPU::G_TRUNC) {
159 assert(DstBank != &AMDGPU::VCCRegBank);
169 if (Reg.isPhysical() ||
MRI.getRegClassOrRegBank(Reg))
174 assert(NewBank == &AMDGPU::VGPRRegBank &&
175 "s1 operands should only be used for vector bools");
176 assert((
MI.getOpcode() != AMDGPU::G_TRUNC &&
177 MI.getOpcode() != AMDGPU::G_ANYEXT) &&
178 "not expecting legalization artifacts here");
179 RB = &AMDGPU::VCCRegBank;
182 MRI.setRegBank(Reg, *RB);
205 : Subtarget(ST),
TRI(Subtarget.getRegisterInfo()),
206 TII(Subtarget.getInstrInfo()) {
211 static auto InitializeRegisterBankOnce = [
this]() {
213 &
getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
214 &
getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
218 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
222 unsigned BankID = Bank.
getID();
223 return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
227 return RB != &AMDGPU::SGPRRegBank;
234 if (Dst.getID() == AMDGPU::SGPRRegBankID &&
236 return std::numeric_limits<unsigned>::max();
247 (Dst.getID() == AMDGPU::SGPRRegBankID) &&
249 Src.getID() == AMDGPU::SGPRRegBankID ||
250 Src.getID() == AMDGPU::VCCRegBankID))
251 return std::numeric_limits<unsigned>::max();
254 if (Dst.getID() == AMDGPU::AGPRRegBankID &&
255 Src.getID() == AMDGPU::AGPRRegBankID)
289 if (&RC == &AMDGPU::SReg_1RegClass)
290 return AMDGPU::VCCRegBank;
299 return AMDGPU::SGPRRegBank;
301 return Ty ==
LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
304 return TRI->
isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
307template <
unsigned NumOps>
311 const std::array<unsigned, NumOps> RegSrcOpIdx,
318 unsigned Sizes[NumOps];
319 for (
unsigned I = 0;
I < NumOps; ++
I) {
320 Register Reg =
MI.getOperand(RegSrcOpIdx[
I]).getReg();
324 for (
unsigned I = 0, E =
MI.getNumExplicitDefs();
I != E; ++
I) {
326 Operands[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
330 unsigned MappingID = 2;
331 for (
const auto &Entry : Table) {
332 for (
unsigned I = 0;
I < NumOps; ++
I) {
333 int OpIdx = RegSrcOpIdx[
I];
// Alternative register-bank mappings for lane-access intrinsics. Each table
// row lists per-operand bank IDs plus a relative cost; higher cost encodes
// the less desirable mapping (e.g. one needing a waterfall loop).
349 case Intrinsic::amdgcn_readlane: {
// Lane index in an SGPR: cheap (cost 1).
352 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
// Lane index in a VGPR: more expensive (cost 2).
355 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
// Register operands are the def (0) and sources at operand slots 2 and 3.
358 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
359 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
361 case Intrinsic::amdgcn_writelane: {
// Both value and lane index scalar: cost 1.
364 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
// One of value / lane index in a VGPR: cost 2.
367 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
370 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
// Both in VGPRs: most expensive (cost 3).
373 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
377 const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
378 return addMappingFromTable<4>(
MI,
MRI, RegSrcOpIdx, Table);
390 case Intrinsic::amdgcn_s_buffer_load: {
// All-SGPR operands map to the scalar buffer load directly (cost 1);
// VGPR rsrc/offset require expansion, hence the steep cost values.
393 { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
396 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
399 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
402 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
// rsrc (2) and offset (3) are the register source operands here.
406 const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
407 return addMappingFromTable<2>(
MI,
MRI, RegSrcOpIdx, Table);
409 case Intrinsic::amdgcn_ds_ordered_add:
410 case Intrinsic::amdgcn_ds_ordered_swap: {
// SGPR address operand preferred (cost 1) over VGPR (cost 2).
414 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
417 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
420 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
421 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
423 case Intrinsic::amdgcn_s_sendmsg:
424 case Intrinsic::amdgcn_s_sendmsghalt: {
// Message payload must ultimately be scalar; VGPR source costs more.
428 { { AMDGPU::SGPRRegBankID }, 1 },
431 { { AMDGPU::VGPRRegBankID }, 3 }
434 const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
435 return addMappingFromTable<1>(
MI,
MRI, RegSrcOpIdx, Table);
445 if (!
MI.hasOneMemOperand())
478 switch (
MI.getOpcode()) {
479 case TargetOpcode::G_CONSTANT:
480 case TargetOpcode::G_IMPLICIT_DEF: {
484 { { AMDGPU::VGPRRegBankID }, 1 },
485 { { AMDGPU::SGPRRegBankID }, 1 },
486 { { AMDGPU::VCCRegBankID }, 1 }
489 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
494 case TargetOpcode::G_FCONSTANT:
495 case TargetOpcode::G_FRAME_INDEX:
496 case TargetOpcode::G_GLOBAL_VALUE: {
498 { { AMDGPU::VGPRRegBankID }, 1 },
499 { { AMDGPU::SGPRRegBankID }, 1 }
502 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
504 case TargetOpcode::G_AND:
505 case TargetOpcode::G_OR:
506 case TargetOpcode::G_XOR: {
513 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
514 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
515 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
521 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
522 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
523 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size)}),
534 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
535 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
536 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
542 {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
543 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
544 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
549 case TargetOpcode::G_LOAD:
550 case TargetOpcode::G_ZEXTLOAD:
551 case TargetOpcode::G_SEXTLOAD: {
553 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
562 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
563 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
571 {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
572 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
585 case TargetOpcode::G_SELECT: {
589 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
590 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
591 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
597 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
598 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
599 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
605 case TargetOpcode::G_UADDE:
606 case TargetOpcode::G_USUBE:
607 case TargetOpcode::G_SADDE:
608 case TargetOpcode::G_SSUBE: {
612 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
613 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
614 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
615 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
616 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
622 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
623 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
624 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
625 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
630 case AMDGPU::G_BRCOND: {
631 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
636 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
nullptr}),
642 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr }),
647 case AMDGPU::G_INTRINSIC:
648 case AMDGPU::G_INTRINSIC_CONVERGENT:
650 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
651 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
666 Register LoLHS =
MRI->createGenericVirtualRegister(HalfTy);
667 Register HiLHS =
MRI->createGenericVirtualRegister(HalfTy);
669 MRI->setRegBank(LoLHS, *Bank);
670 MRI->setRegBank(HiLHS, *Bank);
675 B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
686 MRI.setType(Reg, NewTy);
706 LLT Ty =
MRI.getType(Src);
709 if (Bank == &AMDGPU::SGPRRegBank)
715 if (Bank != &AMDGPU::VGPRRegBank) {
717 Src =
B.buildCopy(Ty, Src).getReg(0);
718 MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
722 unsigned NumParts = Bits / 32;
729 auto Unmerge =
B.buildUnmerge(
S32, Src);
730 for (
unsigned i = 0; i < NumParts; ++i)
734 for (
unsigned i = 0; i < NumParts; ++i) {
736 Register DstPart =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
737 MRI.setType(DstPart, NumParts == 1 ? Ty :
S32);
742 assert(Constrained &&
"Failed to constrain readfirstlane src reg");
744 B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
752 Register Dst =
B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
753 MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
786 const unsigned MovExecOpc =
788 const unsigned MovExecTermOpc =
792 AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
794 AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
796 AMDGPU::EXEC_LO : AMDGPU::EXEC;
799 const int OrigRangeSize = std::distance(
Range.begin(),
Range.end());
803 Register SaveExecReg =
MRI.createVirtualRegister(WaveRC);
804 Register InitSaveExecReg =
MRI.createVirtualRegister(WaveRC);
807 B.buildInstr(TargetOpcode::IMPLICIT_DEF)
808 .addDef(InitSaveExecReg);
810 Register PhiExec =
MRI.createVirtualRegister(WaveRC);
811 Register NewExec =
MRI.createVirtualRegister(WaveRC);
837 B.setInsertPt(*LoopBB, LoopBB->
end());
839 B.buildInstr(TargetOpcode::PHI)
841 .addReg(InitSaveExecReg)
856 auto NewEnd = BodyBB->
end();
863 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
868 if (!SGPROperandRegs.
count(OldReg))
873 auto OldVal = WaterfalledRegMap.
find(OldReg);
874 if (OldVal != WaterfalledRegMap.
end()) {
875 Op.setReg(OldVal->second);
880 LLT OpTy =
MRI.getType(OpReg);
883 if (OpBank != &AMDGPU::VGPRRegBank) {
886 OpReg =
B.buildCopy(OpTy, OpReg).getReg(0);
887 MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
895 bool Is64 = OpSize % 64 == 0;
896 unsigned PartSize = Is64 ? 64 : 32;
898 unsigned NumParts = OpSize / PartSize;
904 CurrentLaneParts.
push_back(CurrentLaneReg);
906 auto UnmergeOp =
B.buildUnmerge(PartTy, OpReg);
907 auto UnmergeCurrentLane =
B.buildUnmerge(PartTy, CurrentLaneReg);
908 for (
unsigned i = 0; i < NumParts; ++i) {
910 CurrentLaneParts.
push_back(UnmergeCurrentLane.getReg(i));
911 MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
912 MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
916 for (
unsigned i = 0; i < NumParts; ++i) {
918 OpParts[i]).getReg(0);
919 MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);
924 CondReg =
B.buildAnd(
S1, CondReg, CmpReg).getReg(0);
925 MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
929 Op.setReg(CurrentLaneReg);
932 WaterfalledRegMap.
insert(std::pair(OldReg,
Op.getReg()));
937 CondReg =
B.buildIntrinsic(Intrinsic::amdgcn_ballot,
941 MRI.setRegClass(CondReg, WaveRC);
944 B.buildInstr(AndSaveExecOpc)
948 MRI.setSimpleHint(NewExec, CondReg);
950 B.setInsertPt(*BodyBB, BodyBB->
end());
953 B.buildInstr(XorTermOpc)
962 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
969 B.setMBB(*RestoreExecBB);
970 B.buildInstr(MovExecTermOpc)
972 .addReg(SaveExecReg);
976 B.setInsertPt(*RemainderBB, RemainderBB->
begin());
988 for (
unsigned Op : OpIndices) {
992 if (OpBank->
getID() != AMDGPU::SGPRRegBankID)
993 SGPROperandRegs.
insert(Reg);
997 return !SGPROperandRegs.
empty();
1020 if (Bank == &AMDGPU::SGPRRegBank)
1024 MI.getOperand(
OpIdx).setReg(Reg);
1036 assert(FirstSize % EltSize == 0);
1038 unsigned FirstPartNumElts = FirstSize / EltSize;
1039 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
1060 const LLT LoadTy =
MRI.getType(DstReg);
1063 const unsigned MaxNonSmrdLoadSize = 128;
1067 if (DstBank == &AMDGPU::SGPRRegBank) {
1078 if (LoadSize == 32 &&
1082 if (LoadSize == 32 &&
1091 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
1093 if (LoadSize == 32) {
1097 if (
MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
1099 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1100 B.buildSExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1101 }
else if (
MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
1103 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1104 B.buildZExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1107 B.buildLoadFromOffset(
MI.getOperand(0), PtrReg, *MMO, 0);
1121 auto WideLoad =
B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
1123 B.buildTrunc(
MI.getOperand(0), WideLoad);
1125 B.buildDeleteTrailingVectorElements(
MI.getOperand(0).getReg(),
1130 MI.eraseFromParent();
1135 if (LoadSize <= MaxNonSmrdLoadSize)
1140 if (SrcRegs.
empty())
1146 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
1147 MRI.setType(BasePtrReg, PtrTy);
1153 assert(LoadSize % MaxNonSmrdLoadSize == 0);
1154 unsigned NumSplitParts = LoadTy.
getSizeInBits() / MaxNonSmrdLoadSize;
1155 const LLT LoadSplitTy = LoadTy.
divide(NumSplitParts);
1156 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1168 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
1179 const auto &TFI = *ST.getFrameLowering();
1184 "Stack grows upwards for AMDGPU");
1187 Register AllocSize =
MI.getOperand(1).getReg();
1192 if (SizeBank != &AMDGPU::SGPRRegBank) {
1193 auto WaveReduction =
1194 B.buildIntrinsic(Intrinsic::amdgcn_wave_reduce_umax, {
LLT::scalar(32)})
1197 AllocSize = WaveReduction.getReg(0);
1200 LLT PtrTy =
MRI.getType(Dst);
1205 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1207 auto WaveSize =
B.buildConstant(
LLT::scalar(32), ST.getWavefrontSizeLog2());
1208 auto ScaledSize =
B.buildShl(IntPtrTy, AllocSize, WaveSize);
1210 auto OldSP =
B.buildCopy(PtrTy,
SPReg);
1211 if (Alignment > TFI.getStackAlign()) {
1212 auto StackAlignMask = (Alignment.
value() << ST.getWavefrontSizeLog2()) - 1;
1213 auto Tmp1 =
B.buildPtrAdd(PtrTy, OldSP,
1215 B.buildMaskLowPtrBits(Dst, Tmp1,
1216 Log2(Alignment) + ST.getWavefrontSizeLog2());
1218 B.buildCopy(Dst, OldSP);
1220 auto PtrAdd =
B.buildPtrAdd(PtrTy, Dst, ScaledSize);
1221 B.buildCopy(
SPReg, PtrAdd);
1222 MI.eraseFromParent();
1229 int RsrcIdx)
const {
1230 const int NumDefs =
MI.getNumExplicitDefs();
1234 RsrcIdx += NumDefs + 1;
1241 for (
int I = NumDefs, NumOps =
MI.getNumOperands();
I != NumOps; ++
I) {
1242 if (!
MI.getOperand(
I).isReg())
1246 if (
I == RsrcIdx ||
I == RsrcIdx + 1)
1258 Register &SOffsetReg, int64_t &InstOffsetVal,
Align Alignment)
const {
1262 if (std::optional<int64_t> Imm =
1266 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1267 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1268 InstOffsetVal = ImmOffset;
1270 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1271 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1272 return SOffset + ImmOffset;
1287 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1288 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1289 InstOffsetVal = ImmOffset;
1295 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1296 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1298 InstOffsetVal = ImmOffset;
1312 if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
1318 if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
1328 VOffsetReg = CombinedOffset;
1330 VOffsetReg =
B.buildCopy(
S32, CombinedOffset).getReg(0);
1331 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1334 SOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1335 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
// Switch fragment mapping each scalar (SMEM) buffer-load pseudo to its
// VMEM buffer-load equivalent, preserving the memory extension kind
// (unsigned/signed byte and short variants). Used when the load cannot
// stay on the scalar unit. The enclosing function signature and default
// case are outside this chunk.
1341 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
1342 return AMDGPU::G_AMDGPU_BUFFER_LOAD;
1343 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
1344 return AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
1345 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
1346 return AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE;
1347 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
1348 return AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
1349 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT:
1350 return AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
1364 LLT Ty =
MRI.getType(Dst);
1370 if (RSrcBank == &AMDGPU::SGPRRegBank &&
1371 OffsetBank == &AMDGPU::SGPRRegBank)
1379 if (LoadSize == 256 || LoadSize == 512) {
1380 NumLoads = LoadSize / 128;
1381 Ty = Ty.
divide(NumLoads);
1386 const Align Alignment = NumLoads > 1 ?
Align(16 * NumLoads) :
Align(1);
1392 int64_t ImmOffset = 0;
1395 SOffset, ImmOffset, Alignment);
1400 const Align MemAlign(4);
1414 B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
1421 for (
int i = 0; i < NumLoads; ++i) {
1422 if (NumLoads == 1) {
1425 LoadParts[i] =
MRI.createGenericVirtualRegister(Ty);
1426 MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
1434 .addDef(LoadParts[i])
1439 .addImm(ImmOffset + 16 * i)
1442 .addMemOperand(MMO);
1448 if (RSrcBank != &AMDGPU::SGPRRegBank) {
1451 B.setInstr(*Span.
begin());
1452 MI.eraseFromParent();
1456 OpsToWaterfall.
insert(RSrc);
1461 if (NumLoads != 1) {
1463 B.buildConcatVectors(Dst, LoadParts);
1465 B.buildMergeLikeInstr(Dst, LoadParts);
1469 if (RSrcBank == &AMDGPU::SGPRRegBank)
1470 MI.eraseFromParent();
1485 LLT Ty =
MRI.getType(DstReg);
1489 unsigned FirstOpnd = isa<GIntrinsic>(
MI) ? 2 : 1;
1490 Register SrcReg =
MI.getOperand(FirstOpnd).getReg();
1491 Register OffsetReg =
MI.getOperand(FirstOpnd + 1).getReg();
1492 Register WidthReg =
MI.getOperand(FirstOpnd + 2).getReg();
1496 if (DstBank == &AMDGPU::VGPRRegBank) {
1502 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1506 auto ShiftOffset =
Signed ?
B.buildAShr(
S64, SrcReg, OffsetReg)
1507 :
B.buildLShr(
S64, SrcReg, OffsetReg);
1508 auto UnmergeSOffset =
B.buildUnmerge({
S32,
S32}, ShiftOffset);
1515 auto Zero =
B.buildConstant(
S32, 0);
1516 auto WidthImm = ConstWidth->Value.getZExtValue();
1517 if (WidthImm <= 32) {
1521 Signed ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
1522 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
1524 Signed ?
B.buildAShr(
S32, Extract,
B.buildConstant(
S32, 31)) : Zero;
1525 B.buildMergeLikeInstr(DstReg, {Extract, Extend});
1529 auto UpperWidth =
B.buildConstant(
S32, WidthImm - 32);
1532 ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
1533 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
1534 B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
1536 MI.eraseFromParent();
1542 auto ExtShift =
B.buildSub(
S32,
B.buildConstant(
S32, 64), WidthReg);
1543 auto SignBit =
B.buildShl(
S64, ShiftOffset, ExtShift);
1545 B.buildAShr(
S64, SignBit, ExtShift);
1547 B.buildLShr(
S64, SignBit, ExtShift);
1548 MI.eraseFromParent();
1554 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1557 auto OffsetMask =
B.buildConstant(
S32, maskTrailingOnes<unsigned>(6));
1558 auto ClampOffset =
B.buildAnd(
S32, OffsetReg, OffsetMask);
1561 auto ShiftWidth =
B.buildShl(
S32, WidthReg,
B.buildConstant(
S32, 16));
1566 auto MergedInputs =
B.buildOr(
S32, ClampOffset, ShiftWidth);
1570 unsigned Opc = Ty ==
S32 ? (
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1571 (
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1573 auto MIB =
B.buildInstr(
Opc, {DstReg}, {SrcReg, MergedInputs});
1577 MI.eraseFromParent();
1595 if (
MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
1598 bool IsUnsigned =
MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
1602 bool DstOnValu =
MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
1603 bool Accumulate =
true;
1612 Register DstLo =
B.buildMul(
S32, Src0, Src1).getReg(0);
1613 bool MulHiInVgpr =
false;
1615 MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
1618 DstHi = IsUnsigned ?
B.buildUMulH(
S32, Src0, Src1).getReg(0)
1619 :
B.buildSMulH(
S32, Src0, Src1).getReg(0);
1620 MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
1625 MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
1626 MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
1628 DstHi = IsUnsigned ?
B.buildUMulH(
S32, VSrc0, VSrc1).getReg(0)
1629 :
B.buildSMulH(
S32, VSrc0, VSrc1).getReg(0);
1630 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1646 LLT CarryType = DstOnValu ?
S1 :
S32;
1648 DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
1650 DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
1655 Zero =
B.buildConstant(
S32, 0).getReg(0);
1656 MRI.setRegBank(Zero,
1657 MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
1661 MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
1662 : AMDGPU::SGPRRegBank);
1664 if (DstOnValu && !MulHiInVgpr) {
1665 Carry =
B.buildTrunc(
S1, Carry).getReg(0);
1666 MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
1672 DstLo =
B.buildCopy(
S32, DstLo).getReg(0);
1673 DstHi =
B.buildCopy(
S32, DstHi).getReg(0);
1674 MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
1675 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1678 auto Unmerge =
B.buildUnmerge(
S32, Src2);
1679 Register Src2Lo = Unmerge.getReg(0);
1680 Register Src2Hi = Unmerge.getReg(1);
1681 MRI.setRegBank(Src2Lo, DstBank);
1682 MRI.setRegBank(Src2Hi, DstBank);
1686 MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
1688 Carry =
B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
1689 MRI.setRegBank(Carry, CarryBank);
1692 auto AddLo =
B.buildUAddo(
S32, CarryType, DstLo, Src2Lo);
1693 DstLo = AddLo.getReg(0);
1694 Register CarryLo = AddLo.getReg(1);
1695 MRI.setRegBank(DstLo, DstBank);
1696 MRI.setRegBank(CarryLo, CarryBank);
1698 auto AddHi =
B.buildUAdde(
S32, CarryType, DstHi, Src2Hi, CarryLo);
1699 DstHi = AddHi.getReg(0);
1700 MRI.setRegBank(DstHi, DstBank);
1702 Register CarryHi = AddHi.getReg(1);
1703 MRI.setRegBank(CarryHi, CarryBank);
1708 Carry =
B.buildXor(CarryType, Carry, CarryHi).getReg(0);
1709 MRI.setRegBank(Carry, CarryBank);
1713 Carry =
B.buildConstant(CarryType, 0).getReg(0);
1714 MRI.setRegBank(Carry, CarryBank);
1718 B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});
1721 B.buildCopy(Dst1, Carry);
1723 B.buildTrunc(Dst1, Carry);
1726 MI.eraseFromParent();
// Switch fragment choosing the extension opcode matching an operation's
// signedness: sign-sensitive ops (arithmetic shift, signed min/max) need
// G_SEXT; logical shift and unsigned min/max need G_ZEXT; anything else
// can take G_ANYEXT. NOTE(review): the line between 1740 and 1742
// (presumably the default label) is missing from this sampled chunk.
1733 case TargetOpcode::G_ASHR:
1734 case TargetOpcode::G_SMIN:
1735 case TargetOpcode::G_SMAX:
1736 return TargetOpcode::G_SEXT;
1737 case TargetOpcode::G_LSHR:
1738 case TargetOpcode::G_UMIN:
1739 case TargetOpcode::G_UMAX:
1740 return TargetOpcode::G_ZEXT;
1742 return TargetOpcode::G_ANYEXT;
// Fragment of a static helper returning the (lo, hi) halves of a packed
// 2 x 16-bit value as two s32 registers. The value is first bitcast to
// s32; how each half is widened depends on ExtOpcode. The signature line
// is not visible in this chunk.
1748static std::pair<Register, Register>
1751 auto Bitcast =
B.buildBitcast(
S32, Src);
// Signed: low half via sext-in-reg(16), high half via arithmetic shift.
1753 if (ExtOpcode == TargetOpcode::G_SEXT) {
1754 auto ExtLo =
B.buildSExtInReg(
S32, Bitcast, 16);
1755 auto ShiftHi =
B.buildAShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1756 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
// For zext/anyext the high half is a logical shift right by 16.
1759 auto ShiftHi =
B.buildLShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
// Unsigned: mask the low 16 bits for the low half.
1760 if (ExtOpcode == TargetOpcode::G_ZEXT) {
1761 auto ExtLo =
B.buildAnd(
S32, Bitcast,
B.buildConstant(
S32, 0xffff));
1762 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
// Anyext: the low half's upper bits are don't-care, reuse the bitcast.
1765 assert(ExtOpcode == TargetOpcode::G_ANYEXT);
1766 return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
1774 if (!SrcReg.
empty()) {
1791 LLT StoreVT =
MRI.getType(Reg);
1795 auto Unmerge =
B.buildUnmerge(
S16, Reg);
1799 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
1809static std::pair<Register, unsigned>
1813 return std::pair(
Register(), Const);
1817 return std::pair(
Base, Const);
1820 return std::pair(Reg, 0);
1823std::pair<Register, unsigned>
1836 if (ImmOffset != 0) {
1845 unsigned Overflow = ImmOffset & ~MaxImm;
1846 ImmOffset -= Overflow;
1847 if ((int32_t)Overflow < 0) {
1848 Overflow += ImmOffset;
1853 if (Overflow != 0) {
1855 BaseReg =
B.buildConstant(
S32, Overflow).getReg(0);
1857 auto OverflowVal =
B.buildConstant(
S32, Overflow);
1858 BaseReg =
B.buildAdd(
S32, BaseReg, OverflowVal).getReg(0);
1864 BaseReg =
B.buildConstant(
S32, 0).getReg(0);
1866 return {BaseReg, C1};
1872 LLT SrcTy =
MRI.getType(SrcReg);
1875 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1882 Register TmpReg0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1883 Register TmpReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1885 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1887 .addUse(SrcReg, 0, AMDGPU::sub0);
1888 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1890 .addUse(SrcReg, 0, AMDGPU::sub1);
1891 B.buildInstr(AMDGPU::REG_SEQUENCE)
1894 .addImm(AMDGPU::sub0)
1896 .addImm(AMDGPU::sub1);
1907 unsigned ConstOffset) {
1913 auto MaterializedOffset =
B.buildConstant(
S32, ConstOffset);
1915 auto Add =
B.buildAdd(
S32, WaterfallIdx, MaterializedOffset);
1916 MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
1917 MRI.setRegBank(
Add.getReg(0), AMDGPU::SGPRRegBank);
// Fragment of a helper that populates the high 32-bit half (Hi32Reg) when
// widening Lo32Reg to 64 bits, per the extension opcode: zext writes zero,
// sext derives the sign bits, anyext leaves it undefined. NOTE(review):
// the function signature and parts of the sext branch (the shift-amount
// constant, presumably 31) are missing from this sampled chunk.
1929 bool IsBooleanSrc =
false) {
1930 if (ExtOpc == AMDGPU::G_ZEXT) {
// Zero-extend: the high half is simply 0.
1931 B.buildConstant(Hi32Reg, 0);
1932 }
else if (ExtOpc == AMDGPU::G_SEXT) {
// Boolean-valued sources (all-zeros or all-ones) can copy lo into hi
// directly — TODO confirm the guarding condition, it is not visible.
1936 B.buildCopy(Hi32Reg, Lo32Reg);
// Otherwise replicate the sign bit via an arithmetic shift.
1940 B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
1941 B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
1944 assert(ExtOpc == AMDGPU::G_ANYEXT &&
"not an integer extension");
// Anyext: the high bits are unspecified.
1945 B.buildUndef(Hi32Reg);
1949bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
1951 const OperandsMapper &OpdMapper)
const {
1958 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
1960 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
1962 LLT VecTy =
MRI.getType(VecReg);
1973 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
1975 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
1978 (DstBank == AMDGPU::SGPRRegBank &&
1979 SrcBank == AMDGPU::SGPRRegBank &&
1980 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
1981 : AMDGPU::VCCRegBank;
1984 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
1985 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
1986 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
1991 unsigned NumLanes = DstRegs.size();
1995 EltTy =
MRI.getType(DstRegs[0]);
1997 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
1999 for (
unsigned L = 0;
L < NumLanes; ++
L)
2000 Res[L] = UnmergeToEltTy.getReg(L);
2002 for (
unsigned I = 1;
I < NumElem; ++
I) {
2003 auto IC =
B.buildConstant(
S32,
I);
2004 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2006 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2008 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2009 auto S =
B.buildSelect(EltTy, Cmp,
2010 UnmergeToEltTy.getReg(
I * NumLanes + L), Res[L]);
2012 for (
unsigned N : { 0, 2, 3 })
2013 MRI.setRegBank(S->getOperand(
N).getReg(), DstBank);
2015 Res[
L] = S->getOperand(0).getReg();
2019 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2020 Register DstReg = (NumLanes == 1) ?
MI.getOperand(0).getReg() : DstRegs[
L];
2021 B.buildCopy(DstReg, Res[L]);
2022 MRI.setRegBank(DstReg, DstBank);
2025 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2026 MI.eraseFromParent();
2037 if (CurrBank && *CurrBank != Bank) {
2038 Register Copy =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
2039 MRI.setRegBank(Copy, Bank);
2043 MRI.setRegBank(Reg, Bank);
2047bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
2049 const OperandsMapper &OpdMapper)
const {
2056 *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
2058 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
2060 LLT VecTy =
MRI.getType(VecReg);
2071 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2073 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
2075 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
2078 (DstBank == AMDGPU::SGPRRegBank &&
2079 SrcBank == AMDGPU::SGPRRegBank &&
2080 InsBank == AMDGPU::SGPRRegBank &&
2081 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
2082 : AMDGPU::VCCRegBank;
2085 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
2086 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
2087 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
2092 unsigned NumLanes = InsRegs.size();
2095 InsRegs.push_back(
MI.getOperand(2).getReg());
2097 EltTy =
MRI.getType(InsRegs[0]);
2100 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
2103 for (
unsigned I = 0;
I < NumElem; ++
I) {
2104 auto IC =
B.buildConstant(
S32,
I);
2105 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2107 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2109 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2111 Register Op1 = UnmergeToEltTy.getReg(
I * NumLanes + L);
2122 if (MergeTy ==
MRI.getType(
MI.getOperand(0).getReg())) {
2123 B.buildBuildVector(
MI.getOperand(0), Ops);
2125 auto Vec =
B.buildBuildVector(MergeTy, Ops);
2126 MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
2127 B.buildBitcast(
MI.getOperand(0).getReg(), Vec);
2130 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2131 MI.eraseFromParent();
2144 if (DefRegs.
empty()) {
2152 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2163 if (Src0Regs.
empty())
2168 if (Src1Regs.
empty())
2191 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2193 Register Hi =
B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);
2194 Register MulLoHi =
B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);
2196 Register MulHiLo =
B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);
2197 B.buildAdd(DefRegs[1],
Add, MulHiLo);
2198 B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);
2200 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2201 MI.eraseFromParent();
2207 B.setInstrAndDebugLoc(
MI);
2208 unsigned Opc =
MI.getOpcode();
2211 case AMDGPU::G_CONSTANT:
2212 case AMDGPU::G_IMPLICIT_DEF: {
2214 LLT DstTy =
MRI.getType(DstReg);
2220 if (DstBank == &AMDGPU::VCCRegBank)
2223 if (DefRegs.
empty())
2226 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2229 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2231 MI.getOperand(0).setReg(NewDstReg);
2232 if (
Opc != AMDGPU::G_IMPLICIT_DEF) {
2233 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2234 MI.getOperand(1).setCImm(
2238 MRI.setRegBank(NewDstReg, *DstBank);
2239 B.buildTrunc(DefRegs[0], NewDstReg);
2242 case AMDGPU::G_PHI: {
2244 LLT DstTy =
MRI.getType(DstReg);
2251 if (DstBank == &AMDGPU::VCCRegBank) {
2258 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
2262 if (SrcBank != &AMDGPU::VCCRegBank) {
2267 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2268 MI.getOperand(
I).setReg(Copy.getReg(0));
2279 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2280 B.setInsertPt(
B.getMBB(),
MI);
2288 case AMDGPU::G_FCMP:
2292 case AMDGPU::G_ICMP:
2293 case AMDGPU::G_UADDO:
2294 case AMDGPU::G_USUBO:
2295 case AMDGPU::G_UADDE:
2296 case AMDGPU::G_SADDE:
2297 case AMDGPU::G_USUBE:
2298 case AMDGPU::G_SSUBE: {
2299 unsigned BoolDstOp =
2300 (
Opc == AMDGPU::G_ICMP ||
Opc == AMDGPU::G_FCMP) ? 0 : 1;
2301 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2305 if (DstBank != &AMDGPU::SGPRRegBank)
2308 const bool HasCarryIn =
MI.getNumOperands() == 5;
2314 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2315 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2319 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2320 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2321 MI.getOperand(4).setReg(NewSrcReg);
2325 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2330 if (DefRegs.
empty())
2332 B.buildTrunc(DefRegs[0], NewDstReg);
2335 case AMDGPU::G_SELECT: {
2337 LLT DstTy =
MRI.getType(DstReg);
2340 if (CondRegs.
empty())
2347 if (CondBank == &AMDGPU::SGPRRegBank) {
2350 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2352 MI.getOperand(1).setReg(NewCondReg);
2353 B.buildZExt(NewCondReg, CondRegs[0]);
2366 if (DefRegs.
empty()) {
2371 if (Src1Regs.
empty())
2377 if (Src2Regs.
empty())
2384 auto Flags =
MI.getFlags();
2385 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0], Flags);
2386 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1], Flags);
2388 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2389 MI.eraseFromParent();
2392 case AMDGPU::G_BRCOND: {
2393 Register CondReg =
MI.getOperand(0).getReg();
2398 if (CondBank == &AMDGPU::SGPRRegBank) {
2401 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2403 MI.getOperand(0).setReg(NewCondReg);
2404 B.buildZExt(NewCondReg, CondReg);
2412 case AMDGPU::G_XOR: {
2416 LLT DstTy =
MRI.getType(DstReg);
2422 if (DstBank == &AMDGPU::VCCRegBank)
2426 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2435 if (DstTy.
getSizeInBits() == 16 && DstBank == &AMDGPU::SGPRRegBank) {
2439 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2444 if (
MI.getOpcode() == AMDGPU::G_XOR &&
2465 if (DefRegs.
empty()) {
2472 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2478 if (Src0Regs.
empty())
2483 if (Src1Regs.
empty())
2490 auto Flags =
MI.getFlags();
2491 B.buildInstr(
Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]}, Flags);
2492 B.buildInstr(
Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]}, Flags);
2494 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2495 MI.eraseFromParent();
2498 case AMDGPU::G_ABS: {
2504 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2506 ApplyRegBankMapping Apply(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2519 case AMDGPU::G_LSHR:
2520 case AMDGPU::G_ASHR:
2521 case AMDGPU::G_SMIN:
2522 case AMDGPU::G_SMAX:
2523 case AMDGPU::G_UMIN:
2524 case AMDGPU::G_UMAX: {
2526 LLT DstTy =
MRI.getType(DstReg);
2544 if (DstBank == &AMDGPU::VGPRRegBank)
2550 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2555 std::tie(WideSrcLo, WideSrcHi) =
2557 auto Lo =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcLo});
2558 auto Hi =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcHi});
2559 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2560 MI.eraseFromParent();
2569 std::tie(WideSrc0Lo, WideSrc0Hi)
2571 std::tie(WideSrc1Lo, WideSrc1Hi)
2573 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2574 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2575 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2576 MI.eraseFromParent();
2584 if (
Opc == AMDGPU::G_SHL ||
Opc == AMDGPU::G_LSHR ||
2585 Opc == AMDGPU::G_ASHR) {
2586 B.setInsertPt(*
MBB,
MI.getIterator());
2594 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
2595 case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
2609 Register SrcReg0 =
MI.getOperand(1).getReg();
2610 Register SrcReg1 =
MI.getOperand(2).getReg();
2613 assert(
MRI.getType(DstReg) ==
S64 &&
"This is a special case for s_mul_u64 "
2614 "that handles only 64-bit operands.");
2620 if (DstBank == &AMDGPU::SGPRRegBank) {
2621 MI.setDesc(
TII->get(AMDGPU::S_MUL_U64));
2622 MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
2623 MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
2624 MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
2630 assert(
MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
2631 "The destination operand should be in vector registers.");
2634 Register Op0L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2635 MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
2637 B.buildTrunc(Op0L, SrcReg0);
2640 Register Op1L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2641 MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
2643 B.buildTrunc(Op1L, SrcReg1);
2645 unsigned NewOpc =
Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
2646 ? AMDGPU::G_AMDGPU_MAD_U64_U32
2647 : AMDGPU::G_AMDGPU_MAD_I64_I32;
2651 MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
2652 Register CarryOut =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2653 MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
2654 B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
2655 MI.eraseFromParent();
2658 case AMDGPU::G_SEXT_INREG: {
2660 if (SrcRegs.
empty())
2664 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2671 int Amt =
MI.getOperand(2).getImm();
2677 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2679 auto Freeze =
B.buildFreeze(
S32, SrcRegs[0]);
2681 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2684 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(
S32, 31));
2688 B.buildCopy(DstRegs[0], SrcRegs[0]);
2689 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2693 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2694 MI.eraseFromParent();
2697 case AMDGPU::G_CTPOP:
2698 case AMDGPU::G_BITREVERSE: {
2701 if (DstBank == &AMDGPU::SGPRRegBank)
2706 LLT Ty =
MRI.getType(SrcReg);
2710 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2719 case AMDGPU::G_AMDGPU_FFBH_U32:
2720 case AMDGPU::G_AMDGPU_FFBL_B32:
2721 case AMDGPU::G_CTLZ_ZERO_UNDEF:
2722 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
2725 if (DstBank == &AMDGPU::SGPRRegBank)
2730 LLT Ty =
MRI.getType(SrcReg);
2740 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2742 unsigned NewOpc =
Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
2743 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2744 :
Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2745 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2747 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2748 auto X =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx]});
2749 auto Y =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx ^ 1]});
2751 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF ||
Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2753 : AMDGPU::G_UADDSAT;
2754 Y =
B.buildInstr(AddOpc, {
S32}, {
Y,
B.buildConstant(
S32, 32)});
2756 B.buildUMin(DstReg,
X,
Y);
2757 MI.eraseFromParent();
2760 case AMDGPU::G_SEXT:
2761 case AMDGPU::G_ZEXT:
2762 case AMDGPU::G_ANYEXT: {
2764 LLT SrcTy =
MRI.getType(SrcReg);
2765 const bool Signed =
Opc == AMDGPU::G_SEXT;
2773 LLT DstTy =
MRI.getType(DstReg);
2775 SrcBank != &AMDGPU::SGPRRegBank &&
2776 SrcBank != &AMDGPU::VCCRegBank &&
2786 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2787 }
else if (
Opc == AMDGPU::G_ZEXT) {
2788 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2790 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2794 MRI.setRegBank(DstReg, *SrcBank);
2795 MI.eraseFromParent();
2805 if (SrcBank == &AMDGPU::VCCRegBank) {
2812 const bool UseSel64 = DstSize > 32 &&
2813 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2817 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2818 auto False =
B.buildConstant(SelType, 0);
2820 MRI.setRegBank(True.getReg(0), *DstBank);
2821 MRI.setRegBank(False.getReg(0), *DstBank);
2822 MRI.setRegBank(DstReg, *DstBank);
2825 B.buildSelect(DefRegs[0], SrcReg, True, False);
2827 }
else if (DstSize < 32) {
2828 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2829 MRI.setRegBank(Sel.getReg(0), *DstBank);
2830 B.buildTrunc(DstReg, Sel);
2832 B.buildSelect(DstReg, SrcReg, True, False);
2835 MI.eraseFromParent();
2841 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2850 LLT DstTy =
MRI.getType(DstReg);
2851 LLT SrcTy =
MRI.getType(SrcReg);
2853 if (foldExtractEltToCmpSelect(
B,
MI, OpdMapper))
2865 unsigned ConstOffset;
2866 std::tie(BaseIdxReg, ConstOffset) =
2873 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2878 if (ShouldMoveIndexIntoLoop)
2879 MI.getOperand(2).setReg(BaseIdxReg);
2885 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2886 SrcBank == &AMDGPU::SGPRRegBank;
2887 if (DstRegs.
empty()) {
2892 if (NeedCopyToVGPR) {
2894 Register TmpReg =
MRI.createGenericVirtualRegister(DstTy);
2895 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
2896 MI.getOperand(0).setReg(TmpReg);
2897 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2904 if (ShouldMoveIndexIntoLoop)
2914 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2915 auto One =
B.buildConstant(
S32, 1);
2926 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2927 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2929 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2930 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2932 MRI.setRegBank(DstReg, *DstBank);
2933 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2934 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2935 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2936 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2940 MI.eraseFromParent();
2946 B.setInstr(*Span.
begin());
2947 MI.eraseFromParent();
2951 if (NeedCopyToVGPR) {
2955 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2956 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2958 Extract0->getOperand(0).setReg(TmpReg0);
2959 Extract1->getOperand(0).setReg(TmpReg1);
2967 if (ShouldMoveIndexIntoLoop)
2972 case AMDGPU::G_INSERT_VECTOR_ELT: {
2976 LLT VecTy =
MRI.getType(DstReg);
2982 MRI.setType(
MI.getOperand(1).getReg(), VecTy);
2984 if (foldInsertEltToCmpSelect(
B,
MI, OpdMapper))
2992 LLT InsTy =
MRI.getType(InsReg);
2996 unsigned ConstOffset;
2997 std::tie(BaseIdxReg, ConstOffset) =
3004 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
3009 if (ShouldMoveIndexIntoLoop)
3010 MI.getOperand(3).setReg(BaseIdxReg);
3013 if (InsRegs.
empty()) {
3017 if (ShouldMoveIndexIntoLoop) {
3029 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
3030 auto One =
B.buildConstant(
S32, 1);
3039 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
3040 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
3042 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
3043 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
3052 MRI.setRegBank(InsReg, *InsSrcBank);
3053 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
3054 MRI.setRegBank(InsLo.getReg(0), *DstBank);
3055 MRI.setRegBank(InsHi.getReg(0), *DstBank);
3056 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
3057 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
3058 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
3063 B.setInsertPt(
B.getMBB(),
MI);
3064 B.buildBitcast(DstReg, InsHi);
3065 MI.eraseFromParent();
3069 B.setInstr(*Span.
begin());
3070 MI.eraseFromParent();
3081 B.buildBitcast(DstReg, InsHi);
3084 if (ShouldMoveIndexIntoLoop)
3089 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
3090 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3091 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3092 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
3093 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3094 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
3095 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
3096 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
3097 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
3098 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
3099 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3100 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
3101 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3102 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3103 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
3104 case AMDGPU::G_AMDGPU_BUFFER_STORE:
3105 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
3106 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
3107 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
3108 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
3109 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
3110 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
3115 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
3116 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
3117 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
3118 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
3119 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
3120 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
3121 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
3122 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
3123 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
3124 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
3125 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
3126 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
3127 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3128 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
3129 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
3134 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
3139 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
3140 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
3141 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
3142 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
3143 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
3147 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
3151 case AMDGPU::G_INTRINSIC:
3152 case AMDGPU::G_INTRINSIC_CONVERGENT: {
3154 case Intrinsic::amdgcn_readlane: {
3165 case Intrinsic::amdgcn_writelane: {
3175 case Intrinsic::amdgcn_interp_p1:
3176 case Intrinsic::amdgcn_interp_p2:
3177 case Intrinsic::amdgcn_interp_mov:
3178 case Intrinsic::amdgcn_interp_p1_f16:
3179 case Intrinsic::amdgcn_interp_p2_f16:
3180 case Intrinsic::amdgcn_lds_param_load: {
3188 case Intrinsic::amdgcn_interp_inreg_p10:
3189 case Intrinsic::amdgcn_interp_inreg_p2:
3190 case Intrinsic::amdgcn_interp_inreg_p10_f16:
3191 case Intrinsic::amdgcn_interp_inreg_p2_f16:
3192 case Intrinsic::amdgcn_interp_p10_rtz_f16:
3193 case Intrinsic::amdgcn_interp_p2_rtz_f16:
3194 case Intrinsic::amdgcn_permlane16_swap:
3195 case Intrinsic::amdgcn_permlane32_swap:
3198 case Intrinsic::amdgcn_permlane16:
3199 case Intrinsic::amdgcn_permlanex16: {
3207 case Intrinsic::amdgcn_permlane_bcast:
3208 case Intrinsic::amdgcn_permlane_up:
3209 case Intrinsic::amdgcn_permlane_down:
3210 case Intrinsic::amdgcn_permlane_xor:
3215 case Intrinsic::amdgcn_permlane_idx_gen: {
3219 case Intrinsic::amdgcn_sbfe:
3222 case Intrinsic::amdgcn_ubfe:
3225 case Intrinsic::amdgcn_inverse_ballot:
3226 case Intrinsic::amdgcn_s_bitreplicate:
3227 case Intrinsic::amdgcn_s_quadmask:
3228 case Intrinsic::amdgcn_s_wqm:
3232 case Intrinsic::amdgcn_ballot:
3238 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3239 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3240 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
3241 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3242 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3252 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
3253 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
3254 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
3256 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
3257 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
3258 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;
3259 unsigned LastRegOpIdx =
MI.getNumExplicitOperands() - 1 - NumMods;
3264 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3265 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3266 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
3268 case Intrinsic::amdgcn_ds_ordered_add:
3269 case Intrinsic::amdgcn_ds_ordered_swap: {
3276 case Intrinsic::amdgcn_ds_gws_init:
3277 case Intrinsic::amdgcn_ds_gws_barrier:
3278 case Intrinsic::amdgcn_ds_gws_sema_br: {
3284 case Intrinsic::amdgcn_ds_gws_sema_v:
3285 case Intrinsic::amdgcn_ds_gws_sema_p:
3286 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3291 case Intrinsic::amdgcn_ds_append:
3292 case Intrinsic::amdgcn_ds_consume: {
3296 case Intrinsic::amdgcn_s_sendmsg:
3297 case Intrinsic::amdgcn_s_sendmsghalt: {
3302 case Intrinsic::amdgcn_s_setreg: {
3306 case Intrinsic::amdgcn_s_ttracedata:
3309 case Intrinsic::amdgcn_raw_buffer_load_lds:
3310 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
3317 case Intrinsic::amdgcn_struct_buffer_load_lds:
3318 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
3325 case Intrinsic::amdgcn_load_to_lds:
3326 case Intrinsic::amdgcn_global_load_lds: {
3331 case Intrinsic::amdgcn_lds_direct_load: {
3337 case Intrinsic::amdgcn_exp_row:
3341 case Intrinsic::amdgcn_s_sleep_var:
3345 case Intrinsic::amdgcn_s_barrier_join:
3348 case Intrinsic::amdgcn_s_barrier_init:
3349 case Intrinsic::amdgcn_s_barrier_signal_var:
3353 case Intrinsic::amdgcn_s_get_barrier_state:
3354 case Intrinsic::amdgcn_s_get_named_barrier_state: {
3358 case Intrinsic::amdgcn_s_prefetch_data: {
3360 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3365 MI.eraseFromParent();
3368 case Intrinsic::amdgcn_tensor_load_to_lds:
3369 case Intrinsic::amdgcn_tensor_store_from_lds: {
3376 case Intrinsic::amdgcn_tensor_load_to_lds_d2:
3377 case Intrinsic::amdgcn_tensor_store_from_lds_d2: {
3388 if (RSrcIntrin->IsImage) {
3399 case AMDGPU::G_SI_CALL: {
3410 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3411 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3417 unsigned NonCopyInstrsLen = 0;
3423 while (Start->getOpcode() != FrameSetupOpcode) {
3425 bool IsCopy =
false;
3426 if (Start->getOpcode() == AMDGPU::COPY) {
3427 auto &Dst = Start->getOperand(0);
3430 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3435 auto &Src = Start->getOperand(1);
3438 IsCopy =
Info->getScratchRSrcReg() == Reg;
3446 NonCopyInstrsLen = NonCopyInstrs.
size();
3451 NonCopyInstrs.
resize(NonCopyInstrsLen);
3453 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3459 NonCopyInstrs.
clear();
3460 NonCopyInstrsLen = 0;
3463 while (
End->getOpcode() != FrameDestroyOpcode) {
3465 bool IsCopy =
false;
3466 if (
End->getOpcode() == AMDGPU::COPY) {
3467 auto &Src =
End->getOperand(1);
3470 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3476 NonCopyInstrsLen = NonCopyInstrs.
size();
3481 NonCopyInstrs.
resize(NonCopyInstrsLen);
3485 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3490 B.setInsertPt(
B.getMBB(), Start);
3494 case AMDGPU::G_LOAD:
3495 case AMDGPU::G_ZEXTLOAD:
3496 case AMDGPU::G_SEXTLOAD: {
3501 case AMDGPU::G_DYN_STACKALLOC:
3504 case AMDGPU::G_STACKRESTORE: {
3509 case AMDGPU::G_SBFX:
3512 case AMDGPU::G_UBFX:
3515 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3516 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3519 case AMDGPU::G_PREFETCH: {
3521 MI.eraseFromParent();
3526 if (PtrBank == AMDGPU::VGPRRegBankID &&
3529 MI.eraseFromParent();
3532 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3537 !
MI.getOperand(3).getImm() ))) {
3538 MI.eraseFromParent();
3556 if (RB0 == AMDGPU::InvalidRegBankID)
3558 if (RB1 == AMDGPU::InvalidRegBankID)
3561 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
3562 return AMDGPU::SGPRRegBankID;
3564 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
3565 return AMDGPU::AGPRRegBankID;
3567 return AMDGPU::VGPRRegBankID;
3571 if (RB0 == AMDGPU::InvalidRegBankID)
3573 if (RB1 == AMDGPU::InvalidRegBankID)
3579 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
3580 return AMDGPU::VCCRegBankID;
3588 unsigned RegBank = AMDGPU::InvalidRegBankID;
3596 if (RegBank == AMDGPU::VGPRRegBankID)
3612 if (Bank->getID() != AMDGPU::SGPRRegBankID)
3625 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3631 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3634 MI.getNumOperands());
3647 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3653 unsigned BankID =
Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3654 OpdsMapping[i] = AMDGPU::getValueMapping(BankID,
Size);
3658 MI.getNumOperands());
3667 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
3673 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3677 MI.getNumOperands());
3683 int RsrcIdx)
const {
3686 RsrcIdx +=
MI.getNumExplicitDefs() + 1;
3688 const int NumOps =
MI.getNumOperands();
3693 for (
int I = 0;
I != NumOps; ++
I) {
3694 if (!
MI.getOperand(
I).isReg())
3708 const bool MustBeSGPR =
I == RsrcIdx ||
I == RsrcIdx + 1;
3713 OpdsMapping[
I] = AMDGPU::getValueMapping(NewBank,
Size);
3716 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3727 LLT PtrTy =
MRI.getType(PtrReg);
3731 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3736 return AMDGPU::getValueMapping(PtrBank->
getID(),
Size);
3747 LLT PtrTy =
MRI.getType(PtrReg);
3759 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3760 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
3762 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3767 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
3769 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
3772 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3773 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
3776 OpdsMapping[0] = ValMapping;
3777 OpdsMapping[1] = PtrMapping;
3802 return AMDGPU::getValueMapping(Bank,
Size);
3810 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3818 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID,
Size);
3835 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3843 assert(SrcBank &&
"src bank should have been assigned already");
3850 DstBank = &AMDGPU::VCCRegBank;
3852 DstBank = &AMDGPU::VCCRegBank;
3858 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3863 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3865 OpdsMapping[0] = &ValMap;
3866 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3867 OpdsMapping[1] = &ValMap;
3874 if (
MI.isRegSequence()) {
3877 unsigned BankID = AMDGPU::SGPRRegBankID;
3879 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
3883 if (OpBank != AMDGPU::SGPRRegBankID) {
3884 BankID = AMDGPU::VGPRRegBankID;
3900 if (
auto *
PHI = dyn_cast<GPhi>(&
MI)) {
3901 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3906 ResultBank = DstBank->
getID();
3908 for (
unsigned I = 0;
I <
PHI->getNumIncomingValues(); ++
I) {
3913 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3914 ResultBank = AMDGPU::VGPRRegBankID;
3919 unsigned OpBank = Bank->
getID();
3923 assert(ResultBank != AMDGPU::InvalidRegBankID);
3925 unsigned Size =
MRI.getType(DstReg).getSizeInBits();
3940 switch (
MI.getOpcode()) {
3947 case AMDGPU::G_MUL: {
3948 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3953 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3954 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3955 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3957 TargetBankID = DstBank->
getID();
3958 if (DstBank == &AMDGPU::VCCRegBank) {
3959 TargetBankID = AMDGPU::VCCRegBankID;
3960 BankLHS = AMDGPU::VCCRegBankID;
3961 BankRHS = AMDGPU::VCCRegBankID;
3964 AMDGPU::SGPRRegBankID);
3966 AMDGPU::SGPRRegBankID);
3970 AMDGPU::VCCRegBankID);
3972 AMDGPU::VCCRegBankID);
3975 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3976 TargetBankID = AMDGPU::VGPRRegBankID;
3977 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3978 TargetBankID = AMDGPU::VCCRegBankID;
3979 BankLHS = AMDGPU::VCCRegBankID;
3980 BankRHS = AMDGPU::VCCRegBankID;
3981 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3982 TargetBankID = AMDGPU::SGPRRegBankID;
3986 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
3987 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
3988 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
3995 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
3996 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
3999 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4002 getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
4004 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
4007 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
4015 case AMDGPU::G_PTR_ADD:
4016 case AMDGPU::G_PTRMASK:
4020 case AMDGPU::G_LSHR:
4021 case AMDGPU::G_ASHR:
4022 case AMDGPU::G_UADDO:
4023 case AMDGPU::G_USUBO:
4024 case AMDGPU::G_UADDE:
4025 case AMDGPU::G_SADDE:
4026 case AMDGPU::G_USUBE:
4027 case AMDGPU::G_SSUBE:
4029 case AMDGPU::G_SHUFFLE_VECTOR:
4030 case AMDGPU::G_SBFX:
4031 case AMDGPU::G_UBFX:
4032 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
4033 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
4037 case AMDGPU::G_SMIN:
4038 case AMDGPU::G_SMAX:
4039 case AMDGPU::G_UMIN:
4040 case AMDGPU::G_UMAX:
4043 if (
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 64 &&
4049 case AMDGPU::G_FADD:
4050 case AMDGPU::G_FSUB:
4051 case AMDGPU::G_FMUL:
4053 case AMDGPU::G_FFLOOR:
4054 case AMDGPU::G_FCEIL:
4055 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
4056 case AMDGPU::G_FMINNUM:
4057 case AMDGPU::G_FMAXNUM:
4058 case AMDGPU::G_FMINIMUM:
4059 case AMDGPU::G_FMAXIMUM:
4060 case AMDGPU::G_FMINIMUMNUM:
4061 case AMDGPU::G_FMAXIMUMNUM:
4062 case AMDGPU::G_INTRINSIC_TRUNC:
4063 case AMDGPU::G_STRICT_FADD:
4064 case AMDGPU::G_STRICT_FSUB:
4065 case AMDGPU::G_STRICT_FMUL:
4066 case AMDGPU::G_STRICT_FMA: {
4067 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
4074 case AMDGPU::G_FPTOSI:
4075 case AMDGPU::G_FPTOUI:
4076 case AMDGPU::G_SITOFP:
4077 case AMDGPU::G_UITOFP: {
4078 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4079 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4085 case AMDGPU::G_FPTRUNC:
4086 case AMDGPU::G_FPEXT: {
4087 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4088 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4094 case AMDGPU::G_FSQRT:
4095 case AMDGPU::G_FEXP2:
4096 case AMDGPU::G_FLOG2: {
4097 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4103 case AMDGPU::G_SADDSAT:
4104 case AMDGPU::G_SSUBSAT:
4105 case AMDGPU::G_UADDSAT:
4106 case AMDGPU::G_USUBSAT:
4107 case AMDGPU::G_FMAD:
4108 case AMDGPU::G_FLDEXP:
4109 case AMDGPU::G_FMINNUM_IEEE:
4110 case AMDGPU::G_FMAXNUM_IEEE:
4111 case AMDGPU::G_FCANONICALIZE:
4112 case AMDGPU::G_STRICT_FLDEXP:
4113 case AMDGPU::G_BSWAP:
4114 case AMDGPU::G_FSHR:
4115 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
4116 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
4117 case AMDGPU::G_AMDGPU_RCP_IFLAG:
4118 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
4119 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
4120 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
4121 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
4122 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
4123 case AMDGPU::G_AMDGPU_SMED3:
4124 case AMDGPU::G_AMDGPU_FMED3:
4126 case AMDGPU::G_UMULH:
4127 case AMDGPU::G_SMULH: {
4132 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4133 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4142 bool AllSalu =
true;
4143 bool MulSalu =
true;
4144 for (
unsigned i = 0; i < 5; ++i) {
4147 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4149 if (i == 2 || i == 3) {
4167 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4168 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4169 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4170 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4171 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4174 case AMDGPU::G_IMPLICIT_DEF: {
4175 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4176 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4179 case AMDGPU::G_FCONSTANT:
4180 case AMDGPU::G_CONSTANT:
4181 case AMDGPU::G_GLOBAL_VALUE:
4182 case AMDGPU::G_FRAME_INDEX:
4183 case AMDGPU::G_BLOCK_ADDR:
4184 case AMDGPU::G_READSTEADYCOUNTER:
4185 case AMDGPU::G_READCYCLECOUNTER: {
4186 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4187 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4190 case AMDGPU::G_DYN_STACKALLOC: {
4192 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4194 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
4197 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
4202 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4203 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4206 case AMDGPU::G_INSERT: {
4211 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4212 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4213 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
4214 OpdsMapping[3] =
nullptr;
4217 case AMDGPU::G_EXTRACT: {
4221 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4222 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4223 OpdsMapping[2] =
nullptr;
4226 case AMDGPU::G_BUILD_VECTOR:
4227 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
4228 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
4231 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4234 unsigned DstBankID =
regBankUnion(Src0BankID, Src1BankID);
4236 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
4237 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
4238 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
4244 case AMDGPU::G_MERGE_VALUES:
4245 case AMDGPU::G_CONCAT_VECTORS: {
4247 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4248 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4250 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4252 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; ++i)
4253 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
4256 case AMDGPU::G_BITREVERSE:
4257 case AMDGPU::G_BITCAST:
4258 case AMDGPU::G_INTTOPTR:
4259 case AMDGPU::G_PTRTOINT:
4260 case AMDGPU::G_FABS:
4261 case AMDGPU::G_FNEG: {
4262 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4264 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4267 case AMDGPU::G_AMDGPU_FFBH_U32:
4268 case AMDGPU::G_AMDGPU_FFBL_B32:
4269 case AMDGPU::G_CTLZ_ZERO_UNDEF:
4270 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
4271 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4273 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4274 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID,
Size);
4277 case AMDGPU::G_CTPOP: {
4278 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4280 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4285 OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4288 case AMDGPU::G_TRUNC: {
4294 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4295 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
4298 case AMDGPU::G_ZEXT:
4299 case AMDGPU::G_SEXT:
4300 case AMDGPU::G_ANYEXT:
4301 case AMDGPU::G_SEXT_INREG: {
4310 switch (SrcBank->
getID()) {
4311 case AMDGPU::SGPRRegBankID:
4312 DstBank = AMDGPU::SGPRRegBankID;
4315 DstBank = AMDGPU::VGPRRegBankID;
4321 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
4322 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->
getID(),
4326 case AMDGPU::G_IS_FPCLASS: {
4328 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4329 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4330 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4331 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4334 case AMDGPU::G_STORE: {
4336 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4341 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4342 OpdsMapping[0] = ValMapping;
4346 case AMDGPU::G_ICMP:
4347 case AMDGPU::G_FCMP: {
4348 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4353 AMDGPU::SGPRRegBankID);
4357 auto canUseSCCICMP = [&]() {
4360 return Size == 32 ||
4365 auto canUseSCCFCMP = [&]() {
4369 bool isICMP =
MI.getOpcode() == AMDGPU::G_ICMP;
4370 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4371 Op2Bank == AMDGPU::SGPRRegBankID &&
4372 Op3Bank == AMDGPU::SGPRRegBankID &&
4373 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4375 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4376 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4380 const unsigned ResultSize = 1;
4382 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4383 OpdsMapping[1] =
nullptr;
4384 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank,
Size);
4385 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank,
Size);
4388 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4391 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4392 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4393 unsigned IdxSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4395 unsigned OutputBankID =
regBankUnion(SrcBankID, IdxBank);
4397 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4398 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4401 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4404 case AMDGPU::G_INSERT_VECTOR_ELT: {
4406 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4408 unsigned VecSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4409 unsigned InsertSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4410 unsigned IdxSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4414 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4415 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4419 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4420 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4423 assert(InsertSize == 32 || InsertSize == 64);
4424 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4428 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4431 case AMDGPU::G_UNMERGE_VALUES: {
4436 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
4438 OpdsMapping[i] = AMDGPU::getValueMapping(Bank,
Size);
4442 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4443 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4444 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4445 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4446 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4447 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
4448 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
4449 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
4450 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
4451 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
4452 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4453 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4454 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4455 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4456 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4457 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4458 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4459 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4460 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4461 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4462 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4463 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4482 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4483 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4484 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4485 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4486 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4487 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4488 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4489 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4490 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4491 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4492 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4493 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4494 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4495 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4496 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4519 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4545 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
4546 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
4547 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
4548 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
4549 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
4557 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4558 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4559 unsigned ResultBank =
regBankUnion(RSrcBank, OffsetBank);
4561 unsigned Size0 =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4562 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4565 case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
4569 case AMDGPU::G_INTRINSIC:
4570 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4574 case Intrinsic::amdgcn_div_fmas:
4575 case Intrinsic::amdgcn_div_fixup:
4576 case Intrinsic::amdgcn_trig_preop:
4577 case Intrinsic::amdgcn_sin:
4578 case Intrinsic::amdgcn_cos:
4579 case Intrinsic::amdgcn_log_clamp:
4580 case Intrinsic::amdgcn_rcp_legacy:
4581 case Intrinsic::amdgcn_rsq_legacy:
4582 case Intrinsic::amdgcn_rsq_clamp:
4583 case Intrinsic::amdgcn_tanh:
4584 case Intrinsic::amdgcn_fmul_legacy:
4585 case Intrinsic::amdgcn_fma_legacy:
4586 case Intrinsic::amdgcn_frexp_mant:
4587 case Intrinsic::amdgcn_frexp_exp:
4588 case Intrinsic::amdgcn_fract:
4589 case Intrinsic::amdgcn_cvt_pknorm_i16:
4590 case Intrinsic::amdgcn_cvt_pknorm_u16:
4591 case Intrinsic::amdgcn_cvt_pk_i16:
4592 case Intrinsic::amdgcn_cvt_pk_u16:
4593 case Intrinsic::amdgcn_cvt_sr_pk_f16_f32:
4594 case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32:
4595 case Intrinsic::amdgcn_cvt_pk_f16_fp8:
4596 case Intrinsic::amdgcn_cvt_pk_f16_bf8:
4597 case Intrinsic::amdgcn_cvt_pk_fp8_f16:
4598 case Intrinsic::amdgcn_cvt_pk_bf8_f16:
4599 case Intrinsic::amdgcn_cvt_sr_fp8_f16:
4600 case Intrinsic::amdgcn_cvt_sr_bf8_f16:
4601 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp8:
4602 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp8:
4603 case Intrinsic::amdgcn_cvt_scale_pk8_f16_bf8:
4604 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_bf8:
4605 case Intrinsic::amdgcn_cvt_scale_pk8_f16_fp4:
4606 case Intrinsic::amdgcn_cvt_scale_pk8_bf16_fp4:
4607 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8:
4608 case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8:
4609 case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4:
4610 case Intrinsic::amdgcn_cvt_scale_pk16_f16_fp6:
4611 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_fp6:
4612 case Intrinsic::amdgcn_cvt_scale_pk16_f16_bf6:
4613 case Intrinsic::amdgcn_cvt_scale_pk16_bf16_bf6:
4614 case Intrinsic::amdgcn_cvt_scale_pk16_f32_fp6:
4615 case Intrinsic::amdgcn_cvt_scale_pk16_f32_bf6:
4616 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16:
4617 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16:
4618 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16:
4619 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f16:
4620 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f32:
4621 case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f32:
4622 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f32:
4623 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f16:
4624 case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_bf16:
4625 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f32:
4626 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f32:
4627 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f16:
4628 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f16:
4629 case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_bf16:
4630 case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_bf16:
4631 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_bf16:
4632 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_bf16:
4633 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f16:
4634 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f16:
4635 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f32:
4636 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f32:
4637 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f32:
4638 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f16:
4639 case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_bf16:
4640 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f32:
4641 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f32:
4642 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f16:
4643 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f16:
4644 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_bf16:
4645 case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_bf16:
4646 case Intrinsic::amdgcn_sat_pk4_i4_i8:
4647 case Intrinsic::amdgcn_sat_pk4_u4_u8:
4648 case Intrinsic::amdgcn_fmed3:
4649 case Intrinsic::amdgcn_cubeid:
4650 case Intrinsic::amdgcn_cubema:
4651 case Intrinsic::amdgcn_cubesc:
4652 case Intrinsic::amdgcn_cubetc:
4653 case Intrinsic::amdgcn_sffbh:
4654 case Intrinsic::amdgcn_fmad_ftz:
4655 case Intrinsic::amdgcn_mbcnt_lo:
4656 case Intrinsic::amdgcn_mbcnt_hi:
4657 case Intrinsic::amdgcn_mul_u24:
4658 case Intrinsic::amdgcn_mul_i24:
4659 case Intrinsic::amdgcn_mulhi_u24:
4660 case Intrinsic::amdgcn_mulhi_i24:
4661 case Intrinsic::amdgcn_lerp:
4662 case Intrinsic::amdgcn_sad_u8:
4663 case Intrinsic::amdgcn_msad_u8:
4664 case Intrinsic::amdgcn_sad_hi_u8:
4665 case Intrinsic::amdgcn_sad_u16:
4666 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4667 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4668 case Intrinsic::amdgcn_mqsad_u32_u8:
4669 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4670 case Intrinsic::amdgcn_alignbyte:
4671 case Intrinsic::amdgcn_perm:
4672 case Intrinsic::amdgcn_prng_b32:
4673 case Intrinsic::amdgcn_fdot2:
4674 case Intrinsic::amdgcn_sdot2:
4675 case Intrinsic::amdgcn_udot2:
4676 case Intrinsic::amdgcn_sdot4:
4677 case Intrinsic::amdgcn_udot4:
4678 case Intrinsic::amdgcn_sdot8:
4679 case Intrinsic::amdgcn_udot8:
4680 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4681 case Intrinsic::amdgcn_fdot2_f16_f16:
4682 case Intrinsic::amdgcn_fdot2_f32_bf16:
4683 case Intrinsic::amdgcn_fdot2c_f32_bf16:
4684 case Intrinsic::amdgcn_sudot4:
4685 case Intrinsic::amdgcn_sudot8:
4686 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
4687 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
4688 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
4689 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
4690 case Intrinsic::amdgcn_cvt_f32_fp8:
4691 case Intrinsic::amdgcn_cvt_f32_fp8_e5m3:
4692 case Intrinsic::amdgcn_cvt_f32_bf8:
4693 case Intrinsic::amdgcn_cvt_off_f32_i4:
4694 case Intrinsic::amdgcn_cvt_pk_f32_fp8:
4695 case Intrinsic::amdgcn_cvt_pk_f32_bf8:
4696 case Intrinsic::amdgcn_cvt_pk_fp8_f32:
4697 case Intrinsic::amdgcn_cvt_pk_fp8_f32_e5m3:
4698 case Intrinsic::amdgcn_cvt_pk_bf8_f32:
4699 case Intrinsic::amdgcn_cvt_sr_fp8_f32:
4700 case Intrinsic::amdgcn_cvt_sr_fp8_f32_e5m3:
4701 case Intrinsic::amdgcn_cvt_sr_bf8_f32:
4702 case Intrinsic::amdgcn_cvt_sr_bf16_f32:
4703 case Intrinsic::amdgcn_cvt_sr_f16_f32:
4704 case Intrinsic::amdgcn_cvt_f16_fp8:
4705 case Intrinsic::amdgcn_cvt_f16_bf8:
4706 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16:
4707 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16:
4708 case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16:
4709 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_bf16:
4710 case Intrinsic::amdgcn_cvt_scalef32_f16_fp8:
4711 case Intrinsic::amdgcn_cvt_scalef32_f16_bf8:
4712 case Intrinsic::amdgcn_cvt_scalef32_f32_fp8:
4713 case Intrinsic::amdgcn_cvt_scalef32_f32_bf8:
4714 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f32:
4715 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f32:
4716 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp8:
4717 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_bf8:
4718 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_f16:
4719 case Intrinsic::amdgcn_cvt_scalef32_pk_fp8_bf16:
4720 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_f16:
4721 case Intrinsic::amdgcn_cvt_scalef32_pk_bf8_bf16:
4722 case Intrinsic::amdgcn_cvt_scalef32_pk_f32_fp4:
4723 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f32:
4724 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp4:
4725 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp4:
4726 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_fp6:
4727 case Intrinsic::amdgcn_cvt_scalef32_pk32_f32_bf6:
4728 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_bf6:
4729 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_bf6:
4730 case Intrinsic::amdgcn_cvt_scalef32_pk32_f16_fp6:
4731 case Intrinsic::amdgcn_cvt_scalef32_pk32_bf16_fp6:
4732 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_bf8:
4733 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_bf8:
4734 case Intrinsic::amdgcn_cvt_scalef32_pk_f16_fp8:
4735 case Intrinsic::amdgcn_cvt_scalef32_pk_bf16_fp8:
4736 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_f16:
4737 case Intrinsic::amdgcn_cvt_scalef32_pk_fp4_bf16:
4738 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f16:
4739 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_bf16:
4740 case Intrinsic::amdgcn_cvt_scalef32_sr_pk_fp4_f32:
4741 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_bf16:
4742 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f16:
4743 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_bf6_f32:
4744 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_bf16:
4745 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f16:
4746 case Intrinsic::amdgcn_cvt_scalef32_sr_pk32_fp6_f32:
4747 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_bf16:
4748 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f16:
4749 case Intrinsic::amdgcn_cvt_scalef32_sr_bf8_f32:
4750 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_bf16:
4751 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f16:
4752 case Intrinsic::amdgcn_cvt_scalef32_sr_fp8_f32:
4753 case Intrinsic::amdgcn_ashr_pk_i8_i32:
4754 case Intrinsic::amdgcn_ashr_pk_u8_i32:
4755 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_fp6_f32:
4756 case Intrinsic::amdgcn_cvt_scalef32_2xpk16_bf6_f32:
4757 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4758 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4759 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
4760 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
4761 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4762 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4763 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4764 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4765 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
4766 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
4767 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
4768 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
4769 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
4770 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
4771 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
4772 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
4773 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
4774 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
4775 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
4776 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
4777 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
4778 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
4779 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
4780 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
4781 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4782 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4783 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4784 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4785 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4786 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
4787 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8:
4788 case Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8:
4789 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8:
4790 case Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8:
4791 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8:
4792 case Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8:
4793 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8:
4794 case Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8:
4795 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8:
4796 case Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8:
4797 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8:
4798 case Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8:
4799 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8:
4800 case Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8:
4801 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8:
4802 case Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8:
4803 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
4804 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
4805 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
4806 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4:
4807 case Intrinsic::amdgcn_wmma_f32_32x16x128_f4:
4808 case Intrinsic::amdgcn_wmma_scale_f32_32x16x128_f4:
4809 case Intrinsic::amdgcn_wmma_scale16_f32_32x16x128_f4:
4810 case Intrinsic::amdgcn_swmmac_f16_16x16x64_f16:
4811 case Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16:
4812 case Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16:
4813 case Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16:
4814 case Intrinsic::amdgcn_swmmac_f32_16x16x64_f16:
4815 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8:
4816 case Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8:
4817 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8:
4818 case Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8:
4819 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8:
4820 case Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8:
4821 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8:
4822 case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8:
4823 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
4824 case Intrinsic::amdgcn_perm_pk16_b4_u4:
4825 case Intrinsic::amdgcn_perm_pk16_b6_u4:
4826 case Intrinsic::amdgcn_perm_pk16_b8_u4:
4828 case Intrinsic::amdgcn_log:
4829 case Intrinsic::amdgcn_exp2:
4830 case Intrinsic::amdgcn_rcp:
4831 case Intrinsic::amdgcn_rsq:
4832 case Intrinsic::amdgcn_sqrt: {
4833 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4839 case Intrinsic::amdgcn_sbfe:
4840 case Intrinsic::amdgcn_ubfe:
4844 case Intrinsic::amdgcn_ds_swizzle:
4845 case Intrinsic::amdgcn_ds_permute:
4846 case Intrinsic::amdgcn_ds_bpermute:
4847 case Intrinsic::amdgcn_update_dpp:
4848 case Intrinsic::amdgcn_mov_dpp8:
4849 case Intrinsic::amdgcn_mov_dpp:
4850 case Intrinsic::amdgcn_strict_wwm:
4851 case Intrinsic::amdgcn_wwm:
4852 case Intrinsic::amdgcn_strict_wqm:
4853 case Intrinsic::amdgcn_wqm:
4854 case Intrinsic::amdgcn_softwqm:
4855 case Intrinsic::amdgcn_set_inactive:
4856 case Intrinsic::amdgcn_set_inactive_chain_arg:
4857 case Intrinsic::amdgcn_permlane64:
4858 case Intrinsic::amdgcn_ds_bpermute_fi_b32:
4860 case Intrinsic::amdgcn_cvt_pkrtz:
4864 case Intrinsic::amdgcn_kernarg_segment_ptr:
4865 case Intrinsic::amdgcn_s_getpc:
4866 case Intrinsic::amdgcn_groupstaticsize:
4867 case Intrinsic::amdgcn_reloc_constant:
4868 case Intrinsic::returnaddress: {
4869 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4870 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4873 case Intrinsic::amdgcn_wqm_vote: {
4874 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4875 OpdsMapping[0] = OpdsMapping[2]
4876 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size);
4879 case Intrinsic::amdgcn_ps_live: {
4880 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4883 case Intrinsic::amdgcn_div_scale: {
4884 unsigned Dst0Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4885 unsigned Dst1Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4886 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4887 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4889 unsigned SrcSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4890 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4891 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4894 case Intrinsic::amdgcn_class: {
4895 Register Src0Reg =
MI.getOperand(2).getReg();
4896 Register Src1Reg =
MI.getOperand(3).getReg();
4897 unsigned Src0Size =
MRI.getType(Src0Reg).getSizeInBits();
4898 unsigned Src1Size =
MRI.getType(Src1Reg).getSizeInBits();
4899 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4900 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4901 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4902 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4905 case Intrinsic::amdgcn_icmp:
4906 case Intrinsic::amdgcn_fcmp: {
4907 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4909 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4910 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4911 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4912 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4915 case Intrinsic::amdgcn_readlane: {
4918 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4920 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4923 case Intrinsic::amdgcn_readfirstlane: {
4924 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4925 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4926 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4927 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4930 case Intrinsic::amdgcn_writelane: {
4931 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4933 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4936 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4938 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4942 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4943 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4944 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4947 case Intrinsic::amdgcn_if_break: {
4949 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4950 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4951 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4954 case Intrinsic::amdgcn_permlane16:
4955 case Intrinsic::amdgcn_permlanex16: {
4957 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4958 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4959 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4964 case Intrinsic::amdgcn_permlane_bcast:
4965 case Intrinsic::amdgcn_permlane_up:
4966 case Intrinsic::amdgcn_permlane_down:
4967 case Intrinsic::amdgcn_permlane_xor: {
4969 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4970 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4975 case Intrinsic::amdgcn_permlane_idx_gen: {
4977 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4978 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4982 case Intrinsic::amdgcn_permlane16_var:
4983 case Intrinsic::amdgcn_permlanex16_var: {
4985 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4986 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4987 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4988 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4991 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
4992 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
4993 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
4994 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
4995 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
4996 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
4997 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
4998 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
4999 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
5000 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
5001 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
5002 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
5003 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
5004 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
5005 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
5006 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
5007 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
5008 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
5009 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
5010 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
5011 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
5012 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
5013 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
5014 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
5015 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
5016 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
5017 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
5018 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
5019 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
5020 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
5021 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
5022 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
5023 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
5024 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
5025 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
5026 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
5027 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
5028 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
5029 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8:
5030 case Intrinsic::amdgcn_mfma_f32_16x16x32_f16:
5031 case Intrinsic::amdgcn_mfma_f32_32x32x16_f16:
5032 case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:
5033 case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:
5034 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {
5043 Info->mayNeedAGPRs()
5049 Info->mayNeedAGPRs()
5054 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
5055 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
5058 Info->mayNeedAGPRs()
5065 Info->mayNeedAGPRs()
5073 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
5074 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
5075 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
5076 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
5077 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
5078 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
5079 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
5080 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
5081 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
5082 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
5083 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
5084 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
5085 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
5086 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
5087 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
5088 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
5089 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
5090 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
5091 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
5092 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
5093 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
5094 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
5095 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
5096 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
5097 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
5098 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
5099 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
5100 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {
5109 case Intrinsic::amdgcn_interp_p1:
5110 case Intrinsic::amdgcn_interp_p2:
5111 case Intrinsic::amdgcn_interp_mov:
5112 case Intrinsic::amdgcn_interp_p1_f16:
5113 case Intrinsic::amdgcn_interp_p2_f16:
5114 case Intrinsic::amdgcn_lds_param_load: {
5115 const int M0Idx =
MI.getNumOperands() - 1;
5116 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5118 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5120 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5121 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5122 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5126 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5129 case Intrinsic::amdgcn_interp_inreg_p10:
5130 case Intrinsic::amdgcn_interp_inreg_p2:
5131 case Intrinsic::amdgcn_interp_inreg_p10_f16:
5132 case Intrinsic::amdgcn_interp_inreg_p2_f16:
5133 case Intrinsic::amdgcn_interp_p10_rtz_f16:
5134 case Intrinsic::amdgcn_interp_p2_rtz_f16: {
5135 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5136 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5137 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5138 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5139 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5142 case Intrinsic::amdgcn_permlane16_swap:
5143 case Intrinsic::amdgcn_permlane32_swap: {
5144 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5145 OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] =
5146 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5149 case Intrinsic::amdgcn_ballot: {
5150 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5151 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
5152 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
5153 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
5156 case Intrinsic::amdgcn_inverse_ballot: {
5158 Register MaskReg =
MI.getOperand(2).getReg();
5159 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
5160 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
5161 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5162 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
5165 case Intrinsic::amdgcn_bitop3: {
5167 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5168 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5169 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5170 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5173 case Intrinsic::amdgcn_s_quadmask:
5174 case Intrinsic::amdgcn_s_wqm: {
5175 Register MaskReg =
MI.getOperand(2).getReg();
5176 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
5177 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
5178 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
5179 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
5182 case Intrinsic::amdgcn_wave_reduce_add:
5183 case Intrinsic::amdgcn_wave_reduce_sub:
5184 case Intrinsic::amdgcn_wave_reduce_min:
5185 case Intrinsic::amdgcn_wave_reduce_umin:
5186 case Intrinsic::amdgcn_wave_reduce_max:
5187 case Intrinsic::amdgcn_wave_reduce_umax:
5188 case Intrinsic::amdgcn_wave_reduce_and:
5189 case Intrinsic::amdgcn_wave_reduce_or:
5190 case Intrinsic::amdgcn_wave_reduce_xor: {
5191 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5192 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
5193 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
5196 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
5199 case Intrinsic::amdgcn_s_bitreplicate:
5200 Register MaskReg =
MI.getOperand(2).getReg();
5201 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
5202 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5203 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
5207 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
5208 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
5209 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
5210 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
5211 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
5214 assert(RSrcIntrin &&
"missing RsrcIntrinsic for image intrinsic");
5221 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
5222 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
5223 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: {
5225 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY ||
5226 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY;
5227 unsigned NumMods = IsDualOrBVH8 ? 0 : 1;
5228 unsigned LastRegOpIdx =
MI.getNumExplicitOperands() - 1 - NumMods;
5229 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5230 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5232 OpdsMapping[1] = AMDGPU::getValueMapping(
5233 AMDGPU::VGPRRegBankID,
5234 MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits());
5235 OpdsMapping[2] = AMDGPU::getValueMapping(
5236 AMDGPU::VGPRRegBankID,
5237 MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
5239 OpdsMapping[LastRegOpIdx] =
5241 if (LastRegOpIdx == 3) {
5243 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
5246 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5249 unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2;
5250 for (
unsigned I = FirstSrcOpIdx;
I < LastRegOpIdx; ++
I) {
5251 unsigned Size =
MRI.getType(
MI.getOperand(
I).getReg()).getSizeInBits();
5252 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
5257 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
5258 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
5259 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
5261 case Intrinsic::amdgcn_s_getreg:
5262 case Intrinsic::amdgcn_s_memtime:
5263 case Intrinsic::amdgcn_s_memrealtime:
5264 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
5265 case Intrinsic::amdgcn_s_sendmsg_rtn: {
5266 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5267 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5270 case Intrinsic::amdgcn_global_atomic_csub:
5271 case Intrinsic::amdgcn_global_atomic_fmin_num:
5272 case Intrinsic::amdgcn_global_atomic_fmax_num:
5273 case Intrinsic::amdgcn_flat_atomic_fmin_num:
5274 case Intrinsic::amdgcn_flat_atomic_fmax_num:
5275 case Intrinsic::amdgcn_atomic_cond_sub_u32:
5276 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
5277 case Intrinsic::amdgcn_global_load_tr_b64:
5278 case Intrinsic::amdgcn_global_load_tr_b128:
5279 case Intrinsic::amdgcn_global_load_tr4_b64:
5280 case Intrinsic::amdgcn_global_load_tr6_b96:
5281 case Intrinsic::amdgcn_ds_load_tr8_b64:
5282 case Intrinsic::amdgcn_ds_load_tr16_b128:
5283 case Intrinsic::amdgcn_ds_load_tr4_b64:
5284 case Intrinsic::amdgcn_ds_load_tr6_b96:
5285 case Intrinsic::amdgcn_flat_load_monitor_b32:
5286 case Intrinsic::amdgcn_flat_load_monitor_b64:
5287 case Intrinsic::amdgcn_flat_load_monitor_b128:
5288 case Intrinsic::amdgcn_global_load_monitor_b32:
5289 case Intrinsic::amdgcn_global_load_monitor_b64:
5290 case Intrinsic::amdgcn_global_load_monitor_b128:
5291 case Intrinsic::amdgcn_ds_read_tr4_b64:
5292 case Intrinsic::amdgcn_ds_read_tr6_b96:
5293 case Intrinsic::amdgcn_ds_read_tr8_b64:
5294 case Intrinsic::amdgcn_ds_read_tr16_b64:
5295 case Intrinsic::amdgcn_ds_atomic_async_barrier_arrive_b64:
5296 case Intrinsic::amdgcn_ds_atomic_barrier_arrive_rtn_b64:
5298 case Intrinsic::amdgcn_ds_ordered_add:
5299 case Intrinsic::amdgcn_ds_ordered_swap: {
5300 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5301 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5303 AMDGPU::SGPRRegBankID);
5304 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
5305 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5308 case Intrinsic::amdgcn_ds_append:
5309 case Intrinsic::amdgcn_ds_consume: {
5310 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5311 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5315 case Intrinsic::amdgcn_exp_compr:
5316 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5317 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5319 case Intrinsic::amdgcn_exp:
5321 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5322 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5323 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5324 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5326 case Intrinsic::amdgcn_exp_row:
5327 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5328 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5329 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5330 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5333 case Intrinsic::amdgcn_s_sendmsg:
5334 case Intrinsic::amdgcn_s_sendmsghalt: {
5337 AMDGPU::SGPRRegBankID);
5338 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5341 case Intrinsic::amdgcn_s_setreg: {
5344 AMDGPU::SGPRRegBankID);
5345 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5348 case Intrinsic::amdgcn_s_ttracedata: {
5352 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5355 case Intrinsic::amdgcn_end_cf: {
5357 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5360 case Intrinsic::amdgcn_else: {
5362 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5363 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5364 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
5367 case Intrinsic::amdgcn_init_whole_wave:
5368 case Intrinsic::amdgcn_live_mask: {
5369 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5372 case Intrinsic::amdgcn_wqm_demote:
5373 case Intrinsic::amdgcn_kill: {
5374 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5377 case Intrinsic::amdgcn_raw_buffer_load:
5378 case Intrinsic::amdgcn_raw_ptr_buffer_load:
5379 case Intrinsic::amdgcn_raw_atomic_buffer_load:
5380 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
5381 case Intrinsic::amdgcn_raw_tbuffer_load:
5382 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
5391 case Intrinsic::amdgcn_raw_buffer_load_lds:
5392 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
5399 case Intrinsic::amdgcn_raw_buffer_store:
5400 case Intrinsic::amdgcn_raw_ptr_buffer_store:
5401 case Intrinsic::amdgcn_raw_buffer_store_format:
5402 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
5403 case Intrinsic::amdgcn_raw_tbuffer_store:
5404 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
5411 case Intrinsic::amdgcn_struct_buffer_load:
5412 case Intrinsic::amdgcn_struct_ptr_buffer_load:
5413 case Intrinsic::amdgcn_struct_tbuffer_load:
5414 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
5415 case Intrinsic::amdgcn_struct_atomic_buffer_load:
5416 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
5424 case Intrinsic::amdgcn_struct_buffer_load_lds:
5425 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
5433 case Intrinsic::amdgcn_struct_buffer_store:
5434 case Intrinsic::amdgcn_struct_ptr_buffer_store:
5435 case Intrinsic::amdgcn_struct_tbuffer_store:
5436 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
5444 case Intrinsic::amdgcn_init_exec_from_input: {
5446 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5449 case Intrinsic::amdgcn_ds_gws_init:
5450 case Intrinsic::amdgcn_ds_gws_barrier:
5451 case Intrinsic::amdgcn_ds_gws_sema_br: {
5452 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5456 AMDGPU::SGPRRegBankID);
5457 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5460 case Intrinsic::amdgcn_ds_gws_sema_v:
5461 case Intrinsic::amdgcn_ds_gws_sema_p:
5462 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
5465 AMDGPU::SGPRRegBankID);
5466 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5469 case Intrinsic::amdgcn_global_store_async_from_lds_b8:
5470 case Intrinsic::amdgcn_global_store_async_from_lds_b32:
5471 case Intrinsic::amdgcn_global_store_async_from_lds_b64:
5472 case Intrinsic::amdgcn_global_store_async_from_lds_b128:
5473 case Intrinsic::amdgcn_global_load_async_to_lds_b8:
5474 case Intrinsic::amdgcn_global_load_async_to_lds_b32:
5475 case Intrinsic::amdgcn_global_load_async_to_lds_b64:
5476 case Intrinsic::amdgcn_global_load_async_to_lds_b128:
5477 case Intrinsic::amdgcn_load_to_lds:
5478 case Intrinsic::amdgcn_global_load_lds: {
5483 case Intrinsic::amdgcn_lds_direct_load: {
5484 const int M0Idx =
MI.getNumOperands() - 1;
5485 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5487 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5489 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5490 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5491 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5495 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5498 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
5499 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
5503 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
5504 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
5505 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
5506 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn: {
5519 case Intrinsic::amdgcn_s_sleep_var:
5522 case Intrinsic::amdgcn_s_barrier_join:
5525 case Intrinsic::amdgcn_s_barrier_init:
5526 case Intrinsic::amdgcn_s_barrier_signal_var:
5530 case Intrinsic::amdgcn_s_barrier_signal_isfirst: {
5531 const unsigned ResultSize = 1;
5533 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5536 case Intrinsic::amdgcn_s_get_barrier_state:
5537 case Intrinsic::amdgcn_s_get_named_barrier_state: {
5542 case Intrinsic::amdgcn_pops_exiting_wave_id:
5544 case Intrinsic::amdgcn_tensor_load_to_lds_d2:
5545 case Intrinsic::amdgcn_tensor_store_from_lds_d2:
5546 case Intrinsic::amdgcn_tensor_load_to_lds:
5547 case Intrinsic::amdgcn_tensor_store_from_lds: {
5550 for (
unsigned I = 1;
I <
MI.getNumOperands(); ++
I) {
5551 if (
MI.getOperand(
I).isReg()) {
5555 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5560 case Intrinsic::amdgcn_s_prefetch_data: {
5565 case Intrinsic::amdgcn_flat_prefetch:
5566 case Intrinsic::amdgcn_global_prefetch:
5573 case AMDGPU::G_SELECT: {
5574 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5576 AMDGPU::SGPRRegBankID);
5578 AMDGPU::SGPRRegBankID);
5579 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
5580 Op3Bank == AMDGPU::SGPRRegBankID;
5582 unsigned CondBankDefault = SGPRSrcs ?
5583 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5586 if (CondBank == AMDGPU::SGPRRegBankID)
5587 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5588 else if (CondBank == AMDGPU::VGPRRegBankID)
5589 CondBank = AMDGPU::VCCRegBankID;
5591 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
5592 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5594 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
5598 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5599 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5600 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5601 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5603 OpdsMapping[0] = AMDGPU::getValueMapping(Bank,
Size);
5604 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5605 OpdsMapping[2] = AMDGPU::getValueMapping(Bank,
Size);
5606 OpdsMapping[3] = AMDGPU::getValueMapping(Bank,
Size);
5612 case AMDGPU::G_SI_CALL: {
5613 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5619 for (
unsigned I = 4;
I <
MI.getNumOperands(); ++
I) {
5620 if (
MI.getOperand(
I).isReg()) {
5624 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5629 case AMDGPU::G_LOAD:
5630 case AMDGPU::G_ZEXTLOAD:
5631 case AMDGPU::G_SEXTLOAD:
5634 case AMDGPU::G_ATOMICRMW_XCHG:
5635 case AMDGPU::G_ATOMICRMW_ADD:
5636 case AMDGPU::G_ATOMICRMW_SUB:
5637 case AMDGPU::G_ATOMICRMW_AND:
5638 case AMDGPU::G_ATOMICRMW_OR:
5639 case AMDGPU::G_ATOMICRMW_XOR:
5640 case AMDGPU::G_ATOMICRMW_MAX:
5641 case AMDGPU::G_ATOMICRMW_MIN:
5642 case AMDGPU::G_ATOMICRMW_UMAX:
5643 case AMDGPU::G_ATOMICRMW_UMIN:
5644 case AMDGPU::G_ATOMICRMW_FADD:
5645 case AMDGPU::G_ATOMICRMW_FMIN:
5646 case AMDGPU::G_ATOMICRMW_FMAX:
5647 case AMDGPU::G_ATOMICRMW_UINC_WRAP:
5648 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5649 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
5655 case AMDGPU::G_ATOMIC_CMPXCHG: {
5662 case AMDGPU::G_BRCOND: {
5664 AMDGPU::SGPRRegBankID);
5665 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
5666 if (Bank != AMDGPU::SGPRRegBankID)
5667 Bank = AMDGPU::VCCRegBankID;
5669 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
5672 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
5674 case AMDGPU::G_PREFETCH:
5677 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP:
5678 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN:
5679 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
5685 MI.getNumOperands());
unsigned const MachineRegisterInfo * MRI
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
static bool substituteSimpleCopyRegs(const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx)
static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
static Register constrainRegToBank(MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &Reg, const RegisterBank &Bank)
static std::pair< Register, Register > unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode)
static void extendLow32IntoHigh32(MachineIRBuilder &B, Register Hi32Reg, Register Lo32Reg, unsigned ExtOpc, const RegisterBank &RegBank, bool IsBooleanSrc=false)
Implement extending a 32-bit value to a 64-bit value.
static unsigned getExtendOp(unsigned Opc)
static bool isVectorRegisterBank(const RegisterBank &Bank)
static unsigned regBankUnion(unsigned RB0, unsigned RB1)
static std::pair< LLT, LLT > splitUnequalType(LLT Ty, unsigned FirstSize)
Split Ty into 2 pieces.
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< Register > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
static void reinsertVectorIndexAdd(MachineIRBuilder &B, MachineInstr &IdxUseInstr, unsigned OpIdx, unsigned ConstOffset)
Utility function for pushing dynamic vector indexes with a constant offset into waterfall loops.
static LLT widen96To128(LLT Ty)
static LLT getHalfSizedType(LLT Ty)
static unsigned getSBufferLoadCorrespondingBufferLoadOpcode(unsigned Opc)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static constexpr MCPhysReg SPReg
Interface definition for SIRegisterInfo.
bool applyMappingDynStackAlloc(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
std::pair< Register, unsigned > splitBufferOffsets(MachineIRBuilder &B, Register Offset) const
bool collectWaterfallOperands(SmallSet< Register, 4 > &SGPROperandRegs, MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef< unsigned > OpIndices) const
const InstructionMapping & getImageMapping(const MachineRegisterInfo &MRI, const MachineInstr &MI, int RsrcIdx) const
InstructionMappings addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI, const std::array< unsigned, NumOps > RegSrcOpIdx, ArrayRef< OpRegBankEntry< NumOps > > Table) const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool buildVCopy(MachineIRBuilder &B, Register DstReg, Register SrcReg) const
bool executeInWaterfallLoop(MachineIRBuilder &B, iterator_range< MachineBasicBlock::iterator > Range, SmallSet< Register, 4 > &SGPROperandRegs) const
Legalize instruction MI where operands in OpIndices must be SGPRs.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override
Get a register bank that covers RC.
AMDGPURegisterBankInfo(const GCNSubtarget &STI)
bool applyMappingMAD_64_32(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI, unsigned Default=AMDGPU::VGPRRegBankID) const
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const
Handle register layout difference for f16 images for some subtargets.
const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const
void applyMappingImpl(MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const override
See RegisterBankInfo::applyMapping.
bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper, bool Signed) const
bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI, const OperandsMapper &OpdMapper, int RSrcIdx) const
const ValueMapping * getVGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool isScalarLoadLegal(const MachineInstr &MI) const
unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg, Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const
const ValueMapping * getSGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
void split64BitValueForMapping(MachineIRBuilder &B, SmallVector< Register, 2 > &Regs, LLT HalfTy, Register Reg) const
Split 64-bit value Reg into two 32-bit halves and populate them into Regs.
const ValueMapping * getValueMappingForPtr(const MachineRegisterInfo &MRI, Register Ptr) const
Return the mapping for a pointer argument.
unsigned getMappingType(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsic(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isDivergentRegBank(const RegisterBank *RB) const override
Returns true if the register bank is considered divergent.
void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const InstructionMapping & getDefaultMappingSOP(const MachineInstr &MI) const
const InstructionMapping & getDefaultMappingAllVGPR(const MachineInstr &MI) const
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
const ValueMapping * getAGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
const GCNSubtarget & Subtarget
const InstructionMapping & getDefaultMappingVOP(const MachineInstr &MI) const
bool isSALUMapping(const MachineInstr &MI) const
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Src) const
bool applyMappingSBufferLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
void applyMappingSMULU64(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
const SIRegisterInfo * TRI
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool hasScalarCompareEq64() const
bool hasSafeSmemPrefetch() const
bool hasScalarSubwordLoads() const
bool hasFullRate64Ops() const
bool hasIntMinMax64() const
bool hasVmemPrefInsts() const
bool hasScalarDwordx3Loads() const
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
bool hasPseudoScalarTrans() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasUnpackedD16VMem() const
bool hasSALUFloatInsts() const
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void createdInstr(MachineInstr &MI)=0
An instruction has been created and inserted into the function.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT getScalarType() const
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
constexpr LLT divide(int Factor) const
Return a type that is Factor times smaller.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
TypeSize getValue() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was in...
MachineBasicBlock::iterator begin()
MachineBasicBlock::iterator end()
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value,.
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Helper class that represents how the value of an instruction may be mapped and what is the related co...
bool isValid() const
Check whether this object is valid.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperand...
const InstructionMapping & getInstrMapping() const
The final mapping of the instruction.
MachineInstr & getMI() const
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
iterator_range< SmallVectorImpl< Register >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
const unsigned * Sizes
Hold the sizes of the register banks for all HwModes.
bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, TypeSize Size) const
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getWaveMaskRegClass() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
self_iterator getIterator()
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
bool isFlatGlobalAddrSpace(unsigned AS)
bool isUniformMMO(const MachineMemOperand *MMO)
bool isExtendedGlobalAddrSpace(unsigned AS)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelValueTracking *ValueTracking=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(APInt RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands to the instruction's register class.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of kernel execution to the load.
auto reverse(ContainerTy &&C)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
@ Default
The result values are uniform if and only if all operands are uniform.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
const RegisterBank * RegBank
Register bank where the partial value lives.
unsigned Length
Length of this mapping in bits.
Helper struct that represents how a value is mapped through different register banks.
unsigned NumBreakDowns
Number of partial mapping to break down this value.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
The llvm::once_flag structure.