29#include "llvm/IR/IntrinsicsAMDGPU.h"
32#define DEBUG_TYPE "amdgpu-isel"
35using namespace MIPatternMatch;
37#define GET_GLOBALISEL_IMPL
38#define AMDGPUSubtarget GCNSubtarget
39#include "AMDGPUGenGlobalISel.inc"
40#undef GET_GLOBALISEL_IMPL
46 :
TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
49#include
"AMDGPUGenGlobalISel.inc"
52#include
"AMDGPUGenGlobalISel.inc"
72 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
73 ? Def->getOperand(1).getReg()
77bool AMDGPUInstructionSelector::isVCC(
Register Reg,
83 auto &RegClassOrBank =
MRI.getRegClassOrRegBank(Reg);
85 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
87 const LLT Ty =
MRI.getType(Reg);
91 return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
95 const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
96 return RB->
getID() == AMDGPU::VCCRegBankID;
99bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(
MachineInstr &
MI,
100 unsigned NewOpc)
const {
101 MI.setDesc(TII.get(NewOpc));
116 if (!DstRC || DstRC != SrcRC)
123bool AMDGPUInstructionSelector::selectCOPY(
MachineInstr &
I)
const {
126 I.setDesc(TII.get(TargetOpcode::COPY));
133 if (isVCC(DstReg, *MRI)) {
134 if (SrcReg == AMDGPU::SCC) {
142 if (!isVCC(SrcReg, *MRI)) {
150 std::optional<ValueAndVReg> ConstVal =
154 STI.
isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
156 .
addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
158 Register MaskedReg =
MRI->createVirtualRegister(SrcRC);
166 const int64_t NoMods = 0;
167 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
173 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
181 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
186 And.setOperandDead(3);
188 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
194 if (!
MRI->getRegClassOrNull(SrcReg))
195 MRI->setRegClass(SrcReg, SrcRC);
209 if (MO.getReg().isPhysical())
221bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(
MachineInstr &
I)
const {
226 STI.
isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
228 .
addReg(
I.getOperand(1).getReg())
233 Register DstReg =
I.getOperand(0).getReg();
240bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(
MachineInstr &
I)
const {
244 Register DstReg =
I.getOperand(0).getReg();
245 Register SrcReg =
I.getOperand(1).getReg();
246 std::optional<ValueAndVReg> Arg =
250 const int64_t
Value = Arg->
Value.getZExtValue();
252 unsigned Opcode = STI.
isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
265 unsigned SelectOpcode =
266 STI.
isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
275bool AMDGPUInstructionSelector::selectReadAnyLane(
MachineInstr &
I)
const {
276 Register DstReg =
I.getOperand(0).getReg();
277 Register SrcReg =
I.getOperand(1).getReg();
282 auto RFL =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
289bool AMDGPUInstructionSelector::selectPHI(
MachineInstr &
I)
const {
290 const Register DefReg =
I.getOperand(0).getReg();
291 const LLT DefTy =
MRI->getType(DefReg);
303 MRI->getRegClassOrRegBank(DefReg);
306 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
313 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
323 for (
unsigned i = 1; i !=
I.getNumOperands(); i += 2) {
324 const Register SrcReg =
I.getOperand(i).getReg();
328 const LLT SrcTy =
MRI->getType(SrcReg);
336 I.setDesc(TII.get(TargetOpcode::PHI));
343 unsigned SubIdx)
const {
347 Register DstReg =
MRI->createVirtualRegister(&SubRC);
350 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.
getSubReg(), SubIdx);
352 BuildMI(*BB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
353 .
addReg(Reg, 0, ComposedSubIdx);
378 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
380 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
382 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
388bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(
MachineInstr &
I)
const {
389 Register DstReg =
I.getOperand(0).getReg();
393 if (DstRB->
getID() != AMDGPU::SGPRRegBankID &&
394 DstRB->
getID() != AMDGPU::VCCRegBankID)
397 bool Is64 =
Size > 32 || (DstRB->
getID() == AMDGPU::VCCRegBankID &&
409bool AMDGPUInstructionSelector::selectG_ADD_SUB(
MachineInstr &
I)
const {
412 Register DstReg =
I.getOperand(0).getReg();
414 LLT Ty =
MRI->getType(DstReg);
420 const bool IsSALU = DstRB->
getID() == AMDGPU::SGPRRegBankID;
421 const bool Sub =
I.getOpcode() == TargetOpcode::G_SUB;
425 const unsigned Opc =
Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
428 .
add(
I.getOperand(1))
429 .
add(
I.getOperand(2))
436 const unsigned Opc =
Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
437 I.setDesc(TII.get(
Opc));
443 const unsigned Opc =
Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
449 .
add(
I.getOperand(1))
450 .
add(
I.getOperand(2))
456 assert(!
Sub &&
"illegal sub should not reach here");
459 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
461 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
463 MachineOperand Lo1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub0));
464 MachineOperand Lo2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub0));
465 MachineOperand Hi1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub1));
466 MachineOperand Hi2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub1));
468 Register DstLo =
MRI->createVirtualRegister(&HalfRC);
469 Register DstHi =
MRI->createVirtualRegister(&HalfRC);
472 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
475 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
481 Register CarryReg =
MRI->createVirtualRegister(CarryRC);
482 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
498 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
512bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
517 Register Dst0Reg =
I.getOperand(0).getReg();
518 Register Dst1Reg =
I.getOperand(1).getReg();
519 const bool IsAdd =
I.getOpcode() == AMDGPU::G_UADDO ||
520 I.getOpcode() == AMDGPU::G_UADDE;
521 const bool HasCarryIn =
I.getOpcode() == AMDGPU::G_UADDE ||
522 I.getOpcode() == AMDGPU::G_USUBE;
524 if (isVCC(Dst1Reg, *MRI)) {
525 unsigned NoCarryOpc =
526 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
527 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
528 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
534 Register Src0Reg =
I.getOperand(2).getReg();
535 Register Src1Reg =
I.getOperand(3).getReg();
538 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
539 .
addReg(
I.getOperand(4).getReg());
542 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
543 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
545 auto CarryInst =
BuildMI(*BB, &
I,
DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
546 .
add(
I.getOperand(2))
547 .
add(
I.getOperand(3));
549 if (
MRI->use_nodbg_empty(Dst1Reg)) {
552 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), Dst1Reg)
554 if (!
MRI->getRegClassOrNull(Dst1Reg))
555 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
565 AMDGPU::SReg_32RegClass, *MRI))
572bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
576 const bool IsUnsigned =
I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
578 MRI->use_nodbg_empty(
I.getOperand(1).getReg());
582 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
583 : AMDGPU::V_MAD_I64_I32_gfx11_e64;
585 Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
586 : AMDGPU::V_MAD_NC_I64_I32_e64;
588 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
593 I.setDesc(TII.get(
Opc));
595 I.addImplicitDefUseOperands(*
MF);
600bool AMDGPUInstructionSelector::selectG_EXTRACT(
MachineInstr &
I)
const {
602 Register DstReg =
I.getOperand(0).getReg();
603 Register SrcReg =
I.getOperand(1).getReg();
604 LLT DstTy =
MRI->getType(DstReg);
605 LLT SrcTy =
MRI->getType(SrcReg);
610 unsigned Offset =
I.getOperand(2).getImm();
611 if (
Offset % 32 != 0 || DstSize > 128)
631 SrcRC = TRI.getSubClassWithSubReg(SrcRC,
SubReg);
636 *SrcRC,
I.getOperand(1));
638 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::COPY), DstReg)
645bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(
MachineInstr &
MI)
const {
648 LLT DstTy =
MRI->getType(DstReg);
649 LLT SrcTy =
MRI->getType(
MI.getOperand(1).getReg());
665 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
666 for (
int I = 0, E =
MI.getNumOperands() - 1;
I != E; ++
I) {
680 MI.eraseFromParent();
684bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(
MachineInstr &
MI)
const {
686 const int NumDst =
MI.getNumOperands() - 1;
692 LLT DstTy =
MRI->getType(DstReg0);
693 LLT SrcTy =
MRI->getType(SrcReg);
709 for (
int I = 0, E = NumDst;
I != E; ++
I) {
711 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::COPY), Dst.getReg())
712 .
addReg(SrcReg, 0, SubRegs[
I]);
715 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[
I]);
725 MI.eraseFromParent();
729bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(
MachineInstr &
MI)
const {
730 assert(
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
731 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
735 LLT SrcTy =
MRI->getType(Src0);
739 if (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
740 return selectG_MERGE_VALUES(
MI);
747 (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
752 if (DstBank->
getID() == AMDGPU::AGPRRegBankID)
755 assert(DstBank->
getID() == AMDGPU::SGPRRegBankID ||
756 DstBank->
getID() == AMDGPU::VGPRRegBankID);
757 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
770 const int64_t K0 = ConstSrc0->Value.getSExtValue();
771 const int64_t K1 = ConstSrc1->Value.getSExtValue();
779 MI.eraseFromParent();
785 MI.eraseFromParent();
797 if (Src1Def->
getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
798 MI.setDesc(TII.get(AMDGPU::COPY));
801 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
808 Register TmpReg =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
809 auto MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
815 MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
822 MI.eraseFromParent();
847 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
848 if (Shift0 && Shift1) {
849 Opc = AMDGPU::S_PACK_HH_B32_B16;
850 MI.getOperand(1).setReg(ShiftSrc0);
851 MI.getOperand(2).setReg(ShiftSrc1);
853 Opc = AMDGPU::S_PACK_LH_B32_B16;
854 MI.getOperand(2).setReg(ShiftSrc1);
858 if (ConstSrc1 && ConstSrc1->Value == 0) {
860 auto MIB =
BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
865 MI.eraseFromParent();
869 Opc = AMDGPU::S_PACK_HL_B32_B16;
870 MI.getOperand(1).setReg(ShiftSrc0);
874 MI.setDesc(TII.get(
Opc));
878bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(
MachineInstr &
I)
const {
884 if ((!RC && !
MRI->getRegBankOrNull(MO.
getReg())) ||
886 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
893bool AMDGPUInstructionSelector::selectG_INSERT(
MachineInstr &
I)
const {
896 Register DstReg =
I.getOperand(0).getReg();
897 Register Src0Reg =
I.getOperand(1).getReg();
898 Register Src1Reg =
I.getOperand(2).getReg();
899 LLT Src1Ty =
MRI->getType(Src1Reg);
901 unsigned DstSize =
MRI->getType(DstReg).getSizeInBits();
904 int64_t
Offset =
I.getOperand(3).getImm();
907 if (
Offset % 32 != 0 || InsSize % 32 != 0)
915 if (
SubReg == AMDGPU::NoSubRegister)
933 Src0RC = TRI.getSubClassWithSubReg(Src0RC,
SubReg);
934 if (!Src0RC || !Src1RC)
943 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
952bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(
MachineInstr &
MI)
const {
955 Register OffsetReg =
MI.getOperand(2).getReg();
956 Register WidthReg =
MI.getOperand(3).getReg();
959 "scalar BFX instructions are expanded in regbankselect");
960 assert(
MRI->getType(
MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
961 "64-bit vector BFX instructions are expanded in regbankselect");
966 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SBFX;
967 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
972 MI.eraseFromParent();
976bool AMDGPUInstructionSelector::selectInterpP1F16(
MachineInstr &
MI)
const {
995 Register InterpMov =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1001 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
1004 .
addImm(
MI.getOperand(3).getImm());
1017 MI.eraseFromParent();
1026bool AMDGPUInstructionSelector::selectWritelane(
MachineInstr &
MI)
const {
1035 Register LaneSelect =
MI.getOperand(3).getReg();
1038 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
1040 std::optional<ValueAndVReg> ConstSelect =
1046 MIB.
addImm(ConstSelect->Value.getSExtValue() &
1049 std::optional<ValueAndVReg> ConstVal =
1056 MIB.
addImm(ConstVal->Value.getSExtValue());
1066 BuildMI(*
MBB, *MIB,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1074 MI.eraseFromParent();
1080bool AMDGPUInstructionSelector::selectDivScale(
MachineInstr &
MI)
const {
1084 LLT Ty =
MRI->getType(Dst0);
1087 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
1089 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
1100 unsigned ChooseDenom =
MI.getOperand(5).getImm();
1102 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1115 MI.eraseFromParent();
1119bool AMDGPUInstructionSelector::selectG_INTRINSIC(
MachineInstr &
I)
const {
1120 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(
I).getIntrinsicID();
1121 switch (IntrinsicID) {
1122 case Intrinsic::amdgcn_if_break: {
1127 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1128 .
add(
I.getOperand(0))
1129 .
add(
I.getOperand(2))
1130 .
add(
I.getOperand(3));
1132 Register DstReg =
I.getOperand(0).getReg();
1133 Register Src0Reg =
I.getOperand(2).getReg();
1134 Register Src1Reg =
I.getOperand(3).getReg();
1136 I.eraseFromParent();
1138 for (
Register Reg : { DstReg, Src0Reg, Src1Reg })
1143 case Intrinsic::amdgcn_interp_p1_f16:
1144 return selectInterpP1F16(
I);
1145 case Intrinsic::amdgcn_wqm:
1146 return constrainCopyLikeIntrin(
I, AMDGPU::WQM);
1147 case Intrinsic::amdgcn_softwqm:
1148 return constrainCopyLikeIntrin(
I, AMDGPU::SOFT_WQM);
1149 case Intrinsic::amdgcn_strict_wwm:
1150 case Intrinsic::amdgcn_wwm:
1151 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WWM);
1152 case Intrinsic::amdgcn_strict_wqm:
1153 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WQM);
1154 case Intrinsic::amdgcn_writelane:
1155 return selectWritelane(
I);
1156 case Intrinsic::amdgcn_div_scale:
1157 return selectDivScale(
I);
1158 case Intrinsic::amdgcn_icmp:
1159 case Intrinsic::amdgcn_fcmp:
1162 return selectIntrinsicCmp(
I);
1163 case Intrinsic::amdgcn_ballot:
1164 return selectBallot(
I);
1165 case Intrinsic::amdgcn_reloc_constant:
1166 return selectRelocConstant(
I);
1167 case Intrinsic::amdgcn_groupstaticsize:
1168 return selectGroupStaticSize(
I);
1169 case Intrinsic::returnaddress:
1170 return selectReturnAddress(
I);
1171 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1172 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1173 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1174 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1175 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1176 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1177 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1178 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1179 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1180 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1181 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1182 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1183 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1184 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1185 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1186 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1187 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1188 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1189 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1190 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1191 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1192 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1193 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1194 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1195 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1196 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1197 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1198 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1199 return selectSMFMACIntrin(
I);
1200 case Intrinsic::amdgcn_permlane16_swap:
1201 case Intrinsic::amdgcn_permlane32_swap:
1202 return selectPermlaneSwapIntrin(
I, IntrinsicID);
1213 if (
Size == 16 && !ST.has16BitInsts())
1216 const auto Select = [&](
unsigned S16Opc,
unsigned TrueS16Opc,
1217 unsigned FakeS16Opc,
unsigned S32Opc,
1220 return ST.hasTrue16BitInsts()
1221 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1232 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1233 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1234 AMDGPU::V_CMP_NE_U64_e64);
1236 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1237 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1238 AMDGPU::V_CMP_EQ_U64_e64);
1240 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1241 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1242 AMDGPU::V_CMP_GT_I64_e64);
1244 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1245 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1246 AMDGPU::V_CMP_GE_I64_e64);
1248 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1249 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1250 AMDGPU::V_CMP_LT_I64_e64);
1252 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1253 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1254 AMDGPU::V_CMP_LE_I64_e64);
1256 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1257 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1258 AMDGPU::V_CMP_GT_U64_e64);
1260 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1261 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1262 AMDGPU::V_CMP_GE_U64_e64);
1264 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1265 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1266 AMDGPU::V_CMP_LT_U64_e64);
1268 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1269 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1270 AMDGPU::V_CMP_LE_U64_e64);
1273 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1274 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1275 AMDGPU::V_CMP_EQ_F64_e64);
1277 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1278 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1279 AMDGPU::V_CMP_GT_F64_e64);
1281 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1282 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1283 AMDGPU::V_CMP_GE_F64_e64);
1285 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1286 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1287 AMDGPU::V_CMP_LT_F64_e64);
1289 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1290 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1291 AMDGPU::V_CMP_LE_F64_e64);
1293 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1294 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1295 AMDGPU::V_CMP_NEQ_F64_e64);
1297 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1298 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1299 AMDGPU::V_CMP_O_F64_e64);
1301 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1302 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1303 AMDGPU::V_CMP_U_F64_e64);
1305 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1306 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1307 AMDGPU::V_CMP_NLG_F64_e64);
1309 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1310 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1311 AMDGPU::V_CMP_NLE_F64_e64);
1313 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1314 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1315 AMDGPU::V_CMP_NLT_F64_e64);
1317 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1318 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1319 AMDGPU::V_CMP_NGE_F64_e64);
1321 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1322 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1323 AMDGPU::V_CMP_NGT_F64_e64);
1325 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1326 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1327 AMDGPU::V_CMP_NEQ_F64_e64);
1329 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1330 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1331 AMDGPU::V_CMP_TRU_F64_e64);
1333 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1334 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1335 AMDGPU::V_CMP_F_F64_e64);
1340 unsigned Size)
const {
1347 return AMDGPU::S_CMP_LG_U64;
1349 return AMDGPU::S_CMP_EQ_U64;
1358 return AMDGPU::S_CMP_LG_U32;
1360 return AMDGPU::S_CMP_EQ_U32;
1362 return AMDGPU::S_CMP_GT_I32;
1364 return AMDGPU::S_CMP_GE_I32;
1366 return AMDGPU::S_CMP_LT_I32;
1368 return AMDGPU::S_CMP_LE_I32;
1370 return AMDGPU::S_CMP_GT_U32;
1372 return AMDGPU::S_CMP_GE_U32;
1374 return AMDGPU::S_CMP_LT_U32;
1376 return AMDGPU::S_CMP_LE_U32;
1378 return AMDGPU::S_CMP_EQ_F32;
1380 return AMDGPU::S_CMP_GT_F32;
1382 return AMDGPU::S_CMP_GE_F32;
1384 return AMDGPU::S_CMP_LT_F32;
1386 return AMDGPU::S_CMP_LE_F32;
1388 return AMDGPU::S_CMP_LG_F32;
1390 return AMDGPU::S_CMP_O_F32;
1392 return AMDGPU::S_CMP_U_F32;
1394 return AMDGPU::S_CMP_NLG_F32;
1396 return AMDGPU::S_CMP_NLE_F32;
1398 return AMDGPU::S_CMP_NLT_F32;
1400 return AMDGPU::S_CMP_NGE_F32;
1402 return AMDGPU::S_CMP_NGT_F32;
1404 return AMDGPU::S_CMP_NEQ_F32;
1416 return AMDGPU::S_CMP_EQ_F16;
1418 return AMDGPU::S_CMP_GT_F16;
1420 return AMDGPU::S_CMP_GE_F16;
1422 return AMDGPU::S_CMP_LT_F16;
1424 return AMDGPU::S_CMP_LE_F16;
1426 return AMDGPU::S_CMP_LG_F16;
1428 return AMDGPU::S_CMP_O_F16;
1430 return AMDGPU::S_CMP_U_F16;
1432 return AMDGPU::S_CMP_NLG_F16;
1434 return AMDGPU::S_CMP_NLE_F16;
1436 return AMDGPU::S_CMP_NLT_F16;
1438 return AMDGPU::S_CMP_NGE_F16;
1440 return AMDGPU::S_CMP_NGT_F16;
1442 return AMDGPU::S_CMP_NEQ_F16;
1451bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(
MachineInstr &
I)
const {
1456 Register SrcReg =
I.getOperand(2).getReg();
1461 Register CCReg =
I.getOperand(0).getReg();
1462 if (!isVCC(CCReg, *MRI)) {
1463 int Opcode = getS_CMPOpcode(Pred,
Size);
1467 .
add(
I.getOperand(2))
1468 .
add(
I.getOperand(3));
1469 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CCReg)
1474 I.eraseFromParent();
1478 if (
I.getOpcode() == AMDGPU::G_FCMP)
1488 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1490 .
add(
I.getOperand(2))
1492 .
add(
I.getOperand(3))
1495 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1496 .
add(
I.getOperand(2))
1497 .
add(
I.getOperand(3));
1503 I.eraseFromParent();
1507bool AMDGPUInstructionSelector::selectIntrinsicCmp(
MachineInstr &
I)
const {
1508 Register Dst =
I.getOperand(0).getReg();
1509 if (isVCC(Dst, *MRI))
1512 LLT DstTy =
MRI->getType(Dst);
1518 Register SrcReg =
I.getOperand(2).getReg();
1527 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1528 I.eraseFromParent();
1539 auto [Src0, Src0Mods] = selectVOP3ModsImpl(
LHS.getReg());
1540 auto [Src1, Src1Mods] = selectVOP3ModsImpl(
RHS.getReg());
1542 copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &
I,
true);
1544 copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &
I,
true);
1545 SelectedMI =
BuildMI(*BB, &
I,
DL, TII.get(Opcode), Dst);
1547 SelectedMI.
addImm(Src0Mods);
1548 SelectedMI.
addReg(Src0Reg);
1550 SelectedMI.
addImm(Src1Mods);
1551 SelectedMI.
addReg(Src1Reg);
1561 I.eraseFromParent();
1572 if (
MI->getParent() !=
MBB)
1576 if (
MI->getOpcode() == AMDGPU::COPY) {
1577 auto DstRB =
MRI.getRegBankOrNull(
MI->getOperand(0).getReg());
1578 auto SrcRB =
MRI.getRegBankOrNull(
MI->getOperand(1).getReg());
1579 if (DstRB && SrcRB && DstRB->
getID() == AMDGPU::VCCRegBankID &&
1580 SrcRB->getID() == AMDGPU::SGPRRegBankID)
1585 if (isa<GAnyCmp>(
MI))
1597bool AMDGPUInstructionSelector::selectBallot(
MachineInstr &
I)
const {
1600 Register DstReg =
I.getOperand(0).getReg();
1601 Register SrcReg =
I.getOperand(2).getReg();
1602 const unsigned BallotSize =
MRI->getType(DstReg).getSizeInBits();
1607 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1610 std::optional<ValueAndVReg> Arg =
1615 if (BallotSize != WaveSize) {
1620 const int64_t
Value = Arg->
Value.getZExtValue();
1623 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1640 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1651 if (BallotSize != WaveSize) {
1652 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1654 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1661 I.eraseFromParent();
1665bool AMDGPUInstructionSelector::selectRelocConstant(
MachineInstr &
I)
const {
1666 Register DstReg =
I.getOperand(0).getReg();
1672 const bool IsVALU = DstBank->
getID() == AMDGPU::VGPRRegBankID;
1677 auto *RelocSymbol = cast<GlobalVariable>(
1682 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1685 I.eraseFromParent();
1689bool AMDGPUInstructionSelector::selectGroupStaticSize(
MachineInstr &
I)
const {
1692 Register DstReg =
I.getOperand(0).getReg();
1694 unsigned Mov = DstRB->
getID() == AMDGPU::SGPRRegBankID ?
1695 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1712 I.eraseFromParent();
1716bool AMDGPUInstructionSelector::selectReturnAddress(
MachineInstr &
I)
const {
1723 unsigned Depth =
I.getOperand(2).getImm();
1736 I.eraseFromParent();
1747 AMDGPU::SReg_64RegClass,
DL);
1750 I.eraseFromParent();
1754bool AMDGPUInstructionSelector::selectEndCfIntrinsic(
MachineInstr &
MI)
const {
1758 BuildMI(*BB, &
MI,
MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1759 .
add(
MI.getOperand(1));
1762 MI.eraseFromParent();
1764 if (!
MRI->getRegClassOrNull(Reg))
1769bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1775 unsigned IndexOperand =
MI.getOperand(7).getImm();
1776 bool WaveRelease =
MI.getOperand(8).getImm() != 0;
1777 bool WaveDone =
MI.getOperand(9).getImm() != 0;
1779 if (WaveDone && !WaveRelease) {
1783 Fn,
"ds_ordered_count: wave_done requires wave_release",
DL));
1786 unsigned OrderedCountIndex = IndexOperand & 0x3f;
1787 IndexOperand &= ~0x3f;
1788 unsigned CountDw = 0;
1791 CountDw = (IndexOperand >> 24) & 0xf;
1792 IndexOperand &= ~(0xf << 24);
1794 if (CountDw < 1 || CountDw > 4) {
1797 Fn,
"ds_ordered_count: dword count must be between 1 and 4",
DL));
1805 Fn,
"ds_ordered_count: bad index operand",
DL));
1808 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1811 unsigned Offset0 = OrderedCountIndex << 2;
1812 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (
Instruction << 4);
1815 Offset1 |= (CountDw - 1) << 6;
1818 Offset1 |= ShaderType << 2;
1820 unsigned Offset = Offset0 | (Offset1 << 8);
1829 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1838 MI.eraseFromParent();
1844 case Intrinsic::amdgcn_ds_gws_init:
1845 return AMDGPU::DS_GWS_INIT;
1846 case Intrinsic::amdgcn_ds_gws_barrier:
1847 return AMDGPU::DS_GWS_BARRIER;
1848 case Intrinsic::amdgcn_ds_gws_sema_v:
1849 return AMDGPU::DS_GWS_SEMA_V;
1850 case Intrinsic::amdgcn_ds_gws_sema_br:
1851 return AMDGPU::DS_GWS_SEMA_BR;
1852 case Intrinsic::amdgcn_ds_gws_sema_p:
1853 return AMDGPU::DS_GWS_SEMA_P;
1854 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1855 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
1861bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(
MachineInstr &
MI,
1863 if (!STI.
hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1868 const bool HasVSrc =
MI.getNumOperands() == 3;
1869 assert(HasVSrc ||
MI.getNumOperands() == 2);
1871 Register BaseOffset =
MI.getOperand(HasVSrc ? 2 : 1).getReg();
1873 if (OffsetRB->
getID() != AMDGPU::SGPRRegBankID)
1887 if (OffsetDef->
getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
1888 Readfirstlane = OffsetDef;
1893 if (OffsetDef->
getOpcode() == AMDGPU::G_CONSTANT) {
1903 std::tie(BaseOffset, ImmOffset) =
1906 if (Readfirstlane) {
1916 AMDGPU::SReg_32RegClass, *MRI))
1920 Register M0Base =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1948 MI.eraseFromParent();
1952bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
1953 bool IsAppend)
const {
1954 Register PtrBase =
MI.getOperand(2).getReg();
1955 LLT PtrTy =
MRI->getType(PtrBase);
1959 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
1962 if (!isDSOffsetLegal(PtrBase,
Offset)) {
1963 PtrBase =
MI.getOperand(2).getReg();
1969 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1980 MI.eraseFromParent();
1984bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
1997 TFE = TexFailCtrl & 0x1;
1999 LWE = TexFailCtrl & 0x2;
2002 return TexFailCtrl == 0;
2005bool AMDGPUInstructionSelector::selectImageIntrinsic(
2014 unsigned IntrOpcode =
Intr->BaseOpcode;
2019 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
2023 int NumVDataDwords = -1;
2024 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
2025 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
2031 Unorm =
MI.getOperand(ArgOffset +
Intr->UnormIndex).getImm() != 0;
2035 bool IsTexFail =
false;
2037 TFE, LWE, IsTexFail))
2040 const int Flags =
MI.getOperand(ArgOffset +
Intr->NumArgs).getImm();
2041 const bool IsA16 = (
Flags & 1) != 0;
2042 const bool IsG16 = (
Flags & 2) != 0;
2045 if (IsA16 && !STI.
hasG16() && !IsG16)
2049 unsigned DMaskLanes = 0;
2051 if (BaseOpcode->
Atomic) {
2052 VDataOut =
MI.getOperand(0).getReg();
2053 VDataIn =
MI.getOperand(2).getReg();
2054 LLT Ty =
MRI->getType(VDataIn);
2057 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
2062 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
2064 DMask = Is64Bit ? 0xf : 0x3;
2065 NumVDataDwords = Is64Bit ? 4 : 2;
2067 DMask = Is64Bit ? 0x3 : 0x1;
2068 NumVDataDwords = Is64Bit ? 2 : 1;
2071 DMask =
MI.getOperand(ArgOffset +
Intr->DMaskIndex).getImm();
2074 if (BaseOpcode->
Store) {
2075 VDataIn =
MI.getOperand(1).getReg();
2076 VDataTy =
MRI->getType(VDataIn);
2081 VDataOut =
MI.getOperand(0).getReg();
2082 VDataTy =
MRI->getType(VDataOut);
2083 NumVDataDwords = DMaskLanes;
2086 NumVDataDwords = (DMaskLanes + 1) / 2;
2091 if (Subtarget->
hasG16() && IsG16) {
2095 IntrOpcode = G16MappingInfo->
G16;
2099 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2101 unsigned CPol =
MI.getOperand(ArgOffset +
Intr->CachePolicyIndex).getImm();
2108 int NumVAddrRegs = 0;
2109 int NumVAddrDwords = 0;
2110 for (
unsigned I =
Intr->VAddrStart; I < Intr->VAddrEnd;
I++) {
2113 if (!AddrOp.
isReg())
2121 NumVAddrDwords += (
MRI->getType(
Addr).getSizeInBits() + 31) / 32;
2128 NumVAddrRegs != 1 &&
2130 : NumVAddrDwords == NumVAddrRegs);
2131 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2142 NumVDataDwords, NumVAddrDwords);
2143 }
else if (IsGFX11Plus) {
2145 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2146 : AMDGPU::MIMGEncGfx11Default,
2147 NumVDataDwords, NumVAddrDwords);
2148 }
else if (IsGFX10Plus) {
2150 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2151 : AMDGPU::MIMGEncGfx10Default,
2152 NumVDataDwords, NumVAddrDwords);
2156 NumVDataDwords, NumVAddrDwords);
2160 <<
"requested image instruction is not supported on this GPU\n");
2167 NumVDataDwords, NumVAddrDwords);
2170 NumVDataDwords, NumVAddrDwords);
2180 const bool Is64 =
MRI->getType(VDataOut).getSizeInBits() == 64;
2183 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2184 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2187 if (!
MRI->use_empty(VDataOut)) {
2200 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2202 if (
SrcOp.isReg()) {
2208 MIB.
addReg(
MI.getOperand(ArgOffset +
Intr->RsrcIndex).getReg());
2210 MIB.
addReg(
MI.getOperand(ArgOffset +
Intr->SampIndex).getReg());
2221 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2223 MIB.
addImm(IsA16 ? -1 : 0);
2237 MIB.
addImm(IsD16 ? -1 : 0);
2239 MI.eraseFromParent();
2247bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2258 unsigned Offset =
MI.getOperand(6).getImm();
2262 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2263 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2264 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2266 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2267 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2269 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2270 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2282 MI.eraseFromParent();
2286bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2288 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(
I).getIntrinsicID();
2289 switch (IntrinsicID) {
2290 case Intrinsic::amdgcn_end_cf:
2291 return selectEndCfIntrinsic(
I);
2292 case Intrinsic::amdgcn_ds_ordered_add:
2293 case Intrinsic::amdgcn_ds_ordered_swap:
2294 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2295 case Intrinsic::amdgcn_ds_gws_init:
2296 case Intrinsic::amdgcn_ds_gws_barrier:
2297 case Intrinsic::amdgcn_ds_gws_sema_v:
2298 case Intrinsic::amdgcn_ds_gws_sema_br:
2299 case Intrinsic::amdgcn_ds_gws_sema_p:
2300 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2301 return selectDSGWSIntrinsic(
I, IntrinsicID);
2302 case Intrinsic::amdgcn_ds_append:
2303 return selectDSAppendConsume(
I,
true);
2304 case Intrinsic::amdgcn_ds_consume:
2305 return selectDSAppendConsume(
I,
false);
2306 case Intrinsic::amdgcn_init_whole_wave:
2307 return selectInitWholeWave(
I);
2308 case Intrinsic::amdgcn_raw_buffer_load_lds:
2309 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2310 case Intrinsic::amdgcn_struct_buffer_load_lds:
2311 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2312 return selectBufferLoadLds(
I);
2317 case Intrinsic::amdgcn_load_to_lds:
2318 case Intrinsic::amdgcn_global_load_lds:
2319 return selectGlobalLoadLds(
I);
2320 case Intrinsic::amdgcn_exp_compr:
2323 F.getContext().diagnose(
2329 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2330 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2331 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2332 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2333 return selectDSBvhStackIntrinsic(
I);
2334 case Intrinsic::amdgcn_s_barrier_init:
2335 case Intrinsic::amdgcn_s_barrier_signal_var:
2336 return selectNamedBarrierInit(
I, IntrinsicID);
2337 case Intrinsic::amdgcn_s_barrier_join:
2338 case Intrinsic::amdgcn_s_get_named_barrier_state:
2339 return selectNamedBarrierInst(
I, IntrinsicID);
2340 case Intrinsic::amdgcn_s_get_barrier_state:
2341 return selectSGetBarrierState(
I, IntrinsicID);
2342 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2343 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
2348bool AMDGPUInstructionSelector::selectG_SELECT(
MachineInstr &
I)
const {
2355 Register DstReg =
I.getOperand(0).getReg();
2360 if (!isVCC(CCReg, *MRI)) {
2361 unsigned SelectOpcode =
Size == 64 ? AMDGPU::S_CSELECT_B64 :
2362 AMDGPU::S_CSELECT_B32;
2369 if (!
MRI->getRegClassOrNull(CCReg))
2372 .
add(
I.getOperand(2))
2373 .
add(
I.getOperand(3));
2378 I.eraseFromParent();
2387 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2389 .
add(
I.getOperand(3))
2391 .
add(
I.getOperand(2))
2392 .
add(
I.getOperand(1));
2395 I.eraseFromParent();
2399bool AMDGPUInstructionSelector::selectG_TRUNC(
MachineInstr &
I)
const {
2400 Register DstReg =
I.getOperand(0).getReg();
2401 Register SrcReg =
I.getOperand(1).getReg();
2402 const LLT DstTy =
MRI->getType(DstReg);
2403 const LLT SrcTy =
MRI->getType(SrcReg);
2418 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
2427 if (!SrcRC || !DstRC)
2436 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2441 .
addReg(SrcReg, 0, AMDGPU::lo16);
2442 I.eraseFromParent();
2450 Register LoReg =
MRI->createVirtualRegister(DstRC);
2451 Register HiReg =
MRI->createVirtualRegister(DstRC);
2453 .
addReg(SrcReg, 0, AMDGPU::sub0);
2455 .
addReg(SrcReg, 0, AMDGPU::sub1);
2457 if (IsVALU && STI.
hasSDWA()) {
2461 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2471 Register TmpReg0 =
MRI->createVirtualRegister(DstRC);
2472 Register TmpReg1 =
MRI->createVirtualRegister(DstRC);
2473 Register ImmReg =
MRI->createVirtualRegister(DstRC);
2475 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2485 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2486 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2487 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2499 And.setOperandDead(3);
2500 Or.setOperandDead(3);
2504 I.eraseFromParent();
2512 unsigned SubRegIdx = DstSize < 32
2513 ?
static_cast<unsigned>(AMDGPU::sub0)
2515 if (SubRegIdx == AMDGPU::NoSubRegister)
2521 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2525 if (SrcWithSubRC != SrcRC) {
2530 I.getOperand(1).setSubReg(SubRegIdx);
2533 I.setDesc(TII.get(TargetOpcode::COPY));
2539 Mask = maskTrailingOnes<unsigned>(
Size);
2540 int SignedMask =
static_cast<int>(Mask);
2541 return SignedMask >= -16 && SignedMask <= 64;
2545const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2549 if (
auto *RB = dyn_cast<const RegisterBank *>(RegClassOrBank))
2553 if (
auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
2558bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
const {
2559 bool InReg =
I.getOpcode() == AMDGPU::G_SEXT_INREG;
2560 bool Signed =
I.getOpcode() == AMDGPU::G_SEXT || InReg;
2563 const Register DstReg =
I.getOperand(0).getReg();
2564 const Register SrcReg =
I.getOperand(1).getReg();
2566 const LLT DstTy =
MRI->getType(DstReg);
2567 const LLT SrcTy =
MRI->getType(SrcReg);
2568 const unsigned SrcSize =
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2575 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2578 if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
2580 return selectCOPY(
I);
2583 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2586 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2588 Register UndefReg =
MRI->createVirtualRegister(SrcRC);
2589 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2595 I.eraseFromParent();
2601 if (SrcBank->
getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2611 I.eraseFromParent();
2615 const unsigned BFE =
Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2621 I.eraseFromParent();
2625 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2627 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2631 if (
Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2632 const unsigned SextOpc = SrcSize == 8 ?
2633 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2636 I.eraseFromParent();
2642 if (DstSize > 32 && SrcSize == 32) {
2643 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2644 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2659 I.eraseFromParent();
2664 const unsigned BFE64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2665 const unsigned BFE32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2668 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2670 Register ExtReg =
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2671 Register UndefReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2672 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2674 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2685 I.eraseFromParent();
2701 I.eraseFromParent();
2736 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2743 assert(Mask.size() == 2);
2745 if (Mask[0] == 1 && Mask[1] <= 1) {
2753bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2757 Register Dst =
I.getOperand(0).getReg();
2759 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2762 Register Src =
I.getOperand(1).getReg();
2768 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2770 I.eraseFromParent();
2778bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
2792 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2807 Register LoReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2808 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2809 Register ConstReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2810 Register OpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2812 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2813 .
addReg(Src, 0, AMDGPU::sub0);
2814 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2815 .
addReg(Src, 0, AMDGPU::sub1);
2816 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2820 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
2825 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2830 MI.eraseFromParent();
2835bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
2838 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2845 Register LoReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2846 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2847 Register ConstReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2848 Register OpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2854 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2855 .
addReg(Src, 0, AMDGPU::sub0);
2856 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2857 .
addReg(Src, 0, AMDGPU::sub1);
2858 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2863 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
2867 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2873 MI.eraseFromParent();
2878 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
2881void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
2884 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
2886 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
2890 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
2895 for (
unsigned i = 1; i != 3; ++i) {
2902 assert(GEPInfo.Imm == 0);
2907 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
2908 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
2910 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
2914 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
2917bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
2918 return RBI.
getRegBank(Reg, *MRI, TRI)->
getID() == AMDGPU::SGPRRegBankID;
2921bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
2922 if (!
MI.hasOneMemOperand())
2932 if (!
Ptr || isa<UndefValue, Argument, Constant, GlobalValue>(
Ptr))
2938 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
2940 AMDGPU::SGPRRegBankID;
2943 return I &&
I->getMetadata(
"amdgpu.uniform");
2947 for (
const GEPInfo &GEPInfo : AddrInfo) {
2948 if (!GEPInfo.VgprParts.empty())
2954void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
2955 const LLT PtrTy =
MRI->getType(
I.getOperand(1).getReg());
2962 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2967bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
2974 if (Reg.isPhysical())
2978 const unsigned Opcode =
MI.getOpcode();
2980 if (Opcode == AMDGPU::COPY)
2983 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
2984 Opcode == AMDGPU::G_XOR)
2988 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI))
2989 return GI->is(Intrinsic::amdgcn_class);
2991 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
2994bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
3009 if (!isVCC(CondReg, *MRI)) {
3013 CondPhysReg = AMDGPU::SCC;
3014 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3015 ConstrainRC = &AMDGPU::SReg_32RegClass;
3023 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3024 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3027 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3034 CondPhysReg =
TRI.getVCC();
3035 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3036 ConstrainRC =
TRI.getBoolRC();
3039 if (!
MRI->getRegClassOrNull(CondReg))
3040 MRI->setRegClass(CondReg, ConstrainRC);
3042 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3045 .
addMBB(
I.getOperand(1).getMBB());
3047 I.eraseFromParent();
3051bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3053 Register DstReg =
I.getOperand(0).getReg();
3055 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3056 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3061 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3064bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3065 Register DstReg =
I.getOperand(0).getReg();
3066 Register SrcReg =
I.getOperand(1).getReg();
3067 Register MaskReg =
I.getOperand(2).getReg();
3068 LLT Ty =
MRI->getType(DstReg);
3069 LLT MaskTy =
MRI->getType(MaskReg);
3076 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3086 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3087 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3090 !CanCopyLow32 && !CanCopyHi32) {
3091 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3095 I.eraseFromParent();
3099 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3101 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3106 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3115 "ptrmask should have been narrowed during legalize");
3117 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3123 I.eraseFromParent();
3127 Register HiReg =
MRI->createVirtualRegister(&RegRC);
3128 Register LoReg =
MRI->createVirtualRegister(&RegRC);
3131 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3132 .
addReg(SrcReg, 0, AMDGPU::sub0);
3133 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3134 .
addReg(SrcReg, 0, AMDGPU::sub1);
3143 Register MaskLo =
MRI->createVirtualRegister(&RegRC);
3144 MaskedLo =
MRI->createVirtualRegister(&RegRC);
3146 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3147 .
addReg(MaskReg, 0, AMDGPU::sub0);
3148 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3157 Register MaskHi =
MRI->createVirtualRegister(&RegRC);
3158 MaskedHi =
MRI->createVirtualRegister(&RegRC);
3160 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3161 .
addReg(MaskReg, 0, AMDGPU::sub1);
3162 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3167 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3172 I.eraseFromParent();
3178static std::pair<Register, unsigned>
3185 std::tie(IdxBaseReg,
Offset) =
3187 if (IdxBaseReg == AMDGPU::NoRegister) {
3191 IdxBaseReg = IdxReg;
3198 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3199 return std::pair(IdxReg, SubRegs[0]);
3200 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3203bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3209 LLT DstTy =
MRI->getType(DstReg);
3210 LLT SrcTy =
MRI->getType(SrcReg);
3218 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3222 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3224 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3225 if (!SrcRC || !DstRC)
3240 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3244 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3247 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3251 MI.eraseFromParent();
3259 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3261 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3264 MI.eraseFromParent();
3275 MI.eraseFromParent();
3280bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3287 LLT VecTy =
MRI->getType(DstReg);
3288 LLT ValTy =
MRI->getType(ValReg);
3300 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3304 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3306 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3314 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3318 std::tie(IdxReg,
SubReg) =
3321 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3328 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3332 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3337 MI.eraseFromParent();
3349 MI.eraseFromParent();
3353bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3357 unsigned Size =
MI.getOperand(3).getImm();
3360 const bool HasVIndex =
MI.getNumOperands() == 9;
3364 VIndex =
MI.getOperand(4).getReg();
3368 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3369 std::optional<ValueAndVReg> MaybeVOffset =
3371 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3377 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3378 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3379 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3380 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3383 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3384 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3385 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3386 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3389 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3390 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3391 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3392 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3398 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3399 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3400 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3401 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3407 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3408 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3409 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3410 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3417 .
add(
MI.getOperand(2));
3421 if (HasVIndex && HasVOffset) {
3422 Register IdxReg =
MRI->createVirtualRegister(
TRI.getVGPR64Class());
3423 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3430 }
else if (HasVIndex) {
3432 }
else if (HasVOffset) {
3436 MIB.
add(
MI.getOperand(1));
3437 MIB.
add(
MI.getOperand(5 + OpOffset));
3438 MIB.
add(
MI.getOperand(6 + OpOffset));
3440 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3450 LoadPtrI.
Offset =
MI.getOperand(6 + OpOffset).getImm();
3452 StorePtrI.
V =
nullptr;
3466 MI.eraseFromParent();
3471Register AMDGPUInstructionSelector::matchZeroExtendFromS32(
Register Reg)
const {
3478 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3484 return Def->getOperand(1).getReg();
3491Register AMDGPUInstructionSelector::matchSignExtendFromS32(
Register Reg)
const {
3498 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3506 return Def->getOperand(1).getReg();
3509 return matchZeroExtendFromS32(Reg);
3517AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3519 : matchZeroExtendFromS32(Reg);
3525AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3527 : matchSignExtendFromS32(Reg);
3531AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3532 bool IsSigned)
const {
3534 return matchSignExtendFromS32OrS32(Reg);
3536 return matchZeroExtendFromS32OrS32(Reg);
3539Register AMDGPUInstructionSelector::matchAnyExtendFromS32(
Register Reg)
const {
3546 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3553 return Def->getOperand(1).getReg();
3558bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3563 unsigned Size =
MI.getOperand(3).getImm();
3569 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3572 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3575 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3580 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3585 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3592 .
add(
MI.getOperand(2));
3598 if (!isSGPR(
Addr)) {
3600 if (isSGPR(AddrDef->Reg)) {
3601 Addr = AddrDef->Reg;
3602 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3605 if (isSGPR(SAddr)) {
3606 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3607 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3618 VOffset =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3630 MIB.
add(
MI.getOperand(4))
3631 .
add(
MI.getOperand(5));
3635 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3645 sizeof(int32_t),
Align(4));
3649 MI.eraseFromParent();
3653bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3655 unsigned OpcodeOpIdx =
3656 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3657 MI.setDesc(TII.get(
MI.getOperand(OpcodeOpIdx).getImm()));
3658 MI.removeOperand(OpcodeOpIdx);
3659 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3665bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3668 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3669 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3671 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3672 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3674 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3675 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3677 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3678 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3680 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3681 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3683 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3684 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3686 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3687 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3689 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3690 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3692 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3693 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3695 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3696 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3698 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3699 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3701 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3702 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3704 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3705 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3707 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3708 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3710 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3711 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3713 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3714 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3716 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3717 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3719 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3720 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3722 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3723 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3725 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3726 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3728 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3729 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3731 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3732 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3734 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3735 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3737 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3738 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3740 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3741 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3743 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3744 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3746 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3747 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3749 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3750 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3756 auto VDst_In =
MI.getOperand(4);
3758 MI.setDesc(TII.get(
Opc));
3759 MI.removeOperand(4);
3760 MI.removeOperand(1);
3761 MI.addOperand(VDst_In);
3762 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3766bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3768 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3771 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3775 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3776 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3777 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3779 MI.removeOperand(2);
3780 MI.setDesc(TII.get(Opcode));
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)

      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  MI.eraseFromParent();
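// BitOp3_Op: fold a tree of G_AND/G_OR/G_XOR into a single 8-bit truth table.
// Each distinct source register is assigned one of the canonical tables
// 0xf0/0xcc/0xaa (src0/src1/src2); the tables of the two operands are then
// combined with &, | or ^ as the tree is walked.  Returns the number of
// logic operations folded together with the resulting truth table.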
  unsigned NumOpcodes = 0;
  const uint8_t SrcBits[3] = {0xf0, 0xcc, 0xaa};

  for (unsigned I = 0; I < Src.size(); ++I) {

  if (Src.size() == 3) {
    for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src[I] == LHS) {

    Bits = SrcBits[Src.size()];

  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    if (!getOperandBits(LHS, LHSBits) ||
        !getOperandBits(RHS, RHSBits)) {
      return std::make_pair(0, 0);

    NumOpcodes += Op.first;
    LHSBits = Op.second;

    NumOpcodes += Op.first;
    RHSBits = Op.second;

    return std::make_pair(0, 0);

  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
    TTbl = LHSBits & RHSBits;
    break;
  case TargetOpcode::G_OR:
    TTbl = LHSBits | RHSBits;
    break;
  case TargetOpcode::G_XOR:
    TTbl = LHSBits ^ RHSBits;
    break;

  return std::make_pair(NumOpcodes + 1, TTbl);
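// selectBITOP3: when BitOp3_Op can fold at least two logic operations into a
// single truth table, emit V_BITOP3_B32/B16.  SGPR sources are copied to
// VGPRs where required, and the source list is padded to three operands by
// repeating the first source.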
bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  unsigned NumOpcodes;

  std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);

  if (NumOpcodes < 2 || Src.empty())

  if (NumOpcodes == 2 && IsB32) {

  } else if (NumOpcodes < 4) {

  unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
                       : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;

  for (unsigned I = 0; I < Src.size(); ++I) {
    if (RB->getID() != AMDGPU::SGPRRegBankID)

    Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  while (Src.size() < 3)
    Src.push_back(Src[0]);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (!I.isPreISelOpcode()) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectBITOP3(I))
      return true;
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    // Extensions from i1 must not go through selectImpl(): the TD patterns
    // would introduce an illegal VGPR-to-SGPR copy for the lane-mask type.
    if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
        selectImpl(I, *CoverageInfo))
      return true;
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  }
  case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
    return selectBVHIntersectRayIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
    return true;
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
    I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
    return true;
  }
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
  case TargetOpcode::G_PHI:
    return selectPHI(I);
  case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
    return selectCOPY_SCC_VCC(I);
  case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
    return selectCOPY_VCC_SCC(I);
  case AMDGPU::G_AMDGPU_READANYLANE:
    return selectReadAnyLane(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
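// selectVOP3ModsImpl: peel G_FNEG / (G_FSUB 0, x) / G_FABS off the source and
// accumulate the corresponding NEG/ABS VOP3 source modifiers, returning the
// stripped source register together with the modifier mask.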
std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();

  return std::pair(Src, Mods);
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
          TII.get(AMDGPU::COPY), VGPRSrc)
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
      MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
      MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
      MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
    MachineOperand &Root) const {
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), /*IsCanonicalizing=*/false);
      MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), /*IsCanonicalizing=*/true,
      MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
  // isTruncHalf
  if (MI->getOpcode() != AMDGPU::G_TRUNC)
    return false;
  unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  return DstSize * 2 == SrcSize;

  // isLshrHalf
  if (MI->getOpcode() != AMDGPU::G_LSHR)
    return false;
  std::optional<ValueAndVReg> ShiftAmt;
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  unsigned Shift = ShiftAmt->Value.getZExtValue();
  return Shift * 2 == SrcSize;

  // isShlHalf
  if (MI->getOpcode() != AMDGPU::G_SHL)
    return false;
  std::optional<ValueAndVReg> ShiftAmt;
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  unsigned Shift = ShiftAmt->Value.getZExtValue();
  return Shift * 2 == SrcSize;

  // isUnmergeHalf
  if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
    return false;
  return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
         MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();

  // isVectorOfTwoOrScalar
  LLT OpTy = MRI.getType(Reg);
    return TypeClass::SCALAR;
    return TypeClass::VECTOR_OF_TWO;
  return TypeClass::NONE_OF_LISTED;
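// getNegStatus: given the SrcStatus of a value and the type being negated
// (scalar or 2-element vector), compute the status after peeling a G_FNEG.
// A vector fneg flips both halves; a scalar (bitcast) fneg only flips the
// high half.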
4466 if (NegType != TypeClass::VECTOR_OF_TWO && NegType != TypeClass::SCALAR)
4467 return SrcStatus::INVALID;
4470 case SrcStatus::IS_SAME:
4471 if (NegType == TypeClass::VECTOR_OF_TWO) {
4477 return SrcStatus::IS_BOTH_NEG;
4479 if (NegType == TypeClass::SCALAR) {
4485 return SrcStatus::IS_HI_NEG;
4488 case SrcStatus::IS_HI_NEG:
4489 if (NegType == TypeClass::VECTOR_OF_TWO) {
4495 return SrcStatus::IS_LO_NEG;
4497 if (NegType == TypeClass::SCALAR) {
4503 return SrcStatus::IS_SAME;
4506 case SrcStatus::IS_LO_NEG:
4507 if (NegType == TypeClass::VECTOR_OF_TWO) {
4513 return SrcStatus::IS_HI_NEG;
4515 if (NegType == TypeClass::SCALAR) {
4521 return SrcStatus::IS_BOTH_NEG;
4524 case SrcStatus::IS_BOTH_NEG:
4525 if (NegType == TypeClass::VECTOR_OF_TWO) {
4531 return SrcStatus::IS_SAME;
4533 if (NegType == TypeClass::SCALAR) {
4539 return SrcStatus::IS_LO_NEG;
4542 case SrcStatus::IS_UPPER_HALF:
4556 return SrcStatus::IS_UPPER_HALF_NEG;
4557 case SrcStatus::IS_LOWER_HALF:
4558 if (NegType == TypeClass::VECTOR_OF_TWO) {
4565 return SrcStatus::IS_LOWER_HALF_NEG;
4567 if (NegType == TypeClass::SCALAR) {
4574 return SrcStatus::IS_LOWER_HALF;
4577 case SrcStatus::IS_UPPER_HALF_NEG:
4591 return SrcStatus::IS_UPPER_HALF;
4592 case SrcStatus::IS_LOWER_HALF_NEG:
4593 if (NegType == TypeClass::VECTOR_OF_TWO) {
4600 return SrcStatus::IS_LOWER_HALF;
4602 if (NegType == TypeClass::SCALAR) {
4609 return SrcStatus::IS_LOWER_HALF_NEG;
4618static std::optional<std::pair<Register, SrcStatus>>
4623 unsigned Opc =
MI->getOpcode();
4627 case AMDGPU::G_BITCAST:
4628 return std::optional<std::pair<Register, SrcStatus>>(
4629 {
MI->getOperand(1).getReg(), Curr.second});
4631 if (
MI->getOperand(1).getReg().isPhysical())
4632 return std::nullopt;
4633 return std::optional<std::pair<Register, SrcStatus>>(
4634 {
MI->getOperand(1).getReg(), Curr.second});
4635 case AMDGPU::G_FNEG: {
4637 if (Stat == SrcStatus::INVALID)
4638 return std::nullopt;
4639 return std::optional<std::pair<Register, SrcStatus>>(
4640 {
MI->getOperand(1).getReg(), Stat});
4647 switch (Curr.second) {
4648 case SrcStatus::IS_SAME:
4650 return std::optional<std::pair<Register, SrcStatus>>(
4651 {
MI->getOperand(1).getReg(), SrcStatus::IS_LOWER_HALF});
4653 if (Curr.first ==
MI->getOperand(0).getReg())
4654 return std::optional<std::pair<Register, SrcStatus>>(
4655 {
MI->getOperand(2).getReg(), SrcStatus::IS_LOWER_HALF});
4656 return std::optional<std::pair<Register, SrcStatus>>(
4657 {
MI->getOperand(2).getReg(), SrcStatus::IS_UPPER_HALF});
4660 case SrcStatus::IS_HI_NEG:
4668 return std::optional<std::pair<Register, SrcStatus>>(
4669 {
MI->getOperand(1).getReg(), SrcStatus::IS_LOWER_HALF_NEG});
4672 if (Curr.first ==
MI->getOperand(0).getReg())
4673 return std::optional<std::pair<Register, SrcStatus>>(
4674 {
MI->getOperand(2).getReg(), SrcStatus::IS_LOWER_HALF_NEG});
4675 return std::optional<std::pair<Register, SrcStatus>>(
4676 {
MI->getOperand(2).getReg(), SrcStatus::IS_UPPER_HALF_NEG});
4679 case SrcStatus::IS_UPPER_HALF:
4681 return std::optional<std::pair<Register, SrcStatus>>(
4682 {
MI->getOperand(1).getReg(), SrcStatus::IS_LOWER_HALF});
4684 case SrcStatus::IS_LOWER_HALF:
4686 return std::optional<std::pair<Register, SrcStatus>>(
4687 {
MI->getOperand(1).getReg(), SrcStatus::IS_UPPER_HALF});
4689 case SrcStatus::IS_UPPER_HALF_NEG:
4691 return std::optional<std::pair<Register, SrcStatus>>(
4692 {
MI->getOperand(1).getReg(), SrcStatus::IS_LOWER_HALF_NEG});
4694 case SrcStatus::IS_LOWER_HALF_NEG:
4696 return std::optional<std::pair<Register, SrcStatus>>(
4697 {
MI->getOperand(1).getReg(), SrcStatus::IS_UPPER_HALF_NEG});
4702 return std::nullopt;
4712 bool HasNeg =
false;
4714 bool HasOpsel =
true;
4719 unsigned Opc =
MI->getOpcode();
4721 if (
Opc < TargetOpcode::GENERIC_OP_END) {
4724 }
else if (
Opc == TargetOpcode::G_INTRINSIC) {
4725 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(*MI).getIntrinsicID();
4727 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
4733 (Stat >= SrcStatus::NEG_START && Stat <= SrcStatus::NEG_END)) {
4737 (Stat >= SrcStatus::HALF_START && Stat <= SrcStatus::HALF_END)) {
4751 while (
Depth <= MaxDepth && Curr.has_value()) {
4754 Statlist.push_back(Curr.value());
4761static std::pair<Register, SrcStatus>
4765 std::pair<Register, SrcStatus> LastSameOrNeg = {Reg, SrcStatus::IS_SAME};
4768 while (
Depth <= MaxDepth && Curr.has_value()) {
4772 if (Stat == SrcStatus::IS_SAME || Stat == SrcStatus::IS_HI_NEG ||
4773 Stat == SrcStatus::IS_LO_NEG || Stat == SrcStatus::IS_BOTH_NEG)
4774 LastSameOrNeg = Curr.value();
4779 return LastSameOrNeg;
  unsigned Width1 = MRI.getType(Reg1).getSizeInBits();
  unsigned Width2 = MRI.getType(Reg2).getSizeInBits();
  return Width1 == Width2;
4791 if (HiStat == SrcStatus::IS_UPPER_HALF_NEG) {
4794 }
else if (HiStat == SrcStatus::IS_UPPER_HALF)
4796 else if (HiStat == SrcStatus::IS_LOWER_HALF_NEG)
4798 else if (HiStat == SrcStatus::IS_HI_NEG)
4801 if (LoStat == SrcStatus::IS_UPPER_HALF_NEG) {
4804 }
else if (LoStat == SrcStatus::IS_UPPER_HALF)
4806 else if (LoStat == SrcStatus::IS_LOWER_HALF_NEG)
4808 else if (LoStat == SrcStatus::IS_HI_NEG)
4818 return S == SrcStatus::IS_UPPER_HALF || S == SrcStatus::IS_UPPER_HALF_NEG ||
4819 S == SrcStatus::IS_LOWER_HALF || S == SrcStatus::IS_LOWER_HALF_NEG;
4822 IsHalfState(HiStat);
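// selectVOP3PModsImpl: compute the packed (VOP3P) source modifiers for a root
// register.  Whole-vector negations are folded first; if the source is a
// G_BUILD_VECTOR of two halves, both halves are traced back through
// trunc/shift/unmerge chains so that the packed-operand select and per-half
// NEG modifiers can be set when both halves come from the same register.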
4825std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
4831 return {RootReg, Mods};
4838 if (Stat.second == SrcStatus::IS_BOTH_NEG)
4840 else if (Stat.second == SrcStatus::IS_HI_NEG)
4842 else if (Stat.second == SrcStatus::IS_LO_NEG)
  if (MI->getOpcode() != AMDGPU::G_BUILD_VECTOR || MI->getNumOperands() != 3 ||
    return {Stat.first, Mods};

  if (StatlistHi.empty()) {
    return {Stat.first, Mods};

  if (StatlistLo.empty()) {
    return {Stat.first, Mods};

  for (int I = StatlistHi.size() - 1; I >= 0; I--) {
    for (int J = StatlistLo.size() - 1; J >= 0; J--) {
      if (StatlistHi[I].first == StatlistLo[J].first &&
                        StatlistHi[I].first, RootReg, TII, MRI))
        return {StatlistHi[I].first,
                updateMods(StatlistHi[I].second, StatlistLo[J].second, Mods)};

  return {Stat.first, Mods};
  return RB->getID() == RBNo;

  if (checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI, TRI) ||

  if (MI->getOpcode() == AMDGPU::COPY && NewReg == MI->getOperand(1).getReg()) {

  Register DstReg = MRI.cloneVirtualRegister(RootReg);

  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
AMDGPUInstructionSelector::selectVOP3PRetHelper(MachineOperand &Root,
  std::tie(Reg, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, IsDOT);

AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  return selectVOP3PRetHelper(Root);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  return selectVOP3PRetHelper(Root, true);
4957AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
4960 "expected i1 value");
4974 switch (Elts.
size()) {
4976 DstRegClass = &AMDGPU::VReg_256RegClass;
4979 DstRegClass = &AMDGPU::VReg_128RegClass;
4982 DstRegClass = &AMDGPU::VReg_64RegClass;
4989 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
4990 .addDef(
MRI.createVirtualRegister(DstRegClass));
4991 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5002 if (ModOpcode == TargetOpcode::G_FNEG) {
5006 for (
auto El : Elts) {
5012 if (Elts.size() != NegAbsElts.
size()) {
5021 assert(ModOpcode == TargetOpcode::G_FABS);
5029AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5034 if (
GBuildVector *BV = dyn_cast<GBuildVector>(
MRI->getVRegDef(Src))) {
5035 assert(BV->getNumSources() > 0);
5038 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5041 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5042 ElF32 =
MRI->getVRegDef(BV->getSourceReg(i));
5049 if (BV->getNumSources() == EltsF32.
size()) {
5060AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5066 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5074 if (CV->getNumSources() == EltsV2F16.
size()) {
5086AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5092 assert(CV->getNumSources() > 0);
5095 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5099 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5100 ElV2F16 =
MRI->getVRegDef(CV->getSourceReg(i));
5107 if (CV->getNumSources() == EltsV2F16.
size()) {
5119AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5120 std::optional<FPValueAndVReg> FPValReg;
5124 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5144AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5150 std::optional<ValueAndVReg> ShiftAmt;
5152 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5153 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5154 Key = ShiftAmt->Value.getZExtValue() / 8;
5165AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5172 std::optional<ValueAndVReg> ShiftAmt;
5174 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5175 ShiftAmt->Value.getZExtValue() == 16) {
5187AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5194 S32 = matchAnyExtendFromS32(Src);
5198 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5203 Src =
Def->getOperand(2).getReg();
5216AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5219 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5230AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5233 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5241 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5248AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5251 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5259 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
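// selectScaleOffset: match an offset computed as a multiply (or MAD) of an
// index so the scaled-offset addressing form can be used; the result feeds
// the SCAL cache-policy bit on subtargets reporting hasScaleOffset().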
5268bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5270 bool IsSigned)
const {
5287 OffsetReg =
Def->Reg;
5302 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5306 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5307 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5308 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5322bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5326 bool *ScaleOffset)
const {
5333 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5335 if (AddrInfo.
empty())
5338 const GEPInfo &GEPI = AddrInfo[0];
5339 std::optional<int64_t> EncodedImm;
5342 *ScaleOffset =
false;
5347 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5348 AddrInfo.
size() > 1) {
5349 const GEPInfo &GEPI2 = AddrInfo[1];
5350 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5351 Register OffsetReg = GEPI2.SgprParts[1];
5354 selectScaleOffset(Root, OffsetReg,
false );
5355 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5357 Base = GEPI2.SgprParts[0];
5358 *SOffset = OffsetReg;
5368 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5380 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5381 Base = GEPI.SgprParts[0];
5387 if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
5393 Base = GEPI.SgprParts[0];
5394 *SOffset =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5395 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5400 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5401 Register OffsetReg = GEPI.SgprParts[1];
5403 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5404 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5406 Base = GEPI.SgprParts[0];
5407 *SOffset = OffsetReg;
5416AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5419 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5421 return std::nullopt;
5428AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5430 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5432 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5433 return std::nullopt;
5435 const GEPInfo &GEPInfo = AddrInfo[0];
5436 Register PtrReg = GEPInfo.SgprParts[0];
5437 std::optional<int64_t> EncodedImm =
5440 return std::nullopt;
5449AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5452 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5454 return std::nullopt;
5463AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5467 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5468 return std::nullopt;
5477std::pair<Register, int>
5478AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
5488 int64_t ConstOffset;
5490 std::tie(PtrBase, ConstOffset, IsInBounds) =
5491 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5497 if (ConstOffset == 0 ||
5499 !isFlatScratchBaseLegal(Root.
getReg())) ||
5503 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5507 return std::pair(PtrBase, ConstOffset);
5511AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5521AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5531AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
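// selectGlobalSAddr: split a global address into an SGPR base, a VGPR
// voffset and an immediate offset.  Constant offsets are folded into the
// immediate field when legal, with any remainder materialized into a VGPR
// via V_MOV_B32; a pure SGPR address is completed with a VGPR voffset
// materialized the same way.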
5542AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5544 bool NeedIOffset)
const {
5547 int64_t ConstOffset;
5548 int64_t ImmOffset = 0;
5552 std::tie(PtrBase, ConstOffset, std::ignore) =
5553 getPtrBaseWithConstantOffset(
Addr, *MRI);
5555 if (ConstOffset != 0) {
5560 ImmOffset = ConstOffset;
5563 if (isSGPR(PtrBaseDef->Reg)) {
5564 if (ConstOffset > 0) {
5570 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5572 std::tie(SplitImmOffset, RemainderOffset) =
5578 : isUInt<32>(RemainderOffset)) {
5582 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5584 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5586 .
addImm(RemainderOffset);
5614 unsigned NumLiterals =
5618 return std::nullopt;
5625 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5630 if (isSGPR(SAddr)) {
5631 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
5635 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
5637 if (
Register VOffset = matchExtendFromS32OrS32(
5669 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
5670 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
5671 return std::nullopt;
5677 Register VOffset =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5679 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
5697AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
5698 return selectGlobalSAddr(Root, 0);
5702AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
5707 I.getOperand(
I.getNumOperands() - 1).getImm() & ~AMDGPU::CPol::SCAL;
5708 return selectGlobalSAddr(Root, PassedCPol);
5712AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
5717AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
5723 I.getOperand(
I.getNumOperands() - 1).getImm() & ~AMDGPU::CPol::SCAL;
5724 return selectGlobalSAddr(Root, PassedCPol,
false);
5728AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
5731 int64_t ConstOffset;
5732 int64_t ImmOffset = 0;
5736 std::tie(PtrBase, ConstOffset, std::ignore) =
5737 getPtrBaseWithConstantOffset(
Addr, *MRI);
5739 if (ConstOffset != 0 && isFlatScratchBaseLegal(
Addr) &&
5743 ImmOffset = ConstOffset;
5747 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5748 int FI = AddrDef->MI->getOperand(1).
getIndex();
5757 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5758 Register LHS = AddrDef->MI->getOperand(1).getReg();
5759 Register RHS = AddrDef->MI->getOperand(2).getReg();
5763 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
5764 isSGPR(RHSDef->Reg)) {
5765 int FI = LHSDef->MI->getOperand(1).getIndex();
5769 SAddr =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5771 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
5779 return std::nullopt;
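// checkFlatScratchSVSSwizzleBug: conservatively reject an SVS scratch access
// when the two low bits of the VGPR and SGPR address components could carry
// out, i.e. (VMax & 3) + (SMax & 3) >= 4 for their known maximum values.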
5788bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
5799 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
5801 return (VMax & 3) + (
SMax & 3) >= 4;
5805AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
5808 int64_t ConstOffset;
5809 int64_t ImmOffset = 0;
5813 std::tie(PtrBase, ConstOffset, std::ignore) =
5814 getPtrBaseWithConstantOffset(
Addr, *MRI);
5817 if (ConstOffset != 0 &&
5821 ImmOffset = ConstOffset;
5825 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
5826 return std::nullopt;
5828 Register RHS = AddrDef->MI->getOperand(2).getReg();
5830 return std::nullopt;
5832 Register LHS = AddrDef->MI->getOperand(1).getReg();
5835 if (OrigAddr !=
Addr) {
5836 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
5837 return std::nullopt;
5839 if (!isFlatScratchBaseLegalSV(OrigAddr))
5840 return std::nullopt;
5843 if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
5844 return std::nullopt;
5846 unsigned CPol = selectScaleOffset(Root, RHS,
true )
5850 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5851 int FI = LHSDef->MI->getOperand(1).getIndex();
5865 return std::nullopt;
5876AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
5885 Register HighBits =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5890 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5914 std::optional<int> FI;
5919 int64_t ConstOffset;
5920 std::tie(PtrBase, ConstOffset, std::ignore) =
5921 getPtrBaseWithConstantOffset(VAddr, *MRI);
5922 if (ConstOffset != 0) {
5927 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
5933 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
5956bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
5969bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
5971 unsigned Size)
const {
5972 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
5974 if (!isUInt<8>(Offset0 /
Size) || !isUInt<8>(Offset1 /
Size))
5987 return Addr->getOpcode() == TargetOpcode::G_OR ||
5988 (
Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
5995bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6009 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6010 std::optional<ValueAndVReg> RhsValReg =
6016 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6017 RhsValReg->Value.getSExtValue() > -0x40000000)
6026bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6044bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6053 std::optional<DefinitionAndSourceRegister> BaseDef =
6055 std::optional<ValueAndVReg> RHSOffset =
6065 (RHSOffset->Value.getSExtValue() < 0 &&
6066 RHSOffset->Value.getSExtValue() > -0x40000000)))
6069 Register LHS = BaseDef->MI->getOperand(1).getReg();
6070 Register RHS = BaseDef->MI->getOperand(2).getReg();
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =

  if (RHS->countr_one() >= ShAmtBits)
    return true;

  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
6091AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6096 std::optional<DefinitionAndSourceRegister>
Def =
6098 assert(Def &&
"this shouldn't be an optional result");
6153std::pair<Register, unsigned>
6154AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6156 int64_t ConstAddr = 0;
6160 std::tie(PtrBase,
Offset, std::ignore) =
6161 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6164 if (isDSOffsetLegal(PtrBase,
Offset)) {
6166 return std::pair(PtrBase,
Offset);
6168 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6177 return std::pair(Root.
getReg(), 0);
6181AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6184 std::tie(Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6192AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6193 return selectDSReadWrite2(Root, 4);
6197AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6198 return selectDSReadWrite2(Root, 8);
6202AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6203 unsigned Size)
const {
6206 std::tie(Reg,
Offset) = selectDSReadWrite2Impl(Root,
Size);
6214std::pair<Register, unsigned>
6215AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6216 unsigned Size)
const {
6218 int64_t ConstAddr = 0;
6222 std::tie(PtrBase,
Offset, std::ignore) =
6223 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6226 int64_t OffsetValue0 =
Offset;
6228 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6230 return std::pair(PtrBase, OffsetValue0 /
Size);
6232 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6240 return std::pair(Root.
getReg(), 0);
6248std::tuple<Register, int64_t, bool>
6249AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6252 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6253 return {Root, 0,
false};
6256 std::optional<ValueAndVReg> MaybeOffset =
6259 return {Root, 0,
false};
6274 Register RSrc2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6275 Register RSrc3 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6276 Register RSrcHi =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6277 Register RSrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6279 B.buildInstr(AMDGPU::S_MOV_B32)
6282 B.buildInstr(AMDGPU::S_MOV_B32)
6289 B.buildInstr(AMDGPU::REG_SEQUENCE)
6292 .addImm(AMDGPU::sub0)
6294 .addImm(AMDGPU::sub1);
6298 RSrcLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6299 B.buildInstr(AMDGPU::S_MOV_B64)
6304 B.buildInstr(AMDGPU::REG_SEQUENCE)
6307 .addImm(AMDGPU::sub0_sub1)
6309 .addImm(AMDGPU::sub2_sub3);
6316 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6325 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6332AMDGPUInstructionSelector::MUBUFAddressData
6333AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6334 MUBUFAddressData
Data;
6340 std::tie(PtrBase,
Offset, std::ignore) =
6341 getPtrBaseWithConstantOffset(Src, *MRI);
6342 if (isUInt<32>(
Offset)) {
6349 Data.N2 = InputAdd->getOperand(1).getReg();
6350 Data.N3 = InputAdd->getOperand(2).getReg();
6365bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData
Addr)
const {
6372 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6378void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6384 SOffset =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6385 B.buildInstr(AMDGPU::S_MOV_B32)
6391bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6399 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6400 if (!shouldUseAddr64(AddrData))
6406 Offset = AddrData.Offset;
6412 if (RBI.
getRegBank(N2, *MRI, TRI)->
getID() == AMDGPU::VGPRRegBankID) {
6414 if (RBI.
getRegBank(N3, *MRI, TRI)->
getID() == AMDGPU::VGPRRegBankID) {
6427 }
else if (RBI.
getRegBank(N0, *MRI, TRI)->
getID() == AMDGPU::VGPRRegBankID) {
6438 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6442bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6450 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6451 if (shouldUseAddr64(AddrData))
6457 Offset = AddrData.Offset;
6463 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6468AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6474 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6490 MIB.
addReg(AMDGPU::SGPR_NULL);
6504AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6509 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6520 MIB.
addReg(AMDGPU::SGPR_NULL);
6532AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
6537 SOffset = AMDGPU::SGPR_NULL;
6543static std::optional<uint64_t>
6547 if (!OffsetVal || !isInt<32>(*OffsetVal))
6548 return std::nullopt;
6549 return Lo_32(*OffsetVal);
6553AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
6554 std::optional<uint64_t> OffsetVal =
6559 std::optional<int64_t> EncodedImm =
6568AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
6575 std::optional<int64_t> EncodedImm =
6584AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
6592 return std::nullopt;
6594 std::optional<int64_t> EncodedOffset =
6597 return std::nullopt;
6604std::pair<Register, unsigned>
6605AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
6606 bool &Matched)
const {
6611 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
6621 const auto CheckAbsNeg = [&]() {
6626 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
6657AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
6662 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6673AMDGPUInstructionSelector::selectVOP3PMadMixMods(
MachineOperand &Root)
const {
6677 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6685bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
6689 Register CCReg =
I.getOperand(0).getReg();
6694 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
6695 .
addImm(
I.getOperand(2).getImm());
6699 I.eraseFromParent();
6704bool AMDGPUInstructionSelector::selectSGetBarrierState(
6709 std::optional<int64_t> BarValImm =
6713 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6718 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
6719 : AMDGPU::S_GET_BARRIER_STATE_M0;
6722 auto DstReg =
I.getOperand(0).getReg();
6724 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
6731 I.eraseFromParent();
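// getNamedBarrierOp: pick the immediate or M0 form of the named-barrier
// instruction depending on whether the barrier operand is an inline constant.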
6736 if (HasInlineConst) {
6740 case Intrinsic::amdgcn_s_barrier_join:
6741 return AMDGPU::S_BARRIER_JOIN_IMM;
6742 case Intrinsic::amdgcn_s_get_named_barrier_state:
6743 return AMDGPU::S_GET_BARRIER_STATE_IMM;
6749 case Intrinsic::amdgcn_s_barrier_join:
6750 return AMDGPU::S_BARRIER_JOIN_M0;
6751 case Intrinsic::amdgcn_s_get_named_barrier_state:
6752 return AMDGPU::S_GET_BARRIER_STATE_M0;
6757bool AMDGPUInstructionSelector::selectNamedBarrierInit(
6765 Register TmpReg0 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6771 Register TmpReg1 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6778 Register TmpReg2 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6784 Register TmpReg3 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6785 constexpr unsigned ShAmt = 16;
6791 Register TmpReg4 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6801 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
6802 ? AMDGPU::S_BARRIER_INIT_M0
6803 : AMDGPU::S_BARRIER_SIGNAL_M0;
6807 I.eraseFromParent();
6811bool AMDGPUInstructionSelector::selectNamedBarrierInst(
6815 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
6818 std::optional<int64_t> BarValImm =
6823 Register TmpReg0 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6829 Register TmpReg1 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6835 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6844 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
6845 auto DstReg =
I.getOperand(0).getReg();
6847 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
6854 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6858 I.eraseFromParent();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

  assert(OpIdx >= 0 && "expected to match an immediate operand");

  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  assert(ExpVal != INT_MIN);

  if (MI.getOperand(OpIdx).getImm())
  MIB.addImm((int64_t)Mods);

  if (MI.getOperand(OpIdx).getImm())
  MIB.addImm((int64_t)Mods);

  unsigned Val = MI.getOperand(OpIdx).getImm();
  MIB.addImm((int64_t)Mods);

void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
  unsigned Val = MI.getOperand(OpIdx).getImm();
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static Register getLegalRegBank(Register NewReg, Register RootReg, const AMDGPURegisterBankInfo &RBI, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const SIInstrInfo &TII)
static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is shift left with half bits, such as reg0:2n =G_SHL reg1:2n, CONST(n)
static bool isNoUnsignedWrap(MachineInstr *Addr)
static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID)
#define GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
static bool checkRB(Register Reg, unsigned int RBNo, const AMDGPURegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI)
static unsigned updateMods(SrcStatus HiStat, SrcStatus LoStat, unsigned Mods)
static bool isTruncHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is truncating to half, such as reg0:n = G_TRUNC reg1:2n
static Register getWaveAddress(const MachineInstr *Def)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static bool shouldUseAndMask(unsigned Size, unsigned &Mask)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static TypeClass isVectorOfTwoOrScalar(Register Reg, const MachineRegisterInfo &MRI)
static bool isLaneMaskFromSameBlock(Register Reg, MachineRegisterInfo &MRI, MachineBasicBlock *MBB)
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static void addZeroImm(MachineInstrBuilder &MIB)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static bool isConstant(const MachineInstr &MI)
static bool isSameBitWidth(Register Reg1, Register Reg2, const MachineRegisterInfo &MRI)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI, uint32_t FormatLo, uint32_t FormatHi, Register BasePtr)
Return a resource descriptor for use with an arbitrary 64-bit pointer.
static std::pair< Register, unsigned > computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, Register IdxReg, unsigned EltSize, GISelValueTracking &ValueTracking)
Return the register to use for the index value, and the subregister to use for the indirectly accesse...
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64)
static std::pair< Register, SrcStatus > getLastSameOrNeg(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static Register stripCopy(Register Reg, MachineRegisterInfo &MRI)
static std::optional< std::pair< Register, SrcStatus > > calcNextStatus(std::pair< Register, SrcStatus > Curr, const MachineRegisterInfo &MRI)
static Register stripBitCast(Register Reg, MachineRegisterInfo &MRI)
static std::optional< uint64_t > getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI)
Get an immediate that must be 32-bits, and treated as zero extended.
static bool isValidToPack(SrcStatus HiStat, SrcStatus LoStat, Register NewReg, Register RootReg, const SIInstrInfo &TII, const MachineRegisterInfo &MRI)
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, const GCNSubtarget &ST)
static SmallVector< std::pair< Register, SrcStatus > > getSrcStats(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static bool isUnmergeHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test function, if the MI is reg0:n, reg1:n = G_UNMERGE_VALUES reg2:2n
static SrcStatus getNegStatus(Register Reg, SrcStatus S, const MachineRegisterInfo &MRI)
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI)
static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is logic shift right with half bits, such as reg0:2n =G_LSHR reg1:2n,...
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
This file declares the targeting of the InstructionSelector class for AMDGPU.
AMDGPU Register Bank Select
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
MachineInstr unsigned OpIdx
static std::vector< std::pair< int, unsigned > > Swizzle(std::vector< std::pair< int, unsigned > > Src, R600InstrInfo::BankSwizzle Swz)
This is used to control valid status that current MI supports.
bool checkOptions(SrcStatus Stat) const
SearchOptions(Register Reg, const MachineRegisterInfo &MRI)
AMDGPUInstructionSelector(const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM)
static const char * getName()
bool select(MachineInstr &I) override
Select the (possibly generic) instruction I to only use target-specific opcodes.
void setupMF(MachineFunction &MF, GISelValueTracking *VT, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) override
Setup per-MF executor state.
uint32_t getLDSSize() const
bool isEntryFunction() const
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override
Get a register bank that covers RC.
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getWavefrontSizeLog2() const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
LLVM_READONLY int getExactLog2Abs() const
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
ConstantFP - Floating Point Values [float, double].
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents a G_BUILD_VECTOR.
bool useVGPRIndexMode() const
bool hasPermlane32Swap() const
bool hasScalarCompareEq64() const
int getLDSBankCount() const
bool hasSafeCUPrefetch() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFlatInstOffsets() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool hasScaleOffset() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool privateMemoryResourceIsRangeChecked() const
bool hasSignedScratchOffsets() const
bool hasVMemToLDSLoad() const
bool hasDOTOpSelHazard() const
bool hasRestrictedSOffset() const
bool hasMadU64U32NoCarry() const
const SITargetLowering * getTargetLowering() const override
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasPermlane16Swap() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasSignedGVSOffset() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasUnpackedD16VMem() const
bool hasGWSSemaReleaseAll() const
bool hasAddNoCarry() const
bool hasSALUFloatInsts() const
bool hasPartialNSAEncoding() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
Represents a G_CONCAT_VECTORS.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
CodeGenCoverage * CoverageInfo
KnownBits getKnownBits(Register R)
bool signBitIsZero(Register Op)
APInt getKnownOnes(Register R)
APInt getKnownZeroes(Register R)
Module * getParent()
Get the module that this global value is contained inside of...
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
TypeSize getValue() const
Describe properties that are true of each instruction in the target description file.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const ConstantInt * getCImm() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ArrayRef< int > getShuffleMask() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
bool isInternalRead() const
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
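A sketch of in-place operand rewriting with the MachineOperand predicates and mutators above; the helper is illustrative.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

// Point every use of FromReg in MI at ToReg instead.
void replaceRegUses(MachineInstr &MI, Register FromReg, Register ToReg) {
  for (unsigned Idx = 0, E = MI.getNumOperands(); Idx != E; ++Idx) {
    MachineOperand &MO = MI.getOperand(Idx);
    if (MO.isReg() && MO.getReg() == FromReg)
      MO.setReg(ToReg);
  }
}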
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Analysis providing profile information.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
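A hedged sketch combining getRegBank, getID, and constrainGenericRegister: check which bank RegBankSelect assigned to a register and then pin it to a concrete class. The expected bank ID and class are parameters here; in the AMDGPU selector they would be, for example, AMDGPU::SGPRRegBankID and an SGPR register class.

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

bool constrainIfOnBank(Register Reg, unsigned ExpectedBankID,
                       const TargetRegisterClass &RC,
                       MachineRegisterInfo &MRI, const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
  if (!RB || RB->getID() != ExpectedBankID)
    return false;
  // constrainGenericRegister returns null when Reg cannot live in RC.
  return RBI.constrainGenericRegister(Reg, RC, MRI) != nullptr;
}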
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
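A hedged sketch of the split-then-verify pattern for FLAT-family offsets using splitFlatOffset and isLegalFLATOffset; AddrSpace and FlatVariant are assumed to come from the memory operand and the opcode being formed (for example SIInstrFlags::FlatGlobal for global_* instructions).

#include "SIInstrInfo.h" // AMDGPU backend-internal header
#include <cassert>
#include <utility>

using namespace llvm;

std::pair<int64_t, int64_t>
splitLegalFlatOffset(const SIInstrInfo &TII, int64_t COffset,
                     unsigned AddrSpace, uint64_t FlatVariant) {
  // splitFlatOffset returns {encodable immediate, remainder to materialize
  // into the address register}.
  std::pair<int64_t, int64_t> Split =
      TII.splitFlatOffset(COffset, AddrSpace, FlatVariant);
  assert(TII.isLegalFLATOffset(Split.first, AddrSpace, FlatVariant) &&
         "split immediate should always be encodable");
  return Split;
}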
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCRegister getReturnAddressReg(const MachineFunction &MF) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
const TargetRegisterClass * getBoolRC() const
MCRegister getExec() const
const TargetRegisterClass * getWaveMaskRegClass() const
static bool isSGPRClass(const TargetRegisterClass *RC)
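A hedged sketch of picking a register class from an LLT and a register bank with the SIRegisterInfo queries above; the wrapper and its IsWaveMask flag are illustrative.

#include "SIRegisterInfo.h" // AMDGPU backend-internal header

using namespace llvm;

const TargetRegisterClass *pickClassFor(const SIRegisterInfo &TRI, LLT Ty,
                                        const RegisterBank &Bank,
                                        bool IsWaveMask) {
  // Wave masks (VCC-like values) live in the subtarget's bool/wave-mask class
  // regardless of their nominal LLT.
  if (IsWaveMask)
    return TRI.getWaveMaskRegClass();
  // Otherwise pick a class of the right size on the assigned bank, or null if
  // the combination is not representable.
  return TRI.getRegClassForTypeOnBank(Ty, Bank);
}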
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
const Triple & getTargetTriple() const
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
OSType getOS() const
Get the parsed operating system type of this triple.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
LLVM_ABI Value(Type *Ty, unsigned scid)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
LLVM_READONLY int getGlobalSaddrOp(uint16_t Opcode)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelValueTracking *ValueTracking=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
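A hedged sketch tying two of these AMDGPU helpers together: peel a constant offset off a pointer and ask whether it can be encoded directly in an SMRD-style scalar load. IsBuffer/HasSOffset are assumed false for a plain s_load.

#include "AMDGPUGlobalISelUtils.h"   // AMDGPU::getBaseWithConstantOffset
#include "GCNSubtarget.h"            // AMDGPU backend-internal headers
#include "Utils/AMDGPUBaseInfo.h"    // AMDGPU::getSMRDEncodedOffset
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

bool hasEncodableSMRDOffset(MachineRegisterInfo &MRI, const GCNSubtarget &ST,
                            Register Ptr) {
  std::pair<Register, unsigned> BaseOff =
      AMDGPU::getBaseWithConstantOffset(MRI, Ptr);
  // BaseOff.first would become the SBase operand of the scalar load.
  return AMDGPU::getSMRDEncodedOffset(ST, BaseOff.second, /*IsBuffer=*/false,
                                      /*HasSOffset=*/false)
      .has_value();
}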
IndexMode
ARM Index Modes.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
operand_type_match m_Reg()
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ConstantMatch< APInt > m_ICst(APInt &Cst)
SpecificConstantMatch m_AllOnesInt()
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
SpecificConstantMatch m_SpecificICst(APInt RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
SpecificRegisterMatch m_SpecificReg(Register RequestedReg)
Matches a register only if it is equal to RequestedReg.
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
UnaryOp_match< SrcTy, TargetOpcode::G_FABS > m_GFabs(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, TargetOpcode::G_MUL, true > m_GMul(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
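A minimal sketch of the mi_match idiom behind the matchers listed above: recognise "x & C" with C a compile-time constant, binding both pieces. The helper is illustrative, not one of the selector's own patterns.

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;
using namespace MIPatternMatch;

bool matchAndWithConstant(Register Reg, const MachineRegisterInfo &MRI,
                          Register &Src, APInt &Mask) {
  // m_GAnd is commutative, so the constant may sit on either side.
  return mi_match(Reg, MRI, m_GAnd(m_Reg(Src), m_ICst(Mask)));
}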
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
int popcount(T Value) noexcept
Count the number of set bits in a value.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
unsigned getUndefRegState(bool B)
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
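Two hedged sketches of the GlobalISel utilities above that recur throughout hand-written selection: looking through copies for a constant source, and constraining the operands of a freshly emitted instruction. The wrapper names are placeholders.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <optional>

using namespace llvm;

// Return the signed constant feeding Reg, if one can be found through copies.
std::optional<int64_t> getFoldableImm(Register Reg,
                                      const MachineRegisterInfo &MRI) {
  if (std::optional<ValueAndVReg> C =
          getIConstantVRegValWithLookThrough(Reg, MRI))
    return C->Value.getSExtValue();
  return std::nullopt;
}

// After BuildMI produces a target instruction, its (possibly generic) virtual
// register operands must be pinned to the classes the opcode requires.
bool finalizeSelected(MachineInstr &MI, const TargetInstrInfo &TII,
                      const TargetRegisterInfo &TRI,
                      const RegisterBankInfo &RBI) {
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}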
@ Default
The result values are uniform if and only if all operands are uniform.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
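A minimal sketch of the two KnownBits entry points above; both inputs are assumed to have the same bit width, and adding two fully-known values yields a fully-known sum.

#include "llvm/Support/KnownBits.h"

using namespace llvm;

KnownBits addKnownConstants(const APInt &A, const APInt &B) {
  KnownBits KA = KnownBits::makeConstant(A);
  KnownBits KB = KnownBits::makeConstant(B);
  return KnownBits::add(KA, KB); // no unknown bits remain in the result
}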
This class contains a discriminated union of information about pointers in memory operands,...
int64_t Offset
Offset - This is an offset from the base Value*.
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.