29#include "llvm/IR/IntrinsicsAMDGPU.h"
32#define DEBUG_TYPE "amdgpu-isel"
35using namespace MIPatternMatch;
37#define GET_GLOBALISEL_IMPL
38#define AMDGPUSubtarget GCNSubtarget
39#include "AMDGPUGenGlobalISel.inc"
40#undef GET_GLOBALISEL_IMPL
46 :
TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
49#include
"AMDGPUGenGlobalISel.inc"
52#include
"AMDGPUGenGlobalISel.inc"
71 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
72 ? Def->getOperand(1).getReg()
76bool AMDGPUInstructionSelector::isVCC(
Register Reg,
82 auto &RegClassOrBank =
MRI.getRegClassOrRegBank(Reg);
84 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
86 const LLT Ty =
MRI.getType(Reg);
90 return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
94 const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
95 return RB->
getID() == AMDGPU::VCCRegBankID;
98bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(
MachineInstr &
MI,
99 unsigned NewOpc)
const {
100 MI.setDesc(TII.get(NewOpc));
115 if (!DstRC || DstRC != SrcRC)
122bool AMDGPUInstructionSelector::selectCOPY(
MachineInstr &
I)
const {
125 I.setDesc(TII.get(TargetOpcode::COPY));
132 if (isVCC(DstReg, *MRI)) {
133 if (SrcReg == AMDGPU::SCC) {
141 if (!isVCC(SrcReg, *MRI)) {
149 std::optional<ValueAndVReg> ConstVal =
153 STI.
isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
155 .
addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
157 Register MaskedReg =
MRI->createVirtualRegister(SrcRC);
165 const int64_t NoMods = 0;
166 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
172 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
180 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
185 And.setOperandDead(3);
187 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
193 if (!
MRI->getRegClassOrNull(SrcReg))
194 MRI->setRegClass(SrcReg, SrcRC);
208 if (MO.getReg().isPhysical())
220bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(
MachineInstr &
I)
const {
225 STI.
isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
227 .
addReg(
I.getOperand(1).getReg())
232 Register DstReg =
I.getOperand(0).getReg();
239bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(
MachineInstr &
I)
const {
243 Register DstReg =
I.getOperand(0).getReg();
244 Register SrcReg =
I.getOperand(1).getReg();
245 std::optional<ValueAndVReg> Arg =
249 const int64_t
Value = Arg->
Value.getZExtValue();
251 unsigned Opcode = STI.
isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
264 unsigned SelectOpcode =
265 STI.
isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
274bool AMDGPUInstructionSelector::selectReadAnyLane(
MachineInstr &
I)
const {
275 Register DstReg =
I.getOperand(0).getReg();
276 Register SrcReg =
I.getOperand(1).getReg();
281 auto RFL =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
288bool AMDGPUInstructionSelector::selectPHI(
MachineInstr &
I)
const {
289 const Register DefReg =
I.getOperand(0).getReg();
290 const LLT DefTy =
MRI->getType(DefReg);
302 MRI->getRegClassOrRegBank(DefReg);
305 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
312 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
322 for (
unsigned i = 1; i !=
I.getNumOperands(); i += 2) {
323 const Register SrcReg =
I.getOperand(i).getReg();
327 const LLT SrcTy =
MRI->getType(SrcReg);
335 I.setDesc(TII.get(TargetOpcode::PHI));
342 unsigned SubIdx)
const {
346 Register DstReg =
MRI->createVirtualRegister(&SubRC);
349 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.
getSubReg(), SubIdx);
351 BuildMI(*BB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
352 .
addReg(Reg, 0, ComposedSubIdx);
377 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
379 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
381 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
387bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(
MachineInstr &
I)
const {
388 Register DstReg =
I.getOperand(0).getReg();
392 if (DstRB->
getID() != AMDGPU::SGPRRegBankID &&
393 DstRB->
getID() != AMDGPU::VCCRegBankID)
396 bool Is64 =
Size > 32 || (DstRB->
getID() == AMDGPU::VCCRegBankID &&
408bool AMDGPUInstructionSelector::selectG_ADD_SUB(
MachineInstr &
I)
const {
411 Register DstReg =
I.getOperand(0).getReg();
413 LLT Ty =
MRI->getType(DstReg);
419 const bool IsSALU = DstRB->
getID() == AMDGPU::SGPRRegBankID;
420 const bool Sub =
I.getOpcode() == TargetOpcode::G_SUB;
424 const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
427 .
add(
I.getOperand(1))
428 .
add(
I.getOperand(2))
435 const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
436 I.setDesc(TII.get(Opc));
442 const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
448 .
add(
I.getOperand(1))
449 .
add(
I.getOperand(2))
455 assert(!Sub &&
"illegal sub should not reach here");
458 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
460 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
462 MachineOperand Lo1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub0));
463 MachineOperand Lo2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub0));
464 MachineOperand Hi1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub1));
465 MachineOperand Hi2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub1));
467 Register DstLo =
MRI->createVirtualRegister(&HalfRC);
468 Register DstHi =
MRI->createVirtualRegister(&HalfRC);
471 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
474 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
480 Register CarryReg =
MRI->createVirtualRegister(CarryRC);
481 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
497 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
511bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
516 Register Dst0Reg =
I.getOperand(0).getReg();
517 Register Dst1Reg =
I.getOperand(1).getReg();
518 const bool IsAdd =
I.getOpcode() == AMDGPU::G_UADDO ||
519 I.getOpcode() == AMDGPU::G_UADDE;
520 const bool HasCarryIn =
I.getOpcode() == AMDGPU::G_UADDE ||
521 I.getOpcode() == AMDGPU::G_USUBE;
523 if (isVCC(Dst1Reg, *MRI)) {
524 unsigned NoCarryOpc =
525 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
526 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
527 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
533 Register Src0Reg =
I.getOperand(2).getReg();
534 Register Src1Reg =
I.getOperand(3).getReg();
537 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
538 .
addReg(
I.getOperand(4).getReg());
541 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
542 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
544 auto CarryInst =
BuildMI(*BB, &
I,
DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
545 .
add(
I.getOperand(2))
546 .
add(
I.getOperand(3));
548 if (
MRI->use_nodbg_empty(Dst1Reg)) {
551 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), Dst1Reg)
553 if (!
MRI->getRegClassOrNull(Dst1Reg))
554 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
564 AMDGPU::SReg_32RegClass, *MRI))
571bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
575 const bool IsUnsigned =
I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
579 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
580 : AMDGPU::V_MAD_I64_I32_gfx11_e64;
582 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
583 I.setDesc(TII.get(Opc));
585 I.addImplicitDefUseOperands(*
MF);
590bool AMDGPUInstructionSelector::selectG_EXTRACT(
MachineInstr &
I)
const {
592 Register DstReg =
I.getOperand(0).getReg();
593 Register SrcReg =
I.getOperand(1).getReg();
594 LLT DstTy =
MRI->getType(DstReg);
595 LLT SrcTy =
MRI->getType(SrcReg);
600 unsigned Offset =
I.getOperand(2).getImm();
601 if (
Offset % 32 != 0 || DstSize > 128)
621 SrcRC = TRI.getSubClassWithSubReg(SrcRC,
SubReg);
626 *SrcRC,
I.getOperand(1));
628 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::COPY), DstReg)
635bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(
MachineInstr &
MI)
const {
638 LLT DstTy =
MRI->getType(DstReg);
639 LLT SrcTy =
MRI->getType(
MI.getOperand(1).getReg());
655 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
656 for (
int I = 0, E =
MI.getNumOperands() - 1;
I != E; ++
I) {
670 MI.eraseFromParent();
674bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(
MachineInstr &
MI)
const {
676 const int NumDst =
MI.getNumOperands() - 1;
682 LLT DstTy =
MRI->getType(DstReg0);
683 LLT SrcTy =
MRI->getType(SrcReg);
699 for (
int I = 0, E = NumDst;
I != E; ++
I) {
701 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::COPY), Dst.getReg())
702 .
addReg(SrcReg, 0, SubRegs[
I]);
705 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[
I]);
715 MI.eraseFromParent();
719bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(
MachineInstr &
MI)
const {
720 assert(
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
721 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
725 LLT SrcTy =
MRI->getType(Src0);
729 if (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
730 return selectG_MERGE_VALUES(
MI);
737 (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
742 if (DstBank->
getID() == AMDGPU::AGPRRegBankID)
745 assert(DstBank->
getID() == AMDGPU::SGPRRegBankID ||
746 DstBank->
getID() == AMDGPU::VGPRRegBankID);
747 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
760 const int64_t K0 = ConstSrc0->Value.getSExtValue();
761 const int64_t K1 = ConstSrc1->Value.getSExtValue();
769 MI.eraseFromParent();
775 MI.eraseFromParent();
787 if (Src1Def->
getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
788 MI.setDesc(TII.get(AMDGPU::COPY));
791 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
798 Register TmpReg =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
799 auto MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
805 MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
812 MI.eraseFromParent();
837 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
838 if (Shift0 && Shift1) {
839 Opc = AMDGPU::S_PACK_HH_B32_B16;
840 MI.getOperand(1).setReg(ShiftSrc0);
841 MI.getOperand(2).setReg(ShiftSrc1);
843 Opc = AMDGPU::S_PACK_LH_B32_B16;
844 MI.getOperand(2).setReg(ShiftSrc1);
848 if (ConstSrc1 && ConstSrc1->Value == 0) {
850 auto MIB =
BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
855 MI.eraseFromParent();
859 Opc = AMDGPU::S_PACK_HL_B32_B16;
860 MI.getOperand(1).setReg(ShiftSrc0);
864 MI.setDesc(TII.get(Opc));
868bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(
MachineInstr &
I)
const {
874 if ((!RC && !
MRI->getRegBankOrNull(MO.
getReg())) ||
876 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
883bool AMDGPUInstructionSelector::selectG_INSERT(
MachineInstr &
I)
const {
886 Register DstReg =
I.getOperand(0).getReg();
887 Register Src0Reg =
I.getOperand(1).getReg();
888 Register Src1Reg =
I.getOperand(2).getReg();
889 LLT Src1Ty =
MRI->getType(Src1Reg);
891 unsigned DstSize =
MRI->getType(DstReg).getSizeInBits();
894 int64_t
Offset =
I.getOperand(3).getImm();
897 if (
Offset % 32 != 0 || InsSize % 32 != 0)
905 if (
SubReg == AMDGPU::NoSubRegister)
923 Src0RC = TRI.getSubClassWithSubReg(Src0RC,
SubReg);
924 if (!Src0RC || !Src1RC)
933 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
942bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(
MachineInstr &
MI)
const {
945 Register OffsetReg =
MI.getOperand(2).getReg();
946 Register WidthReg =
MI.getOperand(3).getReg();
949 "scalar BFX instructions are expanded in regbankselect");
950 assert(
MRI->getType(
MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
951 "64-bit vector BFX instructions are expanded in regbankselect");
956 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SBFX;
957 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
962 MI.eraseFromParent();
966bool AMDGPUInstructionSelector::selectInterpP1F16(
MachineInstr &
MI)
const {
985 Register InterpMov =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
991 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
1007 MI.eraseFromParent();
1016bool AMDGPUInstructionSelector::selectWritelane(
MachineInstr &
MI)
const {
1025 Register LaneSelect =
MI.getOperand(3).getReg();
1028 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
1030 std::optional<ValueAndVReg> ConstSelect =
1036 MIB.
addImm(ConstSelect->Value.getSExtValue() &
1039 std::optional<ValueAndVReg> ConstVal =
1046 MIB.
addImm(ConstVal->Value.getSExtValue());
1056 BuildMI(*
MBB, *MIB,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1064 MI.eraseFromParent();
1070bool AMDGPUInstructionSelector::selectDivScale(
MachineInstr &
MI)
const {
1074 LLT Ty =
MRI->getType(Dst0);
1077 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
1079 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
1090 unsigned ChooseDenom =
MI.getOperand(5).getImm();
1092 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1105 MI.eraseFromParent();
1109bool AMDGPUInstructionSelector::selectG_INTRINSIC(
MachineInstr &
I)
const {
1110 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(
I).getIntrinsicID();
1111 switch (IntrinsicID) {
1112 case Intrinsic::amdgcn_if_break: {
1117 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1118 .
add(
I.getOperand(0))
1119 .
add(
I.getOperand(2))
1120 .
add(
I.getOperand(3));
1122 Register DstReg =
I.getOperand(0).getReg();
1123 Register Src0Reg =
I.getOperand(2).getReg();
1124 Register Src1Reg =
I.getOperand(3).getReg();
1126 I.eraseFromParent();
1128 for (
Register Reg : { DstReg, Src0Reg, Src1Reg })
1133 case Intrinsic::amdgcn_interp_p1_f16:
1134 return selectInterpP1F16(
I);
1135 case Intrinsic::amdgcn_wqm:
1136 return constrainCopyLikeIntrin(
I, AMDGPU::WQM);
1137 case Intrinsic::amdgcn_softwqm:
1138 return constrainCopyLikeIntrin(
I, AMDGPU::SOFT_WQM);
1139 case Intrinsic::amdgcn_strict_wwm:
1140 case Intrinsic::amdgcn_wwm:
1141 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WWM);
1142 case Intrinsic::amdgcn_strict_wqm:
1143 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WQM);
1144 case Intrinsic::amdgcn_writelane:
1145 return selectWritelane(
I);
1146 case Intrinsic::amdgcn_div_scale:
1147 return selectDivScale(
I);
1148 case Intrinsic::amdgcn_icmp:
1149 case Intrinsic::amdgcn_fcmp:
1152 return selectIntrinsicCmp(
I);
1153 case Intrinsic::amdgcn_ballot:
1154 return selectBallot(
I);
1155 case Intrinsic::amdgcn_reloc_constant:
1156 return selectRelocConstant(
I);
1157 case Intrinsic::amdgcn_groupstaticsize:
1158 return selectGroupStaticSize(
I);
1159 case Intrinsic::returnaddress:
1160 return selectReturnAddress(
I);
1161 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1162 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1163 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1164 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1165 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1166 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1167 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1168 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1169 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1170 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1171 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1172 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1173 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1174 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1175 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1176 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1177 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1178 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1179 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1180 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1181 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1182 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1183 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1184 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1185 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1186 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1187 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1188 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1189 return selectSMFMACIntrin(
I);
1190 case Intrinsic::amdgcn_permlane16_swap:
1191 case Intrinsic::amdgcn_permlane32_swap:
1192 return selectPermlaneSwapIntrin(
I, IntrinsicID);
1203 if (
Size == 16 && !ST.has16BitInsts())
1206 const auto Select = [&](
unsigned S16Opc,
unsigned TrueS16Opc,
1207 unsigned FakeS16Opc,
unsigned S32Opc,
1210 return ST.hasTrue16BitInsts()
1211 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1222 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1223 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1224 AMDGPU::V_CMP_NE_U64_e64);
1226 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1227 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1228 AMDGPU::V_CMP_EQ_U64_e64);
1230 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1231 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1232 AMDGPU::V_CMP_GT_I64_e64);
1234 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1235 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1236 AMDGPU::V_CMP_GE_I64_e64);
1238 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1239 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1240 AMDGPU::V_CMP_LT_I64_e64);
1242 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1243 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1244 AMDGPU::V_CMP_LE_I64_e64);
1246 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1247 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1248 AMDGPU::V_CMP_GT_U64_e64);
1250 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1251 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1252 AMDGPU::V_CMP_GE_U64_e64);
1254 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1255 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1256 AMDGPU::V_CMP_LT_U64_e64);
1258 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1259 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1260 AMDGPU::V_CMP_LE_U64_e64);
1263 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1264 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1265 AMDGPU::V_CMP_EQ_F64_e64);
1267 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1268 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1269 AMDGPU::V_CMP_GT_F64_e64);
1271 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1272 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1273 AMDGPU::V_CMP_GE_F64_e64);
1275 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1276 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1277 AMDGPU::V_CMP_LT_F64_e64);
1279 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1280 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1281 AMDGPU::V_CMP_LE_F64_e64);
1283 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1284 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1285 AMDGPU::V_CMP_NEQ_F64_e64);
1287 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1288 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1289 AMDGPU::V_CMP_O_F64_e64);
1291 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1292 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1293 AMDGPU::V_CMP_U_F64_e64);
1295 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1296 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1297 AMDGPU::V_CMP_NLG_F64_e64);
1299 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1300 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1301 AMDGPU::V_CMP_NLE_F64_e64);
1303 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1304 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1305 AMDGPU::V_CMP_NLT_F64_e64);
1307 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1308 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1309 AMDGPU::V_CMP_NGE_F64_e64);
1311 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1312 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1313 AMDGPU::V_CMP_NGT_F64_e64);
1315 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1316 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1317 AMDGPU::V_CMP_NEQ_F64_e64);
1319 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1320 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1321 AMDGPU::V_CMP_TRU_F64_e64);
1323 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1324 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1325 AMDGPU::V_CMP_F_F64_e64);
1330 unsigned Size)
const {
1337 return AMDGPU::S_CMP_LG_U64;
1339 return AMDGPU::S_CMP_EQ_U64;
1348 return AMDGPU::S_CMP_LG_U32;
1350 return AMDGPU::S_CMP_EQ_U32;
1352 return AMDGPU::S_CMP_GT_I32;
1354 return AMDGPU::S_CMP_GE_I32;
1356 return AMDGPU::S_CMP_LT_I32;
1358 return AMDGPU::S_CMP_LE_I32;
1360 return AMDGPU::S_CMP_GT_U32;
1362 return AMDGPU::S_CMP_GE_U32;
1364 return AMDGPU::S_CMP_LT_U32;
1366 return AMDGPU::S_CMP_LE_U32;
1368 return AMDGPU::S_CMP_EQ_F32;
1370 return AMDGPU::S_CMP_GT_F32;
1372 return AMDGPU::S_CMP_GE_F32;
1374 return AMDGPU::S_CMP_LT_F32;
1376 return AMDGPU::S_CMP_LE_F32;
1378 return AMDGPU::S_CMP_LG_F32;
1380 return AMDGPU::S_CMP_O_F32;
1382 return AMDGPU::S_CMP_U_F32;
1384 return AMDGPU::S_CMP_NLG_F32;
1386 return AMDGPU::S_CMP_NLE_F32;
1388 return AMDGPU::S_CMP_NLT_F32;
1390 return AMDGPU::S_CMP_NGE_F32;
1392 return AMDGPU::S_CMP_NGT_F32;
1394 return AMDGPU::S_CMP_NEQ_F32;
1406 return AMDGPU::S_CMP_EQ_F16;
1408 return AMDGPU::S_CMP_GT_F16;
1410 return AMDGPU::S_CMP_GE_F16;
1412 return AMDGPU::S_CMP_LT_F16;
1414 return AMDGPU::S_CMP_LE_F16;
1416 return AMDGPU::S_CMP_LG_F16;
1418 return AMDGPU::S_CMP_O_F16;
1420 return AMDGPU::S_CMP_U_F16;
1422 return AMDGPU::S_CMP_NLG_F16;
1424 return AMDGPU::S_CMP_NLE_F16;
1426 return AMDGPU::S_CMP_NLT_F16;
1428 return AMDGPU::S_CMP_NGE_F16;
1430 return AMDGPU::S_CMP_NGT_F16;
1432 return AMDGPU::S_CMP_NEQ_F16;
1441bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(
MachineInstr &
I)
const {
1446 Register SrcReg =
I.getOperand(2).getReg();
1451 Register CCReg =
I.getOperand(0).getReg();
1452 if (!isVCC(CCReg, *MRI)) {
1453 int Opcode = getS_CMPOpcode(Pred,
Size);
1457 .
add(
I.getOperand(2))
1458 .
add(
I.getOperand(3));
1459 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CCReg)
1464 I.eraseFromParent();
1468 if (
I.getOpcode() == AMDGPU::G_FCMP)
1476 I.getOperand(0).getReg())
1477 .
add(
I.getOperand(2))
1478 .
add(
I.getOperand(3));
1482 I.eraseFromParent();
1486bool AMDGPUInstructionSelector::selectIntrinsicCmp(
MachineInstr &
I)
const {
1487 Register Dst =
I.getOperand(0).getReg();
1488 if (isVCC(Dst, *MRI))
1491 LLT DstTy =
MRI->getType(Dst);
1497 Register SrcReg =
I.getOperand(2).getReg();
1506 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1507 I.eraseFromParent();
1518 auto [Src0, Src0Mods] = selectVOP3ModsImpl(
LHS.getReg());
1519 auto [Src1, Src1Mods] = selectVOP3ModsImpl(
RHS.getReg());
1521 copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &
I,
true);
1523 copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &
I,
true);
1524 SelectedMI =
BuildMI(*BB, &
I,
DL, TII.get(Opcode), Dst);
1526 SelectedMI.
addImm(Src0Mods);
1527 SelectedMI.
addReg(Src0Reg);
1529 SelectedMI.
addImm(Src1Mods);
1530 SelectedMI.
addReg(Src1Reg);
1540 I.eraseFromParent();
1551 if (
MI->getParent() !=
MBB)
1555 if (
MI->getOpcode() == AMDGPU::COPY) {
1556 auto DstRB =
MRI.getRegBankOrNull(
MI->getOperand(0).getReg());
1557 auto SrcRB =
MRI.getRegBankOrNull(
MI->getOperand(1).getReg());
1558 if (DstRB && SrcRB && DstRB->
getID() == AMDGPU::VCCRegBankID &&
1559 SrcRB->getID() == AMDGPU::SGPRRegBankID)
1564 if (isa<GAnyCmp>(
MI))
1576bool AMDGPUInstructionSelector::selectBallot(
MachineInstr &
I)
const {
1579 Register DstReg =
I.getOperand(0).getReg();
1580 Register SrcReg =
I.getOperand(2).getReg();
1581 const unsigned BallotSize =
MRI->getType(DstReg).getSizeInBits();
1586 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1589 std::optional<ValueAndVReg> Arg =
1594 if (BallotSize != WaveSize) {
1599 const int64_t
Value = Arg->
Value.getZExtValue();
1602 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1619 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1630 if (BallotSize != WaveSize) {
1631 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1633 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1640 I.eraseFromParent();
1644bool AMDGPUInstructionSelector::selectRelocConstant(
MachineInstr &
I)
const {
1645 Register DstReg =
I.getOperand(0).getReg();
1651 const bool IsVALU = DstBank->
getID() == AMDGPU::VGPRRegBankID;
1656 auto *RelocSymbol = cast<GlobalVariable>(
1661 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1664 I.eraseFromParent();
1668bool AMDGPUInstructionSelector::selectGroupStaticSize(
MachineInstr &
I)
const {
1671 Register DstReg =
I.getOperand(0).getReg();
1673 unsigned Mov = DstRB->
getID() == AMDGPU::SGPRRegBankID ?
1674 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1691 I.eraseFromParent();
1695bool AMDGPUInstructionSelector::selectReturnAddress(
MachineInstr &
I)
const {
1702 unsigned Depth =
I.getOperand(2).getImm();
1715 I.eraseFromParent();
1726 AMDGPU::SReg_64RegClass,
DL);
1729 I.eraseFromParent();
1733bool AMDGPUInstructionSelector::selectEndCfIntrinsic(
MachineInstr &
MI)
const {
1737 BuildMI(*BB, &
MI,
MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1738 .
add(
MI.getOperand(1));
1741 MI.eraseFromParent();
1743 if (!
MRI->getRegClassOrNull(Reg))
1748bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1754 unsigned IndexOperand =
MI.getOperand(7).getImm();
1755 bool WaveRelease =
MI.getOperand(8).getImm() != 0;
1756 bool WaveDone =
MI.getOperand(9).getImm() != 0;
1758 if (WaveDone && !WaveRelease)
1761 unsigned OrderedCountIndex = IndexOperand & 0x3f;
1762 IndexOperand &= ~0x3f;
1763 unsigned CountDw = 0;
1766 CountDw = (IndexOperand >> 24) & 0xf;
1767 IndexOperand &= ~(0xf << 24);
1769 if (CountDw < 1 || CountDw > 4) {
1771 "ds_ordered_count: dword count must be between 1 and 4");
1778 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1781 unsigned Offset0 = OrderedCountIndex << 2;
1782 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (
Instruction << 4);
1785 Offset1 |= (CountDw - 1) << 6;
1788 Offset1 |= ShaderType << 2;
1790 unsigned Offset = Offset0 | (Offset1 << 8);
1799 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1808 MI.eraseFromParent();
1814 case Intrinsic::amdgcn_ds_gws_init:
1815 return AMDGPU::DS_GWS_INIT;
1816 case Intrinsic::amdgcn_ds_gws_barrier:
1817 return AMDGPU::DS_GWS_BARRIER;
1818 case Intrinsic::amdgcn_ds_gws_sema_v:
1819 return AMDGPU::DS_GWS_SEMA_V;
1820 case Intrinsic::amdgcn_ds_gws_sema_br:
1821 return AMDGPU::DS_GWS_SEMA_BR;
1822 case Intrinsic::amdgcn_ds_gws_sema_p:
1823 return AMDGPU::DS_GWS_SEMA_P;
1824 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1825 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
1831bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(
MachineInstr &
MI,
1833 if (!STI.
hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1838 const bool HasVSrc =
MI.getNumOperands() == 3;
1839 assert(HasVSrc ||
MI.getNumOperands() == 2);
1841 Register BaseOffset =
MI.getOperand(HasVSrc ? 2 : 1).getReg();
1843 if (OffsetRB->
getID() != AMDGPU::SGPRRegBankID)
1857 if (OffsetDef->
getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
1858 Readfirstlane = OffsetDef;
1863 if (OffsetDef->
getOpcode() == AMDGPU::G_CONSTANT) {
1873 std::tie(BaseOffset, ImmOffset) =
1876 if (Readfirstlane) {
1886 AMDGPU::SReg_32RegClass, *MRI))
1890 Register M0Base =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1918 MI.eraseFromParent();
1922bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
1923 bool IsAppend)
const {
1924 Register PtrBase =
MI.getOperand(2).getReg();
1925 LLT PtrTy =
MRI->getType(PtrBase);
1929 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
1932 if (!isDSOffsetLegal(PtrBase,
Offset)) {
1933 PtrBase =
MI.getOperand(2).getReg();
1939 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1950 MI.eraseFromParent();
1954bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
1962bool AMDGPUInstructionSelector::selectSBarrier(
MachineInstr &
MI)
const {
1969 if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||
1970 IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {
1975 MI.eraseFromParent();
1980 if (STI.
hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
1988 MI.eraseFromParent();
2000 TFE = (TexFailCtrl & 0x1) ?
true :
false;
2002 LWE = (TexFailCtrl & 0x2) ?
true :
false;
2005 return TexFailCtrl == 0;
2008bool AMDGPUInstructionSelector::selectImageIntrinsic(
2017 unsigned IntrOpcode =
Intr->BaseOpcode;
2022 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
2026 int NumVDataDwords = -1;
2027 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
2028 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
2034 Unorm =
MI.getOperand(ArgOffset +
Intr->UnormIndex).getImm() != 0;
2038 bool IsTexFail =
false;
2040 TFE, LWE, IsTexFail))
2043 const int Flags =
MI.getOperand(ArgOffset +
Intr->NumArgs).getImm();
2044 const bool IsA16 = (
Flags & 1) != 0;
2045 const bool IsG16 = (
Flags & 2) != 0;
2048 if (IsA16 && !STI.
hasG16() && !IsG16)
2052 unsigned DMaskLanes = 0;
2054 if (BaseOpcode->
Atomic) {
2055 VDataOut =
MI.getOperand(0).getReg();
2056 VDataIn =
MI.getOperand(2).getReg();
2057 LLT Ty =
MRI->getType(VDataIn);
2060 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
2065 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
2067 DMask = Is64Bit ? 0xf : 0x3;
2068 NumVDataDwords = Is64Bit ? 4 : 2;
2070 DMask = Is64Bit ? 0x3 : 0x1;
2071 NumVDataDwords = Is64Bit ? 2 : 1;
2074 DMask =
MI.getOperand(ArgOffset +
Intr->DMaskIndex).getImm();
2077 if (BaseOpcode->
Store) {
2078 VDataIn =
MI.getOperand(1).getReg();
2079 VDataTy =
MRI->getType(VDataIn);
2084 VDataOut =
MI.getOperand(0).getReg();
2085 VDataTy =
MRI->getType(VDataOut);
2086 NumVDataDwords = DMaskLanes;
2089 NumVDataDwords = (DMaskLanes + 1) / 2;
2094 if (Subtarget->
hasG16() && IsG16) {
2098 IntrOpcode = G16MappingInfo->
G16;
2102 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2104 unsigned CPol =
MI.getOperand(ArgOffset +
Intr->CachePolicyIndex).getImm();
2111 int NumVAddrRegs = 0;
2112 int NumVAddrDwords = 0;
2113 for (
unsigned I =
Intr->VAddrStart; I < Intr->VAddrEnd;
I++) {
2116 if (!AddrOp.
isReg())
2124 NumVAddrDwords += (
MRI->getType(
Addr).getSizeInBits() + 31) / 32;
2131 NumVAddrRegs != 1 &&
2133 : NumVAddrDwords == NumVAddrRegs);
2134 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2145 NumVDataDwords, NumVAddrDwords);
2146 }
else if (IsGFX11Plus) {
2148 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2149 : AMDGPU::MIMGEncGfx11Default,
2150 NumVDataDwords, NumVAddrDwords);
2151 }
else if (IsGFX10Plus) {
2153 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2154 : AMDGPU::MIMGEncGfx10Default,
2155 NumVDataDwords, NumVAddrDwords);
2159 NumVDataDwords, NumVAddrDwords);
2163 <<
"requested image instruction is not supported on this GPU\n");
2170 NumVDataDwords, NumVAddrDwords);
2173 NumVDataDwords, NumVAddrDwords);
2183 const bool Is64 =
MRI->getType(VDataOut).getSizeInBits() == 64;
2186 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2187 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2190 if (!
MRI->use_empty(VDataOut)) {
2203 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2205 if (
SrcOp.isReg()) {
2211 MIB.
addReg(
MI.getOperand(ArgOffset +
Intr->RsrcIndex).getReg());
2213 MIB.
addReg(
MI.getOperand(ArgOffset +
Intr->SampIndex).getReg());
2224 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2226 MIB.
addImm(IsA16 ? -1 : 0);
2240 MIB.
addImm(IsD16 ? -1 : 0);
2242 MI.eraseFromParent();
2250bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2261 unsigned Offset =
MI.getOperand(6).getImm();
2263 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
2271 MI.eraseFromParent();
2275bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2277 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(
I).getIntrinsicID();
2278 switch (IntrinsicID) {
2279 case Intrinsic::amdgcn_end_cf:
2280 return selectEndCfIntrinsic(
I);
2281 case Intrinsic::amdgcn_ds_ordered_add:
2282 case Intrinsic::amdgcn_ds_ordered_swap:
2283 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2284 case Intrinsic::amdgcn_ds_gws_init:
2285 case Intrinsic::amdgcn_ds_gws_barrier:
2286 case Intrinsic::amdgcn_ds_gws_sema_v:
2287 case Intrinsic::amdgcn_ds_gws_sema_br:
2288 case Intrinsic::amdgcn_ds_gws_sema_p:
2289 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2290 return selectDSGWSIntrinsic(
I, IntrinsicID);
2291 case Intrinsic::amdgcn_ds_append:
2292 return selectDSAppendConsume(
I,
true);
2293 case Intrinsic::amdgcn_ds_consume:
2294 return selectDSAppendConsume(
I,
false);
2295 case Intrinsic::amdgcn_init_whole_wave:
2296 return selectInitWholeWave(
I);
2297 case Intrinsic::amdgcn_s_barrier:
2298 case Intrinsic::amdgcn_s_barrier_signal:
2299 case Intrinsic::amdgcn_s_barrier_wait:
2300 return selectSBarrier(
I);
2301 case Intrinsic::amdgcn_raw_buffer_load_lds:
2302 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2303 case Intrinsic::amdgcn_struct_buffer_load_lds:
2304 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2305 return selectBufferLoadLds(
I);
2306 case Intrinsic::amdgcn_global_load_lds:
2307 return selectGlobalLoadLds(
I);
2308 case Intrinsic::amdgcn_exp_compr:
2312 F,
"intrinsic not supported on subtarget",
I.getDebugLoc(),
DS_Error);
2313 F.getContext().diagnose(NoFpRet);
2317 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2318 return selectDSBvhStackIntrinsic(
I);
2319 case Intrinsic::amdgcn_s_barrier_init:
2320 case Intrinsic::amdgcn_s_barrier_signal_var:
2321 return selectNamedBarrierInit(
I, IntrinsicID);
2322 case Intrinsic::amdgcn_s_barrier_join:
2323 case Intrinsic::amdgcn_s_get_named_barrier_state:
2324 return selectNamedBarrierInst(
I, IntrinsicID);
2325 case Intrinsic::amdgcn_s_get_barrier_state:
2326 return selectSGetBarrierState(
I, IntrinsicID);
2327 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2328 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
2333bool AMDGPUInstructionSelector::selectG_SELECT(
MachineInstr &
I)
const {
2340 Register DstReg =
I.getOperand(0).getReg();
2345 if (!isVCC(CCReg, *MRI)) {
2346 unsigned SelectOpcode =
Size == 64 ? AMDGPU::S_CSELECT_B64 :
2347 AMDGPU::S_CSELECT_B32;
2354 if (!
MRI->getRegClassOrNull(CCReg))
2357 .
add(
I.getOperand(2))
2358 .
add(
I.getOperand(3));
2363 I.eraseFromParent();
2372 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2374 .
add(
I.getOperand(3))
2376 .
add(
I.getOperand(2))
2377 .
add(
I.getOperand(1));
2380 I.eraseFromParent();
2384bool AMDGPUInstructionSelector::selectG_TRUNC(
MachineInstr &
I)
const {
2385 Register DstReg =
I.getOperand(0).getReg();
2386 Register SrcReg =
I.getOperand(1).getReg();
2387 const LLT DstTy =
MRI->getType(DstReg);
2388 const LLT SrcTy =
MRI->getType(SrcReg);
2403 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
2412 if (!SrcRC || !DstRC)
2421 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2426 .
addReg(SrcReg, 0, AMDGPU::lo16);
2427 I.eraseFromParent();
2435 Register LoReg =
MRI->createVirtualRegister(DstRC);
2436 Register HiReg =
MRI->createVirtualRegister(DstRC);
2438 .
addReg(SrcReg, 0, AMDGPU::sub0);
2440 .
addReg(SrcReg, 0, AMDGPU::sub1);
2442 if (IsVALU && STI.
hasSDWA()) {
2446 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2456 Register TmpReg0 =
MRI->createVirtualRegister(DstRC);
2457 Register TmpReg1 =
MRI->createVirtualRegister(DstRC);
2458 Register ImmReg =
MRI->createVirtualRegister(DstRC);
2460 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2470 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2471 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2472 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2484 And.setOperandDead(3);
2485 Or.setOperandDead(3);
2489 I.eraseFromParent();
2497 unsigned SubRegIdx =
2499 if (SubRegIdx == AMDGPU::NoSubRegister)
2505 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2509 if (SrcWithSubRC != SrcRC) {
2514 I.getOperand(1).setSubReg(SubRegIdx);
2517 I.setDesc(TII.get(TargetOpcode::COPY));
2523 Mask = maskTrailingOnes<unsigned>(
Size);
2524 int SignedMask =
static_cast<int>(Mask);
2525 return SignedMask >= -16 && SignedMask <= 64;
2529const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2533 if (
auto *RB = dyn_cast<const RegisterBank *>(RegClassOrBank))
2537 if (
auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
2542bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
const {
2543 bool InReg =
I.getOpcode() == AMDGPU::G_SEXT_INREG;
2544 bool Signed =
I.getOpcode() == AMDGPU::G_SEXT || InReg;
2547 const Register DstReg =
I.getOperand(0).getReg();
2548 const Register SrcReg =
I.getOperand(1).getReg();
2550 const LLT DstTy =
MRI->getType(DstReg);
2551 const LLT SrcTy =
MRI->getType(SrcReg);
2552 const unsigned SrcSize =
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2559 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2562 if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
2564 return selectCOPY(
I);
2567 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2570 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2572 Register UndefReg =
MRI->createVirtualRegister(SrcRC);
2573 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2579 I.eraseFromParent();
2585 if (SrcBank->
getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2595 I.eraseFromParent();
2599 const unsigned BFE =
Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2605 I.eraseFromParent();
2609 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2611 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2615 if (
Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2616 const unsigned SextOpc = SrcSize == 8 ?
2617 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2620 I.eraseFromParent();
2626 if (DstSize > 32 && SrcSize == 32) {
2627 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2628 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2643 I.eraseFromParent();
2648 const unsigned BFE64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2649 const unsigned BFE32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2652 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2654 Register ExtReg =
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2655 Register UndefReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2656 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2658 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2669 I.eraseFromParent();
2685 I.eraseFromParent();
2720 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2727 assert(Mask.size() == 2);
2729 if (Mask[0] == 1 && Mask[1] <= 1) {
2737bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2741 Register Dst =
I.getOperand(0).getReg();
2743 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2746 Register Src =
I.getOperand(1).getReg();
2752 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2754 I.eraseFromParent();
2762bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
2776 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2791 Register LoReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2792 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2793 Register ConstReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2794 Register OpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2796 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2797 .
addReg(Src, 0, AMDGPU::sub0);
2798 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2799 .
addReg(Src, 0, AMDGPU::sub1);
2800 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2804 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
2809 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2814 MI.eraseFromParent();
2819bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
2822 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2829 Register LoReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2830 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2831 Register ConstReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2832 Register OpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2838 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2839 .
addReg(Src, 0, AMDGPU::sub0);
2840 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2841 .
addReg(Src, 0, AMDGPU::sub1);
2842 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2847 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
2851 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2857 MI.eraseFromParent();
2862 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
2865void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
2868 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
2870 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
2874 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
2879 for (
unsigned i = 1; i != 3; ++i) {
2886 assert(GEPInfo.Imm == 0);
2891 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
2892 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
2894 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
2898 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
2901bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
2902 return RBI.
getRegBank(Reg, *MRI, TRI)->
getID() == AMDGPU::SGPRRegBankID;
2905bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
2906 if (!
MI.hasOneMemOperand())
2916 if (!
Ptr || isa<UndefValue>(
Ptr) || isa<Argument>(
Ptr) ||
2917 isa<Constant>(
Ptr) || isa<GlobalValue>(
Ptr))
2923 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
2925 AMDGPU::SGPRRegBankID;
2928 return I &&
I->getMetadata(
"amdgpu.uniform");
2932 for (
const GEPInfo &GEPInfo : AddrInfo) {
2933 if (!GEPInfo.VgprParts.empty())
2939void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
2940 const LLT PtrTy =
MRI->getType(
I.getOperand(1).getReg());
2947 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2952bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
2959 if (Reg.isPhysical())
2963 const unsigned Opcode =
MI.getOpcode();
2965 if (Opcode == AMDGPU::COPY)
2968 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
2969 Opcode == AMDGPU::G_XOR)
2973 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI))
2974 return GI->is(Intrinsic::amdgcn_class);
2976 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
2979bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
2994 if (!isVCC(CondReg, *MRI)) {
2998 CondPhysReg = AMDGPU::SCC;
2999 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3000 ConstrainRC = &AMDGPU::SReg_32RegClass;
3008 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3009 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3012 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3019 CondPhysReg =
TRI.getVCC();
3020 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3021 ConstrainRC =
TRI.getBoolRC();
3024 if (!
MRI->getRegClassOrNull(CondReg))
3025 MRI->setRegClass(CondReg, ConstrainRC);
3027 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3030 .
addMBB(
I.getOperand(1).getMBB());
3032 I.eraseFromParent();
3036bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3038 Register DstReg =
I.getOperand(0).getReg();
3040 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3041 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3046 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3049bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3050 Register DstReg =
I.getOperand(0).getReg();
3051 Register SrcReg =
I.getOperand(1).getReg();
3052 Register MaskReg =
I.getOperand(2).getReg();
3053 LLT Ty =
MRI->getType(DstReg);
3054 LLT MaskTy =
MRI->getType(MaskReg);
3061 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3071 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3072 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3075 !CanCopyLow32 && !CanCopyHi32) {
3076 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3080 I.eraseFromParent();
3084 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3086 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3091 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3100 "ptrmask should have been narrowed during legalize");
3102 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3108 I.eraseFromParent();
3112 Register HiReg =
MRI->createVirtualRegister(&RegRC);
3113 Register LoReg =
MRI->createVirtualRegister(&RegRC);
3116 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3117 .
addReg(SrcReg, 0, AMDGPU::sub0);
3118 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3119 .
addReg(SrcReg, 0, AMDGPU::sub1);
3128 Register MaskLo =
MRI->createVirtualRegister(&RegRC);
3129 MaskedLo =
MRI->createVirtualRegister(&RegRC);
3131 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3132 .
addReg(MaskReg, 0, AMDGPU::sub0);
3133 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3142 Register MaskHi =
MRI->createVirtualRegister(&RegRC);
3143 MaskedHi =
MRI->createVirtualRegister(&RegRC);
3145 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3146 .
addReg(MaskReg, 0, AMDGPU::sub1);
3147 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3152 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3157 I.eraseFromParent();
3163static std::pair<Register, unsigned>
3170 std::tie(IdxBaseReg,
Offset) =
3172 if (IdxBaseReg == AMDGPU::NoRegister) {
3176 IdxBaseReg = IdxReg;
3183 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3184 return std::pair(IdxReg, SubRegs[0]);
3185 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3188bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3194 LLT DstTy =
MRI->getType(DstReg);
3195 LLT SrcTy =
MRI->getType(SrcReg);
3203 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3207 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3209 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3210 if (!SrcRC || !DstRC)
3225 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3229 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3232 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3236 MI.eraseFromParent();
3244 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3246 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3249 MI.eraseFromParent();
3260 MI.eraseFromParent();
3265bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3272 LLT VecTy =
MRI->getType(DstReg);
3273 LLT ValTy =
MRI->getType(ValReg);
3285 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3289 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3291 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3299 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3303 std::tie(IdxReg,
SubReg) =
3306 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3313 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3317 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3322 MI.eraseFromParent();
3334 MI.eraseFromParent();
3338bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3341 unsigned Size =
MI.getOperand(3).getImm();
3344 const bool HasVIndex =
MI.getNumOperands() == 9;
3348 VIndex =
MI.getOperand(4).getReg();
3352 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3353 std::optional<ValueAndVReg> MaybeVOffset =
3355 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3361 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3362 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3363 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3364 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3367 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3368 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3369 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3370 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3373 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3374 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3375 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3376 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3382 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3383 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3384 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3385 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3391 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3392 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3393 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3394 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3401 .
add(
MI.getOperand(2));
3405 if (HasVIndex && HasVOffset) {
3406 Register IdxReg =
MRI->createVirtualRegister(
TRI.getVGPR64Class());
3407 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3414 }
else if (HasVIndex) {
3416 }
else if (HasVOffset) {
3420 MIB.
add(
MI.getOperand(1));
3421 MIB.
add(
MI.getOperand(5 + OpOffset));
3422 MIB.
add(
MI.getOperand(6 + OpOffset));
3424 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3434 LoadPtrI.
Offset =
MI.getOperand(6 + OpOffset).getImm();
3436 StorePtrI.
V =
nullptr;
3450 MI.eraseFromParent();
3462 if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3465 assert(Def->getNumOperands() == 3 &&
3468 return Def->getOperand(1).getReg();
3474bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3476 unsigned Size =
MI.getOperand(3).getImm();
3482 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3485 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3488 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3493 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3498 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3505 .
add(
MI.getOperand(2));
3511 if (!isSGPR(
Addr)) {
3513 if (isSGPR(AddrDef->Reg)) {
3514 Addr = AddrDef->Reg;
3515 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3518 if (isSGPR(SAddr)) {
3519 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3531 VOffset =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3543 MIB.
add(
MI.getOperand(4))
3544 .
add(
MI.getOperand(5));
3548 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3558 sizeof(int32_t),
Align(4));
3562 MI.eraseFromParent();
3566bool AMDGPUInstructionSelector::selectBVHIntrinsic(
MachineInstr &
MI)
const{
3567 MI.setDesc(TII.get(
MI.getOperand(1).getImm()));
3568 MI.removeOperand(1);
3569 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3575bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3578 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3579 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3581 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3582 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3584 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3585 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3587 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3588 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3590 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3591 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3593 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3594 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3596 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3597 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3599 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3600 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3602 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3603 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3605 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3606 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3608 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3609 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3611 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3612 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3614 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3615 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3617 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3618 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3620 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3621 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3623 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3624 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3626 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3627 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3629 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3630 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3632 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3633 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3635 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3636 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3638 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3639 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3641 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3642 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3644 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3645 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3647 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3648 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3650 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3651 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3653 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3654 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3656 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3657 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3659 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3660 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3666 auto VDst_In =
MI.getOperand(4);
3668 MI.setDesc(TII.get(Opc));
3669 MI.removeOperand(4);
3670 MI.removeOperand(1);
3671 MI.addOperand(VDst_In);
3672 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3676bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3678 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3681 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3685 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3686 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3687 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3689 MI.removeOperand(2);
3690 MI.setDesc(TII.get(Opcode));
3699bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
3703 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3708 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
3719 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3723 MI.eraseFromParent();
3732 unsigned NumOpcodes = 0;
3745 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
3756 for (
unsigned I = 0;
I < Src.size(); ++
I) {
3770 if (Src.size() == 3) {
3777 for (
unsigned I = 0;
I < Src.size(); ++
I) {
3778 if (Src[
I] ==
LHS) {
3788 Bits = SrcBits[Src.size()];
3794 switch (
MI->getOpcode()) {
3795 case TargetOpcode::G_AND:
3796 case TargetOpcode::G_OR:
3797 case TargetOpcode::G_XOR: {
3802 if (!getOperandBits(
LHS, LHSBits) ||
3803 !getOperandBits(
RHS, RHSBits)) {
3805 return std::make_pair(0, 0);
3811 NumOpcodes +=
Op.first;
3812 LHSBits =
Op.second;
3817 NumOpcodes +=
Op.first;
3818 RHSBits =
Op.second;
3823 return std::make_pair(0, 0);
3827 switch (
MI->getOpcode()) {
3828 case TargetOpcode::G_AND:
3829 TTbl = LHSBits & RHSBits;
3831 case TargetOpcode::G_OR:
3832 TTbl = LHSBits | RHSBits;
3834 case TargetOpcode::G_XOR:
3835 TTbl = LHSBits ^ RHSBits;
3841 return std::make_pair(NumOpcodes + 1, TTbl);
3844bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
3850 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3856 unsigned NumOpcodes;
3858 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
3862 if (NumOpcodes < 2 || Src.empty())
3866 if (NumOpcodes == 2 && IsB32) {
3874 }
else if (NumOpcodes < 4) {
3881 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
3886 for (
unsigned I = 0;
I < Src.size(); ++
I) {
3888 if (RB->
getID() != AMDGPU::SGPRRegBankID)
3894 Register NewReg =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3905 while (Src.size() < 3)
3906 Src.push_back(Src[0]);
3923 MI.eraseFromParent();
3928bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
3941 WaveAddr =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3951 MI.eraseFromParent();
3957 if (!
I.isPreISelOpcode()) {
3959 return selectCOPY(
I);
3963 switch (
I.getOpcode()) {
3964 case TargetOpcode::G_AND:
3965 case TargetOpcode::G_OR:
3966 case TargetOpcode::G_XOR:
3967 if (selectBITOP3(
I))
3971 return selectG_AND_OR_XOR(
I);
3972 case TargetOpcode::G_ADD:
3973 case TargetOpcode::G_SUB:
3974 case TargetOpcode::G_PTR_ADD:
3977 return selectG_ADD_SUB(
I);
3978 case TargetOpcode::G_UADDO:
3979 case TargetOpcode::G_USUBO:
3980 case TargetOpcode::G_UADDE:
3981 case TargetOpcode::G_USUBE:
3982 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
3983 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3984 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3985 return selectG_AMDGPU_MAD_64_32(
I);
3986 case TargetOpcode::G_INTTOPTR:
3987 case TargetOpcode::G_BITCAST:
3988 case TargetOpcode::G_PTRTOINT:
3989 case TargetOpcode::G_FREEZE:
3990 return selectCOPY(
I);
3991 case TargetOpcode::G_FNEG:
3994 return selectG_FNEG(
I);
3995 case TargetOpcode::G_FABS:
3998 return selectG_FABS(
I);
3999 case TargetOpcode::G_EXTRACT:
4000 return selectG_EXTRACT(
I);
4001 case TargetOpcode::G_MERGE_VALUES:
4002 case TargetOpcode::G_CONCAT_VECTORS:
4003 return selectG_MERGE_VALUES(
I);
4004 case TargetOpcode::G_UNMERGE_VALUES:
4005 return selectG_UNMERGE_VALUES(
I);
4006 case TargetOpcode::G_BUILD_VECTOR:
4007 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4008 return selectG_BUILD_VECTOR(
I);
4009 case TargetOpcode::G_IMPLICIT_DEF:
4010 return selectG_IMPLICIT_DEF(
I);
4011 case TargetOpcode::G_INSERT:
4012 return selectG_INSERT(
I);
4013 case TargetOpcode::G_INTRINSIC:
4014 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4015 return selectG_INTRINSIC(
I);
4016 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4017 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4018 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4019 case TargetOpcode::G_ICMP:
4020 case TargetOpcode::G_FCMP:
4021 if (selectG_ICMP_or_FCMP(
I))
4024 case TargetOpcode::G_LOAD:
4025 case TargetOpcode::G_ZEXTLOAD:
4026 case TargetOpcode::G_SEXTLOAD:
4027 case TargetOpcode::G_STORE:
4028 case TargetOpcode::G_ATOMIC_CMPXCHG:
4029 case TargetOpcode::G_ATOMICRMW_XCHG:
4030 case TargetOpcode::G_ATOMICRMW_ADD:
4031 case TargetOpcode::G_ATOMICRMW_SUB:
4032 case TargetOpcode::G_ATOMICRMW_AND:
4033 case TargetOpcode::G_ATOMICRMW_OR:
4034 case TargetOpcode::G_ATOMICRMW_XOR:
4035 case TargetOpcode::G_ATOMICRMW_MIN:
4036 case TargetOpcode::G_ATOMICRMW_MAX:
4037 case TargetOpcode::G_ATOMICRMW_UMIN:
4038 case TargetOpcode::G_ATOMICRMW_UMAX:
4039 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4040 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4041 case TargetOpcode::G_ATOMICRMW_FADD:
4042 case TargetOpcode::G_ATOMICRMW_FMIN:
4043 case TargetOpcode::G_ATOMICRMW_FMAX:
4044 return selectG_LOAD_STORE_ATOMICRMW(
I);
4045 case TargetOpcode::G_SELECT:
4046 return selectG_SELECT(
I);
4047 case TargetOpcode::G_TRUNC:
4048 return selectG_TRUNC(
I);
4049 case TargetOpcode::G_SEXT:
4050 case TargetOpcode::G_ZEXT:
4051 case TargetOpcode::G_ANYEXT:
4052 case TargetOpcode::G_SEXT_INREG:
4059 return selectG_SZA_EXT(
I);
4060 case TargetOpcode::G_FPEXT:
4061 if (selectG_FPEXT(
I))
4064 case TargetOpcode::G_BRCOND:
4065 return selectG_BRCOND(
I);
4066 case TargetOpcode::G_GLOBAL_VALUE:
4067 return selectG_GLOBAL_VALUE(
I);
4068 case TargetOpcode::G_PTRMASK:
4069 return selectG_PTRMASK(
I);
4070 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4071 return selectG_EXTRACT_VECTOR_ELT(
I);
4072 case TargetOpcode::G_INSERT_VECTOR_ELT:
4073 return selectG_INSERT_VECTOR_ELT(
I);
4074 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4075 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4076 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4077 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4078 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4081 assert(
Intr &&
"not an image intrinsic with image pseudo");
4082 return selectImageIntrinsic(
I,
Intr);
4084 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
4085 return selectBVHIntrinsic(
I);
4086 case AMDGPU::G_SBFX:
4087 case AMDGPU::G_UBFX:
4088 return selectG_SBFX_UBFX(
I);
4089 case AMDGPU::G_SI_CALL:
4090 I.setDesc(TII.get(AMDGPU::SI_CALL));
4092 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4093 return selectWaveAddress(
I);
4094 case AMDGPU::G_STACKRESTORE:
4095 return selectStackRestore(
I);
4097 return selectPHI(
I);
4098 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4099 return selectCOPY_SCC_VCC(
I);
4100 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4101 return selectCOPY_VCC_SCC(
I);
4102 case AMDGPU::G_AMDGPU_READANYLANE:
4103 return selectReadAnyLane(
I);
4104 case TargetOpcode::G_CONSTANT:
4105 case TargetOpcode::G_FCONSTANT:
4113AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4120std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4121 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4125 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4126 Src =
MI->getOperand(1).getReg();
4129 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4134 if (LHS &&
LHS->isZero()) {
4136 Src =
MI->getOperand(2).getReg();
4140 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4141 Src =
MI->getOperand(1).getReg();
4148 return std::pair(Src, Mods);
4151Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4153 bool ForceVGPR)
const {
4154 if ((Mods != 0 || ForceVGPR) &&
4162 TII.get(AMDGPU::COPY), VGPRSrc)
4174AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4181AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4184 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4188 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4197AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4200 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4206 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4215AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4224AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4227 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4231 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4238AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4242 std::tie(Src, Mods) =
4243 selectVOP3ModsImpl(Root.
getReg(),
false);
4247 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4254AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4257 std::tie(Src, Mods) =
4258 selectVOP3ModsImpl(Root.
getReg(),
true,
4263 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4270AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4273 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4280std::pair<Register, unsigned>
4281AMDGPUInstructionSelector::selectVOP3PModsImpl(
4286 if (
MI->getOpcode() == AMDGPU::G_FNEG &&
4291 Src =
MI->getOperand(1).getReg();
4292 MI =
MRI.getVRegDef(Src);
4303 return std::pair(Src, Mods);
4307AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
4313 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI);
4322AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
4328 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true);
4337AMDGPUInstructionSelector::selectVOP3PModsNeg(
MachineOperand &Root)
const {
4342 "expected i1 value");
4352AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
4355 "expected i1 value");
4369 switch (Elts.
size()) {
4371 DstRegClass = &AMDGPU::VReg_256RegClass;
4374 DstRegClass = &AMDGPU::VReg_128RegClass;
4377 DstRegClass = &AMDGPU::VReg_64RegClass;
4384 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
4385 .addDef(
MRI.createVirtualRegister(DstRegClass));
4386 for (
unsigned i = 0; i < Elts.
size(); ++i) {
4397 if (ModOpcode == TargetOpcode::G_FNEG) {
4401 for (
auto El : Elts) {
4407 if (Elts.size() != NegAbsElts.
size()) {
4416 assert(ModOpcode == TargetOpcode::G_FABS);
4424AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
4429 if (
GBuildVector *BV = dyn_cast<GBuildVector>(
MRI->getVRegDef(Src))) {
4430 assert(BV->getNumSources() > 0);
4433 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
4436 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
4437 ElF32 =
MRI->getVRegDef(BV->getSourceReg(i));
4444 if (BV->getNumSources() == EltsF32.
size()) {
4455AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
4461 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
4469 if (CV->getNumSources() == EltsV2F16.
size()) {
4481AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
4487 assert(CV->getNumSources() > 0);
4490 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
4494 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
4495 ElV2F16 =
MRI->getVRegDef(CV->getSourceReg(i));
4502 if (CV->getNumSources() == EltsV2F16.
size()) {
4514AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
4515 std::optional<FPValueAndVReg> FPValReg;
4519 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
4539AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
4545 std::optional<ValueAndVReg> ShiftAmt;
4547 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4548 ShiftAmt->Value.getZExtValue() % 8 == 0) {
4549 Key = ShiftAmt->Value.getZExtValue() / 8;
4560AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
4567 std::optional<ValueAndVReg> ShiftAmt;
4569 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4570 ShiftAmt->Value.getZExtValue() == 16) {
4582AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
4585 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4595AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
4598 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4606 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
4613AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
4616 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4624 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
4630bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
4640 getAddrModeInfo(*
MI, *MRI, AddrInfo);
4642 if (AddrInfo.
empty())
4645 const GEPInfo &GEPI = AddrInfo[0];
4646 std::optional<int64_t> EncodedImm;
4651 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
4652 AddrInfo.
size() > 1) {
4653 const GEPInfo &GEPI2 = AddrInfo[1];
4654 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
4657 Base = GEPI2.SgprParts[0];
4658 *SOffset = OffsetReg;
4668 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
4680 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
4681 Base = GEPI.SgprParts[0];
4687 if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
4693 Base = GEPI.SgprParts[0];
4694 *SOffset =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4695 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
4700 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
4702 Base = GEPI.SgprParts[0];
4703 *SOffset = OffsetReg;
4712AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
4715 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset))
4716 return std::nullopt;
4723AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
4725 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
4727 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
4728 return std::nullopt;
4730 const GEPInfo &GEPInfo = AddrInfo[0];
4731 Register PtrReg = GEPInfo.SgprParts[0];
4732 std::optional<int64_t> EncodedImm =
4735 return std::nullopt;
4744AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
4746 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr))
4747 return std::nullopt;
4754AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
4757 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset))
4758 return std::nullopt;
4765std::pair<Register, int>
4766AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
4776 int64_t ConstOffset;
4777 std::tie(PtrBase, ConstOffset) =
4778 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
4781 !isFlatScratchBaseLegal(Root.
getReg())))
4784 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
4788 return std::pair(PtrBase, ConstOffset);
4792AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
4802AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
4812AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
4823AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
4826 int64_t ConstOffset;
4827 int64_t ImmOffset = 0;
4831 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(
Addr, *MRI);
4833 if (ConstOffset != 0) {
4837 ImmOffset = ConstOffset;
4840 if (isSGPR(PtrBaseDef->Reg)) {
4841 if (ConstOffset > 0) {
4847 int64_t SplitImmOffset, RemainderOffset;
4851 if (isUInt<32>(RemainderOffset)) {
4855 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4857 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
4859 .
addImm(RemainderOffset);
4876 unsigned NumLiterals =
4880 return std::nullopt;
4887 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4892 if (isSGPR(SAddr)) {
4893 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
4913 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
4914 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
4915 return std::nullopt;
4921 Register VOffset =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4923 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
4934AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
4937 int64_t ConstOffset;
4938 int64_t ImmOffset = 0;
4942 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(
Addr, *MRI);
4944 if (ConstOffset != 0 && isFlatScratchBaseLegal(
Addr) &&
4948 ImmOffset = ConstOffset;
4952 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
4953 int FI = AddrDef->MI->getOperand(1).
getIndex();
4962 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4963 Register LHS = AddrDef->MI->getOperand(1).getReg();
4964 Register RHS = AddrDef->MI->getOperand(2).getReg();
4968 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
4969 isSGPR(RHSDef->Reg)) {
4970 int FI = LHSDef->MI->getOperand(1).getIndex();
4974 SAddr =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4976 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
4984 return std::nullopt;
4993bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
5004 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
5006 return (VMax & 3) + (
SMax & 3) >= 4;
5010AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
5013 int64_t ConstOffset;
5014 int64_t ImmOffset = 0;
5018 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(
Addr, *MRI);
5021 if (ConstOffset != 0 &&
5024 ImmOffset = ConstOffset;
5028 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
5029 return std::nullopt;
5031 Register RHS = AddrDef->MI->getOperand(2).getReg();
5033 return std::nullopt;
5035 Register LHS = AddrDef->MI->getOperand(1).getReg();
5038 if (OrigAddr !=
Addr) {
5039 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
5040 return std::nullopt;
5042 if (!isFlatScratchBaseLegalSV(OrigAddr))
5043 return std::nullopt;
5046 if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
5047 return std::nullopt;
5049 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5050 int FI = LHSDef->MI->getOperand(1).getIndex();
5059 return std::nullopt;
5069AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
5078 Register HighBits =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5083 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5107 std::optional<int> FI;
5112 int64_t ConstOffset;
5113 std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
5114 if (ConstOffset != 0) {
5119 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
5125 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
5148bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
5161bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
5163 unsigned Size)
const {
5164 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
5166 if (!isUInt<8>(Offset0 /
Size) || !isUInt<8>(Offset1 /
Size))
5179 return Addr->getOpcode() == TargetOpcode::G_OR ||
5180 (
Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
5187bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
5201 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
5202 std::optional<ValueAndVReg> RhsValReg =
5208 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
5209 RhsValReg->Value.getSExtValue() > -0x40000000)
5218bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
5236bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
5245 std::optional<DefinitionAndSourceRegister> BaseDef =
5247 std::optional<ValueAndVReg> RHSOffset =
5257 (RHSOffset->Value.getSExtValue() < 0 &&
5258 RHSOffset->Value.getSExtValue() > -0x40000000)))
5261 Register LHS = BaseDef->MI->getOperand(1).getReg();
5262 Register RHS = BaseDef->MI->getOperand(2).getReg();
5266bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
5267 unsigned ShAmtBits)
const {
5268 assert(
MI.getOpcode() == TargetOpcode::G_AND);
5270 std::optional<APInt>
RHS =
5275 if (
RHS->countr_one() >= ShAmtBits)
5279 return (LHSKnownZeros | *RHS).
countr_one() >= ShAmtBits;
5283AMDGPUInstructionSelector::selectMUBUFScratchOffset(
5288 std::optional<DefinitionAndSourceRegister>
Def =
5290 assert(Def &&
"this shouldn't be an optional result");
5345std::pair<Register, unsigned>
5346AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
5348 int64_t ConstAddr = 0;
5352 std::tie(PtrBase,
Offset) =
5353 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5356 if (isDSOffsetLegal(PtrBase,
Offset)) {
5358 return std::pair(PtrBase,
Offset);
5360 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
5369 return std::pair(Root.
getReg(), 0);
5373AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
5376 std::tie(Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
5384AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
5385 return selectDSReadWrite2(Root, 4);
5389AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
5390 return selectDSReadWrite2(Root, 8);
5394AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
5395 unsigned Size)
const {
5398 std::tie(Reg,
Offset) = selectDSReadWrite2Impl(Root,
Size);
5406std::pair<Register, unsigned>
5407AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
5408 unsigned Size)
const {
5410 int64_t ConstAddr = 0;
5414 std::tie(PtrBase,
Offset) =
5415 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5418 int64_t OffsetValue0 =
Offset;
5420 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
5422 return std::pair(PtrBase, OffsetValue0 /
Size);
5424 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
5432 return std::pair(Root.
getReg(), 0);
5439std::pair<Register, int64_t>
5440AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
5443 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
5447 std::optional<ValueAndVReg> MaybeOffset =
5463 Register RSrc2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5464 Register RSrc3 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5465 Register RSrcHi =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5466 Register RSrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
5468 B.buildInstr(AMDGPU::S_MOV_B32)
5471 B.buildInstr(AMDGPU::S_MOV_B32)
5478 B.buildInstr(AMDGPU::REG_SEQUENCE)
5481 .addImm(AMDGPU::sub0)
5483 .addImm(AMDGPU::sub1);
5487 RSrcLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5488 B.buildInstr(AMDGPU::S_MOV_B64)
5493 B.buildInstr(AMDGPU::REG_SEQUENCE)
5496 .addImm(AMDGPU::sub0_sub1)
5498 .addImm(AMDGPU::sub2_sub3);
5505 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
5514 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
5521AMDGPUInstructionSelector::MUBUFAddressData
5522AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
5523 MUBUFAddressData
Data;
5529 std::tie(PtrBase,
Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
5530 if (isUInt<32>(
Offset)) {
5537 Data.N2 = InputAdd->getOperand(1).getReg();
5538 Data.N3 = InputAdd->getOperand(2).getReg();
5553bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData
Addr)
const {
5560 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
5566void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
5572 SOffset =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5573 B.buildInstr(AMDGPU::S_MOV_B32)
5579bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
5587 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
5588 if (!shouldUseAddr64(AddrData))
5594 Offset = AddrData.Offset;
5600 if (RBI.
getRegBank(N2, *MRI, TRI)->
getID() == AMDGPU::VGPRRegBankID) {
5602 if (RBI.
getRegBank(N3, *MRI, TRI)->
getID() == AMDGPU::VGPRRegBankID) {
5615 }
else if (RBI.
getRegBank(N0, *MRI, TRI)->
getID() == AMDGPU::VGPRRegBankID) {
5626 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
5630bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
5638 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
5639 if (shouldUseAddr64(AddrData))
5645 Offset = AddrData.Offset;
5651 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
5656AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
5662 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
5678 MIB.
addReg(AMDGPU::SGPR_NULL);
5692AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
5697 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
5708 MIB.
addReg(AMDGPU::SGPR_NULL);
5720AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
5725 SOffset = AMDGPU::SGPR_NULL;
5731static std::optional<uint64_t>
5735 if (!OffsetVal || !isInt<32>(*OffsetVal))
5736 return std::nullopt;
5737 return Lo_32(*OffsetVal);
5741AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
5742 std::optional<uint64_t> OffsetVal =
5747 std::optional<int64_t> EncodedImm =
5756AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
5763 std::optional<int64_t> EncodedImm =
5772AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
5780 return std::nullopt;
5782 std::optional<int64_t> EncodedOffset =
5785 return std::nullopt;
5792std::pair<Register, unsigned>
5793AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
5794 bool &Matched)
const {
5799 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5809 const auto CheckAbsNeg = [&]() {
5814 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
5845AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
5850 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5861AMDGPUInstructionSelector::selectVOP3PMadMixMods(
MachineOperand &Root)
const {
5865 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5873bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
5877 Register CCReg =
I.getOperand(0).getReg();
5879 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
5880 .
addImm(
I.getOperand(2).getImm());
5884 I.eraseFromParent();
5889bool AMDGPUInstructionSelector::selectSGetBarrierState(
5894 std::optional<int64_t> BarValImm =
5898 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
5903 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
5904 : AMDGPU::S_GET_BARRIER_STATE_M0;
5907 auto DstReg =
I.getOperand(0).getReg();
5909 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
5916 I.eraseFromParent();
5921 if (HasInlineConst) {
5925 case Intrinsic::amdgcn_s_barrier_join:
5926 return AMDGPU::S_BARRIER_JOIN_IMM;
5927 case Intrinsic::amdgcn_s_get_named_barrier_state:
5928 return AMDGPU::S_GET_BARRIER_STATE_IMM;
5934 case Intrinsic::amdgcn_s_barrier_join:
5935 return AMDGPU::S_BARRIER_JOIN_M0;
5936 case Intrinsic::amdgcn_s_get_named_barrier_state:
5937 return AMDGPU::S_GET_BARRIER_STATE_M0;
5942bool AMDGPUInstructionSelector::selectNamedBarrierInit(
5950 Register TmpReg0 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5956 Register TmpReg1 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5963 Register TmpReg2 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5969 Register TmpReg3 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5970 constexpr unsigned ShAmt = 16;
5976 Register TmpReg4 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5986 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
5987 ? AMDGPU::S_BARRIER_INIT_M0
5988 : AMDGPU::S_BARRIER_SIGNAL_M0;
5992 I.eraseFromParent();
5996bool AMDGPUInstructionSelector::selectNamedBarrierInst(
6000 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
6003 std::optional<int64_t> BarValImm =
6008 Register TmpReg0 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6014 Register TmpReg1 =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6020 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6029 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
6030 auto DstReg =
I.getOperand(0).getReg();
6032 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
6039 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6043 I.eraseFromParent();
6050 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6051 "Expected G_CONSTANT");
6052 MIB.
addImm(
MI.getOperand(1).getCImm()->getSExtValue());
6058 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6059 "Expected G_CONSTANT");
6060 MIB.
addImm(-
MI.getOperand(1).getCImm()->getSExtValue());
6067 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
6068 MIB.
addImm(
Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
6074 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6075 "Expected G_CONSTANT");
6076 MIB.
addImm(
MI.getOperand(1).getCImm()->getValue().popcount());
6095 MIB.
addImm(
MI.getOperand(OpIdx).getImm() != 0);
6101 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6105void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
6107 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6112void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
6114 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6115 MIB.
addImm((
MI.getOperand(OpIdx).getImm() & 0x2)
6120void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
6122 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6127void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
6129 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6130 MIB.
addImm((
MI.getOperand(OpIdx).getImm() & 0x1)
6135void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
6137 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6142void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
6144 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6149void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
6151 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6156void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
6158 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6166 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6167 MIB.
addImm(
MI.getOperand(OpIdx).getImm() &
6175 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6176 const bool Swizzle =
MI.getOperand(OpIdx).getImm() &
6182void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
6184 assert(OpIdx >= 0 &&
"expected to match an immediate operand");
6185 const uint32_t Cpol =
MI.getOperand(OpIdx).getImm() &
6200 const APFloat &APF =
MI.getOperand(1).getFPImm()->getValueAPF();
6202 assert(ExpVal != INT_MIN);
6213 MIB.
addImm((
MI.getOperand(OpIdx).getImm() + 3) % 4);
6217void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
6219 unsigned Val =
MI.getOperand(OpIdx).getImm();
6228bool AMDGPUInstructionSelector::isInlineImmediate(
const APInt &Imm)
const {
6232bool AMDGPUInstructionSelector::isInlineImmediate(
const APFloat &Imm)
const {
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static unsigned getIntrinsicID(const SDNode *N)
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID)
#define GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
static Register getWaveAddress(const MachineInstr *Def)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static bool shouldUseAndMask(unsigned Size, unsigned &Mask)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static bool isLaneMaskFromSameBlock(Register Reg, MachineRegisterInfo &MRI, MachineBasicBlock *MBB)
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static std::pair< Register, unsigned > computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, Register IdxReg, unsigned EltSize, GISelKnownBits &KnownBits)
Return the register to use for the index value, and the subregister to use for the indirectly accesse...
static void addZeroImm(MachineInstrBuilder &MIB)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static bool isConstant(const MachineInstr &MI)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI, uint32_t FormatLo, uint32_t FormatHi, Register BasePtr)
Return a resource descriptor for use with an arbitrary 64-bit pointer.
static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg)
Match a zero extend from a 32-bit value to 64-bits.
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64)
static Register stripCopy(Register Reg, MachineRegisterInfo &MRI)
static Register stripBitCast(Register Reg, MachineRegisterInfo &MRI)
static std::optional< uint64_t > getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI)
Get an immediate that must be 32-bits, and treated as zero extended.
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, const GCNSubtarget &ST)
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI)
static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
This file declares the targeting of the InstructionSelector class for AMDGPU.
AMDGPU Register Bank Select
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static std::vector< std::pair< int, unsigned > > Swizzle(std::vector< std::pair< int, unsigned > > Src, R600InstrInfo::BankSwizzle Swz)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
AMDGPUInstructionSelector(const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM)
static const char * getName()
bool select(MachineInstr &I) override
Select the (possibly generic) instruction I to only use target-specific opcodes.
void setupMF(MachineFunction &MF, GISelKnownBits *KB, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) override
Setup per-MF executor state.
uint32_t getLDSSize() const
bool isEntryFunction() const
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override
Get a register bank that covers RC.
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
LLVM_READONLY int getExactLog2Abs() const
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
ConstantFP - Floating Point Values [float, double].
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
Represents a G_BUILD_VECTOR.
bool useVGPRIndexMode() const
bool hasPermlane32Swap() const
bool hasScalarCompareEq64() const
int getLDSBankCount() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool unsafeDSOffsetFoldingEnabled() const
bool hasBitOp3Insts() const
bool hasFlatInstOffsets() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool privateMemoryResourceIsRangeChecked() const
bool hasSignedScratchOffsets() const
bool hasRestrictedSOffset() const
const SITargetLowering * getTargetLowering() const override
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasPermlane16Swap() const
bool hasFlatScratchSVSSwizzleBug() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool hasGWSSemaReleaseAll() const
bool hasAddNoCarry() const
bool hasSALUFloatInsts() const
bool hasPartialNSAEncoding() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
Represents a G_CONCAT_VECTORS.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
virtual void setupMF(MachineFunction &mf, GISelKnownBits *kb, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
CodeGenCoverage * CoverageInfo
APInt getKnownOnes(Register R)
KnownBits getKnownBits(Register R)
bool signBitIsZero(Register Op)
APInt getKnownZeroes(Register R)
Module * getParent()
Get the module that this global value is contained inside of...
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Describe properties that are true of each instruction in the target description file.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
const Value * getValue() const
Return the base address of the memory access.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const ConstantInt * getCImm() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ArrayRef< int > getShuffleMask() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
bool isInternalRead() const
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Analysis providing profile information.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns true if Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCRegister getReturnAddressReg(const MachineFunction &MF) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
const TargetRegisterClass * getBoolRC() const
MCRegister getExec() const
const TargetRegisterClass * getWaveMaskRegClass() const
static bool isSGPRClass(const TargetRegisterClass *RC)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const Triple & getTargetTriple() const
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
OSType getOS() const
Get the parsed operating system type of this triple.
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Value(Type *Ty, unsigned scid)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
LLVM_READONLY int getGlobalSaddrOp(uint16_t Opcode)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelKnownBits *KnownBits=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
IndexMode
ARM Index Modes.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
operand_type_match m_Reg()
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
SpecificConstantMatch m_AllOnesInt()
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
UnaryOp_match< SrcTy, TargetOpcode::G_FABS > m_GFabs(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
int popcount(T Value) noexcept
Count the number of set bits in a value.
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
unsigned getUndefRegState(bool B)
@ SMax
Signed integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
@ Default
The result values are uniform if and only if all operands are uniform.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
This class contains a discriminated union of information about pointers in memory operands,...
int64_t Offset
Offset - This is an offset from the base Value*.
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.