29#include "llvm/IR/IntrinsicsAMDGPU.h"
32#define DEBUG_TYPE "amdgpu-isel"
37#define GET_GLOBALISEL_IMPL
38#define AMDGPUSubtarget GCNSubtarget
39#include "AMDGPUGenGlobalISel.inc"
40#undef GET_GLOBALISEL_IMPL
46 : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
49#include
"AMDGPUGenGlobalISel.inc"
52#include
"AMDGPUGenGlobalISel.inc"
64 MRI = &
MF.getRegInfo();
72 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
73 ? Def->getOperand(1).getReg()
83 auto &RegClassOrBank = MRI.getRegClassOrRegBank(
Reg);
84 const TargetRegisterClass *RC =
87 const LLT Ty = MRI.getType(
Reg);
91 return MRI.getVRegDef(
Reg)->getOpcode() != AMDGPU::G_TRUNC &&
96 return RB->
getID() == AMDGPU::VCCRegBankID;
99bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(
MachineInstr &
MI,
100 unsigned NewOpc)
const {
101 MI.setDesc(TII.get(NewOpc));
105 MachineOperand &Dst =
MI.getOperand(0);
106 MachineOperand &Src =
MI.getOperand(1);
112 const TargetRegisterClass *DstRC
113 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
114 const TargetRegisterClass *SrcRC
115 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
116 if (!DstRC || DstRC != SrcRC)
119 return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
120 RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
123bool AMDGPUInstructionSelector::selectCOPY(
MachineInstr &
I)
const {
126 I.setDesc(TII.get(TargetOpcode::COPY));
128 const MachineOperand &Src =
I.getOperand(1);
129 MachineOperand &Dst =
I.getOperand(0);
133 if (isVCC(DstReg, *MRI)) {
134 if (SrcReg == AMDGPU::SCC) {
135 const TargetRegisterClass *RC
136 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
139 return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
142 if (!isVCC(SrcReg, *MRI)) {
144 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
147 const TargetRegisterClass *SrcRC
148 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
150 std::optional<ValueAndVReg> ConstVal =
154 STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
156 .
addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
158 Register MaskedReg = MRI->createVirtualRegister(SrcRC);
165 assert(Subtarget->useRealTrue16Insts());
166 const int64_t NoMods = 0;
167 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
173 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
180 bool IsSGPR = TRI.isSGPRClass(SrcRC);
181 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
188 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
194 if (!MRI->getRegClassOrNull(SrcReg))
195 MRI->setRegClass(SrcReg, SrcRC);
200 const TargetRegisterClass *RC =
201 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
202 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
208 for (
const MachineOperand &MO :
I.operands()) {
209 if (MO.getReg().isPhysical())
212 const TargetRegisterClass *RC =
213 TRI.getConstrainedRegClassForOperand(MO, *MRI);
216 RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
221bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(
MachineInstr &
I)
const {
226 STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
228 .
addReg(
I.getOperand(1).getReg())
233 Register DstReg =
I.getOperand(0).getReg();
237 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
240bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(
MachineInstr &
I)
const {
244 Register DstReg =
I.getOperand(0).getReg();
245 Register SrcReg =
I.getOperand(1).getReg();
246 std::optional<ValueAndVReg> Arg =
250 const int64_t
Value = Arg->Value.getZExtValue();
252 unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
259 return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);
265 unsigned SelectOpcode =
266 STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
275bool AMDGPUInstructionSelector::selectReadAnyLane(
MachineInstr &
I)
const {
276 Register DstReg =
I.getOperand(0).getReg();
277 Register SrcReg =
I.getOperand(1).getReg();
282 auto RFL =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
289bool AMDGPUInstructionSelector::selectPHI(
MachineInstr &
I)
const {
290 const Register DefReg =
I.getOperand(0).getReg();
291 const LLT DefTy = MRI->getType(DefReg);
303 MRI->getRegClassOrRegBank(DefReg);
305 const TargetRegisterClass *DefRC =
314 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
323 for (
unsigned i = 1; i !=
I.getNumOperands(); i += 2) {
324 const Register SrcReg =
I.getOperand(i).getReg();
326 const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
328 const LLT SrcTy = MRI->getType(SrcReg);
329 const TargetRegisterClass *SrcRC =
330 TRI.getRegClassForTypeOnBank(SrcTy, *RB);
331 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
336 I.setDesc(TII.get(TargetOpcode::PHI));
337 return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
343 unsigned SubIdx)
const {
347 Register DstReg = MRI->createVirtualRegister(&SubRC);
350 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.
getSubReg(), SubIdx);
352 BuildMI(*BB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
378 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
380 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
382 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
388bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(
MachineInstr &
I)
const {
389 Register DstReg =
I.getOperand(0).getReg();
390 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
392 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
393 if (DstRB->
getID() != AMDGPU::SGPRRegBankID &&
394 DstRB->
getID() != AMDGPU::VCCRegBankID)
397 bool Is64 =
Size > 32 || (DstRB->
getID() == AMDGPU::VCCRegBankID &&
409bool AMDGPUInstructionSelector::selectG_ADD_SUB(
MachineInstr &
I)
const {
412 Register DstReg =
I.getOperand(0).getReg();
414 LLT Ty = MRI->getType(DstReg);
419 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
420 const bool IsSALU = DstRB->
getID() == AMDGPU::SGPRRegBankID;
421 const bool Sub =
I.getOpcode() == TargetOpcode::G_SUB;
425 const unsigned Opc =
Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
428 .
add(
I.getOperand(1))
429 .
add(
I.getOperand(2))
435 if (STI.hasAddNoCarry()) {
436 const unsigned Opc =
Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
437 I.setDesc(TII.get(
Opc));
443 const unsigned Opc =
Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
445 Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
449 .
add(
I.getOperand(1))
450 .
add(
I.getOperand(2))
456 assert(!
Sub &&
"illegal sub should not reach here");
458 const TargetRegisterClass &RC
459 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
460 const TargetRegisterClass &HalfRC
461 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
463 MachineOperand Lo1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub0));
464 MachineOperand Lo2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub0));
465 MachineOperand Hi1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub1));
466 MachineOperand Hi2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub1));
468 Register DstLo = MRI->createVirtualRegister(&HalfRC);
469 Register DstHi = MRI->createVirtualRegister(&HalfRC);
472 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
475 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
480 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
481 Register CarryReg = MRI->createVirtualRegister(CarryRC);
482 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
487 MachineInstr *Addc =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
498 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
505 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
512bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
517 Register Dst0Reg =
I.getOperand(0).getReg();
518 Register Dst1Reg =
I.getOperand(1).getReg();
519 const bool IsAdd =
I.getOpcode() == AMDGPU::G_UADDO ||
520 I.getOpcode() == AMDGPU::G_UADDE;
521 const bool HasCarryIn =
I.getOpcode() == AMDGPU::G_UADDE ||
522 I.getOpcode() == AMDGPU::G_USUBE;
524 if (isVCC(Dst1Reg, *MRI)) {
525 unsigned NoCarryOpc =
526 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
527 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
528 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
534 Register Src0Reg =
I.getOperand(2).getReg();
535 Register Src1Reg =
I.getOperand(3).getReg();
538 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
539 .
addReg(
I.getOperand(4).getReg());
542 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
543 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
545 auto CarryInst =
BuildMI(*BB, &
I,
DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
546 .
add(
I.getOperand(2))
547 .
add(
I.getOperand(3));
549 if (MRI->use_nodbg_empty(Dst1Reg)) {
550 CarryInst.setOperandDead(3);
552 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), Dst1Reg)
554 if (!MRI->getRegClassOrNull(Dst1Reg))
555 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
558 if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
559 !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
560 !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
564 !RBI.constrainGenericRegister(
I.getOperand(4).getReg(),
565 AMDGPU::SReg_32RegClass, *MRI))
572bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
576 const bool IsUnsigned =
I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
577 bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
578 MRI->use_nodbg_empty(
I.getOperand(1).getReg());
581 if (Subtarget->hasMADIntraFwdBug())
582 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
583 : AMDGPU::V_MAD_I64_I32_gfx11_e64;
585 Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
586 : AMDGPU::V_MAD_NC_I64_I32_e64;
588 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
593 I.setDesc(TII.get(
Opc));
595 I.addImplicitDefUseOperands(*
MF);
600bool AMDGPUInstructionSelector::selectG_EXTRACT(
MachineInstr &
I)
const {
602 Register DstReg =
I.getOperand(0).getReg();
603 Register SrcReg =
I.getOperand(1).getReg();
604 LLT DstTy = MRI->getType(DstReg);
605 LLT SrcTy = MRI->getType(SrcReg);
610 unsigned Offset =
I.getOperand(2).getImm();
611 if (
Offset % 32 != 0 || DstSize > 128)
619 const TargetRegisterClass *DstRC =
620 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
621 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
624 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
625 const TargetRegisterClass *SrcRC =
626 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
631 SrcRC = TRI.getSubClassWithSubReg(SrcRC,
SubReg);
636 *SrcRC,
I.getOperand(1));
638 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::COPY), DstReg)
645bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(
MachineInstr &
MI)
const {
646 MachineBasicBlock *BB =
MI.getParent();
648 LLT DstTy = MRI->getType(DstReg);
649 LLT SrcTy = MRI->getType(
MI.getOperand(1).getReg());
656 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
658 const TargetRegisterClass *DstRC =
659 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
663 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
664 MachineInstrBuilder MIB =
665 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
666 for (
int I = 0,
E =
MI.getNumOperands() - 1;
I !=
E; ++
I) {
667 MachineOperand &Src =
MI.getOperand(
I + 1);
671 const TargetRegisterClass *SrcRC
672 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
673 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
677 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
680 MI.eraseFromParent();
684bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(
MachineInstr &
MI)
const {
685 MachineBasicBlock *BB =
MI.getParent();
686 const int NumDst =
MI.getNumOperands() - 1;
688 MachineOperand &Src =
MI.getOperand(NumDst);
692 LLT DstTy = MRI->getType(DstReg0);
693 LLT SrcTy = MRI->getType(SrcReg);
698 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
700 const TargetRegisterClass *SrcRC =
701 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
702 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
708 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
709 for (
int I = 0,
E = NumDst;
I !=
E; ++
I) {
710 MachineOperand &Dst =
MI.getOperand(
I);
711 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::COPY), Dst.getReg())
712 .
addReg(SrcReg, 0, SubRegs[
I]);
715 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[
I]);
716 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
719 const TargetRegisterClass *DstRC =
720 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
721 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
725 MI.eraseFromParent();
729bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(
MachineInstr &
MI)
const {
730 assert(
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
731 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
735 LLT SrcTy = MRI->getType(Src0);
739 if (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
740 return selectG_MERGE_VALUES(
MI);
747 (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
751 const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
752 if (DstBank->
getID() == AMDGPU::AGPRRegBankID)
755 assert(DstBank->
getID() == AMDGPU::SGPRRegBankID ||
756 DstBank->
getID() == AMDGPU::VGPRRegBankID);
757 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
760 MachineBasicBlock *BB =
MI.getParent();
770 const int64_t K0 = ConstSrc0->Value.getSExtValue();
771 const int64_t K1 = ConstSrc1->Value.getSExtValue();
772 uint32_t Lo16 =
static_cast<uint32_t
>(K0) & 0xffff;
773 uint32_t Hi16 =
static_cast<uint32_t
>(K1) & 0xffff;
774 uint32_t
Imm = Lo16 | (Hi16 << 16);
779 MI.eraseFromParent();
780 return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);
785 MI.eraseFromParent();
786 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
797 if (Src1Def->
getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
798 MI.setDesc(TII.get(AMDGPU::COPY));
801 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
802 return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
803 RBI.constrainGenericRegister(Src0, RC, *MRI);
808 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
809 auto MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
815 MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
822 MI.eraseFromParent();
847 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
848 if (Shift0 && Shift1) {
849 Opc = AMDGPU::S_PACK_HH_B32_B16;
850 MI.getOperand(1).setReg(ShiftSrc0);
851 MI.getOperand(2).setReg(ShiftSrc1);
853 Opc = AMDGPU::S_PACK_LH_B32_B16;
854 MI.getOperand(2).setReg(ShiftSrc1);
858 if (ConstSrc1 && ConstSrc1->Value == 0) {
860 auto MIB =
BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
865 MI.eraseFromParent();
868 if (STI.hasSPackHL()) {
869 Opc = AMDGPU::S_PACK_HL_B32_B16;
870 MI.getOperand(1).setReg(ShiftSrc0);
874 MI.setDesc(TII.get(
Opc));
878bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(
MachineInstr &
I)
const {
879 const MachineOperand &MO =
I.getOperand(0);
883 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
884 if ((!RC && !MRI->getRegBankOrNull(MO.
getReg())) ||
885 (RC && RBI.constrainGenericRegister(MO.
getReg(), *RC, *MRI))) {
886 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
893bool AMDGPUInstructionSelector::selectG_INSERT(
MachineInstr &
I)
const {
896 Register DstReg =
I.getOperand(0).getReg();
897 Register Src0Reg =
I.getOperand(1).getReg();
898 Register Src1Reg =
I.getOperand(2).getReg();
899 LLT Src1Ty = MRI->getType(Src1Reg);
901 unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
904 int64_t
Offset =
I.getOperand(3).getImm();
907 if (
Offset % 32 != 0 || InsSize % 32 != 0)
914 unsigned SubReg = TRI.getSubRegFromChannel(
Offset / 32, InsSize / 32);
915 if (
SubReg == AMDGPU::NoSubRegister)
918 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
919 const TargetRegisterClass *DstRC =
920 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
924 const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
925 const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
926 const TargetRegisterClass *Src0RC =
927 TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
928 const TargetRegisterClass *Src1RC =
929 TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
933 Src0RC = TRI.getSubClassWithSubReg(Src0RC,
SubReg);
934 if (!Src0RC || !Src1RC)
937 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
938 !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
939 !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
943 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
952bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(
MachineInstr &
MI)
const {
955 Register OffsetReg =
MI.getOperand(2).getReg();
956 Register WidthReg =
MI.getOperand(3).getReg();
958 assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
959 "scalar BFX instructions are expanded in regbankselect");
960 assert(MRI->getType(
MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
961 "64-bit vector BFX instructions are expanded in regbankselect");
964 MachineBasicBlock *
MBB =
MI.getParent();
966 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SBFX;
967 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
972 MI.eraseFromParent();
976bool AMDGPUInstructionSelector::selectInterpP1F16(
MachineInstr &
MI)
const {
977 if (STI.getLDSBankCount() != 16)
983 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
984 !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
985 !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
995 Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
997 MachineBasicBlock *
MBB =
MI.getParent();
1001 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
1004 .
addImm(
MI.getOperand(3).getImm());
1017 MI.eraseFromParent();
1026bool AMDGPUInstructionSelector::selectWritelane(
MachineInstr &
MI)
const {
1028 if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
1031 MachineBasicBlock *
MBB =
MI.getParent();
1035 Register LaneSelect =
MI.getOperand(3).getReg();
1038 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
1040 std::optional<ValueAndVReg> ConstSelect =
1046 MIB.
addImm(ConstSelect->Value.getSExtValue() &
1049 std::optional<ValueAndVReg> ConstVal =
1055 STI.hasInv2PiInlineImm())) {
1056 MIB.
addImm(ConstVal->Value.getSExtValue());
1064 RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
1066 BuildMI(*
MBB, *MIB,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1074 MI.eraseFromParent();
1080bool AMDGPUInstructionSelector::selectDivScale(
MachineInstr &
MI)
const {
1084 LLT Ty = MRI->getType(Dst0);
1087 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
1089 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
1096 MachineBasicBlock *
MBB =
MI.getParent();
1100 unsigned ChooseDenom =
MI.getOperand(5).getImm();
1102 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1115 MI.eraseFromParent();
1119bool AMDGPUInstructionSelector::selectG_INTRINSIC(
MachineInstr &
I)
const {
1121 switch (IntrinsicID) {
1122 case Intrinsic::amdgcn_if_break: {
1127 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1128 .
add(
I.getOperand(0))
1129 .
add(
I.getOperand(2))
1130 .
add(
I.getOperand(3));
1132 Register DstReg =
I.getOperand(0).getReg();
1133 Register Src0Reg =
I.getOperand(2).getReg();
1134 Register Src1Reg =
I.getOperand(3).getReg();
1136 I.eraseFromParent();
1139 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1143 case Intrinsic::amdgcn_interp_p1_f16:
1144 return selectInterpP1F16(
I);
1145 case Intrinsic::amdgcn_wqm:
1146 return constrainCopyLikeIntrin(
I, AMDGPU::WQM);
1147 case Intrinsic::amdgcn_softwqm:
1148 return constrainCopyLikeIntrin(
I, AMDGPU::SOFT_WQM);
1149 case Intrinsic::amdgcn_strict_wwm:
1150 case Intrinsic::amdgcn_wwm:
1151 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WWM);
1152 case Intrinsic::amdgcn_strict_wqm:
1153 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WQM);
1154 case Intrinsic::amdgcn_writelane:
1155 return selectWritelane(
I);
1156 case Intrinsic::amdgcn_div_scale:
1157 return selectDivScale(
I);
1158 case Intrinsic::amdgcn_icmp:
1159 case Intrinsic::amdgcn_fcmp:
1162 return selectIntrinsicCmp(
I);
1163 case Intrinsic::amdgcn_ballot:
1164 return selectBallot(
I);
1165 case Intrinsic::amdgcn_reloc_constant:
1166 return selectRelocConstant(
I);
1167 case Intrinsic::amdgcn_groupstaticsize:
1168 return selectGroupStaticSize(
I);
1169 case Intrinsic::returnaddress:
1170 return selectReturnAddress(
I);
1171 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1172 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1173 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1174 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1175 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1176 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1177 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1178 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1179 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1180 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1181 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1182 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1183 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1184 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1185 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1186 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1187 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1188 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1189 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1190 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1191 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1192 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1193 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1194 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1195 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1196 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1197 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1198 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1199 return selectSMFMACIntrin(
I);
1200 case Intrinsic::amdgcn_permlane16_swap:
1201 case Intrinsic::amdgcn_permlane32_swap:
1202 return selectPermlaneSwapIntrin(
I, IntrinsicID);
1213 if (
Size == 16 && !ST.has16BitInsts())
1216 const auto Select = [&](
unsigned S16Opc,
unsigned TrueS16Opc,
1217 unsigned FakeS16Opc,
unsigned S32Opc,
1220 return ST.hasTrue16BitInsts()
1221 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1232 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1233 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1234 AMDGPU::V_CMP_NE_U64_e64);
1236 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1237 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1238 AMDGPU::V_CMP_EQ_U64_e64);
1240 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1241 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1242 AMDGPU::V_CMP_GT_I64_e64);
1244 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1245 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1246 AMDGPU::V_CMP_GE_I64_e64);
1248 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1249 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1250 AMDGPU::V_CMP_LT_I64_e64);
1252 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1253 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1254 AMDGPU::V_CMP_LE_I64_e64);
1256 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1257 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1258 AMDGPU::V_CMP_GT_U64_e64);
1260 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1261 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1262 AMDGPU::V_CMP_GE_U64_e64);
1264 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1265 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1266 AMDGPU::V_CMP_LT_U64_e64);
1268 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1269 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1270 AMDGPU::V_CMP_LE_U64_e64);
1273 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1274 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1275 AMDGPU::V_CMP_EQ_F64_e64);
1277 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1278 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1279 AMDGPU::V_CMP_GT_F64_e64);
1281 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1282 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1283 AMDGPU::V_CMP_GE_F64_e64);
1285 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1286 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1287 AMDGPU::V_CMP_LT_F64_e64);
1289 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1290 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1291 AMDGPU::V_CMP_LE_F64_e64);
1293 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1294 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1295 AMDGPU::V_CMP_NEQ_F64_e64);
1297 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1298 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1299 AMDGPU::V_CMP_O_F64_e64);
1301 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1302 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1303 AMDGPU::V_CMP_U_F64_e64);
1305 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1306 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1307 AMDGPU::V_CMP_NLG_F64_e64);
1309 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1310 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1311 AMDGPU::V_CMP_NLE_F64_e64);
1313 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1314 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1315 AMDGPU::V_CMP_NLT_F64_e64);
1317 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1318 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1319 AMDGPU::V_CMP_NGE_F64_e64);
1321 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1322 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1323 AMDGPU::V_CMP_NGT_F64_e64);
1325 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1326 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1327 AMDGPU::V_CMP_NEQ_F64_e64);
1329 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1330 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1331 AMDGPU::V_CMP_TRU_F64_e64);
1333 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1334 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1335 AMDGPU::V_CMP_F_F64_e64);
1340 unsigned Size)
const {
1342 if (!STI.hasScalarCompareEq64())
1347 return AMDGPU::S_CMP_LG_U64;
1349 return AMDGPU::S_CMP_EQ_U64;
1358 return AMDGPU::S_CMP_LG_U32;
1360 return AMDGPU::S_CMP_EQ_U32;
1362 return AMDGPU::S_CMP_GT_I32;
1364 return AMDGPU::S_CMP_GE_I32;
1366 return AMDGPU::S_CMP_LT_I32;
1368 return AMDGPU::S_CMP_LE_I32;
1370 return AMDGPU::S_CMP_GT_U32;
1372 return AMDGPU::S_CMP_GE_U32;
1374 return AMDGPU::S_CMP_LT_U32;
1376 return AMDGPU::S_CMP_LE_U32;
1378 return AMDGPU::S_CMP_EQ_F32;
1380 return AMDGPU::S_CMP_GT_F32;
1382 return AMDGPU::S_CMP_GE_F32;
1384 return AMDGPU::S_CMP_LT_F32;
1386 return AMDGPU::S_CMP_LE_F32;
1388 return AMDGPU::S_CMP_LG_F32;
1390 return AMDGPU::S_CMP_O_F32;
1392 return AMDGPU::S_CMP_U_F32;
1394 return AMDGPU::S_CMP_NLG_F32;
1396 return AMDGPU::S_CMP_NLE_F32;
1398 return AMDGPU::S_CMP_NLT_F32;
1400 return AMDGPU::S_CMP_NGE_F32;
1402 return AMDGPU::S_CMP_NGT_F32;
1404 return AMDGPU::S_CMP_NEQ_F32;
1411 if (!STI.hasSALUFloatInsts())
1416 return AMDGPU::S_CMP_EQ_F16;
1418 return AMDGPU::S_CMP_GT_F16;
1420 return AMDGPU::S_CMP_GE_F16;
1422 return AMDGPU::S_CMP_LT_F16;
1424 return AMDGPU::S_CMP_LE_F16;
1426 return AMDGPU::S_CMP_LG_F16;
1428 return AMDGPU::S_CMP_O_F16;
1430 return AMDGPU::S_CMP_U_F16;
1432 return AMDGPU::S_CMP_NLG_F16;
1434 return AMDGPU::S_CMP_NLE_F16;
1436 return AMDGPU::S_CMP_NLT_F16;
1438 return AMDGPU::S_CMP_NGE_F16;
1440 return AMDGPU::S_CMP_NGT_F16;
1442 return AMDGPU::S_CMP_NEQ_F16;
1451bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(
MachineInstr &
I)
const {
1456 Register SrcReg =
I.getOperand(2).getReg();
1457 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1461 Register CCReg =
I.getOperand(0).getReg();
1462 if (!isVCC(CCReg, *MRI)) {
1463 int Opcode = getS_CMPOpcode(Pred,
Size);
1466 MachineInstr *ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode))
1467 .
add(
I.getOperand(2))
1468 .
add(
I.getOperand(3));
1469 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CCReg)
1473 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
1474 I.eraseFromParent();
1478 if (
I.getOpcode() == AMDGPU::G_FCMP)
1485 MachineInstrBuilder ICmp;
1488 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1490 .
add(
I.getOperand(2))
1492 .
add(
I.getOperand(3))
1495 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1496 .
add(
I.getOperand(2))
1497 .
add(
I.getOperand(3));
1501 *TRI.getBoolRC(), *MRI);
1503 I.eraseFromParent();
1507bool AMDGPUInstructionSelector::selectIntrinsicCmp(
MachineInstr &
I)
const {
1508 Register Dst =
I.getOperand(0).getReg();
1509 if (isVCC(Dst, *MRI))
1512 LLT DstTy = MRI->getType(Dst);
1518 Register SrcReg =
I.getOperand(2).getReg();
1519 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1527 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1528 I.eraseFromParent();
1529 return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1536 MachineInstrBuilder SelectedMI;
1537 MachineOperand &
LHS =
I.getOperand(2);
1538 MachineOperand &
RHS =
I.getOperand(3);
1539 auto [Src0, Src0Mods] = selectVOP3ModsImpl(
LHS.getReg());
1540 auto [Src1, Src1Mods] = selectVOP3ModsImpl(
RHS.getReg());
1542 copyToVGPRIfSrcFolded(Src0, Src0Mods,
LHS, &
I,
true);
1544 copyToVGPRIfSrcFolded(Src1, Src1Mods,
RHS, &
I,
true);
1545 SelectedMI =
BuildMI(*BB, &
I,
DL, TII.get(Opcode), Dst);
1547 SelectedMI.
addImm(Src0Mods);
1548 SelectedMI.
addReg(Src0Reg);
1550 SelectedMI.
addImm(Src1Mods);
1551 SelectedMI.
addReg(Src1Reg);
1557 RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1561 I.eraseFromParent();
1572 if (
MI->getParent() !=
MBB)
1576 if (
MI->getOpcode() == AMDGPU::COPY) {
1577 auto DstRB =
MRI.getRegBankOrNull(
MI->getOperand(0).getReg());
1578 auto SrcRB =
MRI.getRegBankOrNull(
MI->getOperand(1).getReg());
1579 if (DstRB && SrcRB && DstRB->
getID() == AMDGPU::VCCRegBankID &&
1580 SrcRB->getID() == AMDGPU::SGPRRegBankID)
1597bool AMDGPUInstructionSelector::selectBallot(
MachineInstr &
I)
const {
1600 Register DstReg =
I.getOperand(0).getReg();
1601 Register SrcReg =
I.getOperand(2).getReg();
1602 const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
1603 const unsigned WaveSize = STI.getWavefrontSize();
1607 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1610 std::optional<ValueAndVReg> Arg =
1615 if (BallotSize != WaveSize) {
1616 Dst = MRI->createVirtualRegister(TRI.getBoolRC());
1620 const int64_t
Value = Arg->Value.getZExtValue();
1623 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1630 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1636 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1640 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1651 if (BallotSize != WaveSize) {
1652 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1654 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1661 I.eraseFromParent();
1665bool AMDGPUInstructionSelector::selectRelocConstant(
MachineInstr &
I)
const {
1666 Register DstReg =
I.getOperand(0).getReg();
1667 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
1668 const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
1669 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
1672 const bool IsVALU = DstBank->
getID() == AMDGPU::VGPRRegBankID;
1674 Module *
M =
MF->getFunction().getParent();
1675 const MDNode *
Metadata =
I.getOperand(2).getMetadata();
1682 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1685 I.eraseFromParent();
1689bool AMDGPUInstructionSelector::selectGroupStaticSize(
MachineInstr &
I)
const {
1692 Register DstReg =
I.getOperand(0).getReg();
1693 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
1694 unsigned Mov = DstRB->
getID() == AMDGPU::SGPRRegBankID ?
1695 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1703 const SIMachineFunctionInfo *MFI =
MF->getInfo<SIMachineFunctionInfo>();
1706 Module *
M =
MF->getFunction().getParent();
1707 const GlobalValue *GV =
1712 I.eraseFromParent();
1716bool AMDGPUInstructionSelector::selectReturnAddress(
MachineInstr &
I)
const {
1721 MachineOperand &Dst =
I.getOperand(0);
1723 unsigned Depth =
I.getOperand(2).getImm();
1725 const TargetRegisterClass *RC
1726 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
1728 !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
1733 MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1736 I.eraseFromParent();
1740 MachineFrameInfo &MFI =
MF.getFrameInfo();
1745 Register ReturnAddrReg = TRI.getReturnAddressReg(
MF);
1747 AMDGPU::SReg_64RegClass,
DL);
1750 I.eraseFromParent();
1754bool AMDGPUInstructionSelector::selectEndCfIntrinsic(
MachineInstr &
MI)
const {
1757 MachineBasicBlock *BB =
MI.getParent();
1758 BuildMI(*BB, &
MI,
MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1759 .
add(
MI.getOperand(1));
1762 MI.eraseFromParent();
1764 if (!MRI->getRegClassOrNull(
Reg))
1765 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1769bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1771 MachineBasicBlock *
MBB =
MI.getParent();
1775 unsigned IndexOperand =
MI.getOperand(7).getImm();
1776 bool WaveRelease =
MI.getOperand(8).getImm() != 0;
1777 bool WaveDone =
MI.getOperand(9).getImm() != 0;
1779 if (WaveDone && !WaveRelease) {
1783 Fn,
"ds_ordered_count: wave_done requires wave_release",
DL));
1786 unsigned OrderedCountIndex = IndexOperand & 0x3f;
1787 IndexOperand &= ~0x3f;
1788 unsigned CountDw = 0;
1791 CountDw = (IndexOperand >> 24) & 0xf;
1792 IndexOperand &= ~(0xf << 24);
1794 if (CountDw < 1 || CountDw > 4) {
1797 Fn,
"ds_ordered_count: dword count must be between 1 and 4",
DL));
1805 Fn,
"ds_ordered_count: bad index operand",
DL));
1808 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1811 unsigned Offset0 = OrderedCountIndex << 2;
1812 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
1815 Offset1 |= (CountDw - 1) << 6;
1818 Offset1 |= ShaderType << 2;
1820 unsigned Offset = Offset0 | (Offset1 << 8);
1828 MachineInstrBuilder
DS =
1829 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1834 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
1838 MI.eraseFromParent();
1844 case Intrinsic::amdgcn_ds_gws_init:
1845 return AMDGPU::DS_GWS_INIT;
1846 case Intrinsic::amdgcn_ds_gws_barrier:
1847 return AMDGPU::DS_GWS_BARRIER;
1848 case Intrinsic::amdgcn_ds_gws_sema_v:
1849 return AMDGPU::DS_GWS_SEMA_V;
1850 case Intrinsic::amdgcn_ds_gws_sema_br:
1851 return AMDGPU::DS_GWS_SEMA_BR;
1852 case Intrinsic::amdgcn_ds_gws_sema_p:
1853 return AMDGPU::DS_GWS_SEMA_P;
1854 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1855 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
1861bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(
MachineInstr &
MI,
1863 if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1864 !STI.hasGWSSemaReleaseAll()))
1868 const bool HasVSrc =
MI.getNumOperands() == 3;
1869 assert(HasVSrc ||
MI.getNumOperands() == 2);
1871 Register BaseOffset =
MI.getOperand(HasVSrc ? 2 : 1).getReg();
1872 const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
1873 if (OffsetRB->
getID() != AMDGPU::SGPRRegBankID)
1879 MachineBasicBlock *
MBB =
MI.getParent();
1882 MachineInstr *Readfirstlane =
nullptr;
1887 if (OffsetDef->
getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
1888 Readfirstlane = OffsetDef;
1893 if (OffsetDef->
getOpcode() == AMDGPU::G_CONSTANT) {
1903 std::tie(BaseOffset, ImmOffset) =
1906 if (Readfirstlane) {
1909 if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
1915 if (!RBI.constrainGenericRegister(BaseOffset,
1916 AMDGPU::SReg_32RegClass, *MRI))
1920 Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1939 if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
1946 TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);
1948 MI.eraseFromParent();
1952bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
1953 bool IsAppend)
const {
1954 Register PtrBase =
MI.getOperand(2).getReg();
1955 LLT PtrTy = MRI->getType(PtrBase);
1959 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
1962 if (!isDSOffsetLegal(PtrBase,
Offset)) {
1963 PtrBase =
MI.getOperand(2).getReg();
1967 MachineBasicBlock *
MBB =
MI.getParent();
1969 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1973 if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
1980 MI.eraseFromParent();
1984bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
1985 MachineFunction *
MF =
MI.getParent()->getParent();
1986 SIMachineFunctionInfo *MFInfo =
MF->getInfo<SIMachineFunctionInfo>();
1997 TFE = TexFailCtrl & 0x1;
1999 LWE = TexFailCtrl & 0x2;
2002 return TexFailCtrl == 0;
2005bool AMDGPUInstructionSelector::selectImageIntrinsic(
2007 MachineBasicBlock *
MBB =
MI.getParent();
2010 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2019 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
2023 int NumVDataDwords = -1;
2024 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
2025 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
2031 Unorm =
MI.getOperand(ArgOffset + Intr->
UnormIndex).getImm() != 0;
2035 bool IsTexFail =
false;
2037 TFE, LWE, IsTexFail))
2040 const int Flags =
MI.getOperand(ArgOffset + Intr->
NumArgs).getImm();
2041 const bool IsA16 = (
Flags & 1) != 0;
2042 const bool IsG16 = (
Flags & 2) != 0;
2045 if (IsA16 && !STI.hasG16() && !IsG16)
2049 unsigned DMaskLanes = 0;
2051 if (BaseOpcode->
Atomic) {
2052 VDataOut =
MI.getOperand(0).getReg();
2053 VDataIn =
MI.getOperand(2).getReg();
2054 LLT Ty = MRI->getType(VDataIn);
2057 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
2062 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
2064 DMask = Is64Bit ? 0xf : 0x3;
2065 NumVDataDwords = Is64Bit ? 4 : 2;
2067 DMask = Is64Bit ? 0x3 : 0x1;
2068 NumVDataDwords = Is64Bit ? 2 : 1;
2071 DMask =
MI.getOperand(ArgOffset + Intr->
DMaskIndex).getImm();
2074 if (BaseOpcode->
Store) {
2075 VDataIn =
MI.getOperand(1).getReg();
2076 VDataTy = MRI->getType(VDataIn);
2081 VDataOut =
MI.getOperand(0).getReg();
2082 VDataTy = MRI->getType(VDataOut);
2083 NumVDataDwords = DMaskLanes;
2085 if (IsD16 && !STI.hasUnpackedD16VMem())
2086 NumVDataDwords = (DMaskLanes + 1) / 2;
2091 if (Subtarget->hasG16() && IsG16) {
2092 const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
2095 IntrOpcode = G16MappingInfo->
G16;
2099 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2108 int NumVAddrRegs = 0;
2109 int NumVAddrDwords = 0;
2112 MachineOperand &AddrOp =
MI.getOperand(ArgOffset +
I);
2113 if (!AddrOp.
isReg())
2121 NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
2128 NumVAddrRegs != 1 &&
2129 (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
2130 : NumVAddrDwords == NumVAddrRegs);
2131 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2142 NumVDataDwords, NumVAddrDwords);
2143 }
else if (IsGFX11Plus) {
2145 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2146 : AMDGPU::MIMGEncGfx11Default,
2147 NumVDataDwords, NumVAddrDwords);
2148 }
else if (IsGFX10Plus) {
2150 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2151 : AMDGPU::MIMGEncGfx10Default,
2152 NumVDataDwords, NumVAddrDwords);
2154 if (Subtarget->hasGFX90AInsts()) {
2156 NumVDataDwords, NumVAddrDwords);
2160 <<
"requested image instruction is not supported on this GPU\n");
2167 NumVDataDwords, NumVAddrDwords);
2170 NumVDataDwords, NumVAddrDwords);
2180 const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
2182 Register TmpReg = MRI->createVirtualRegister(
2183 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2184 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2187 if (!MRI->use_empty(VDataOut)) {
2200 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2201 MachineOperand &SrcOp =
MI.getOperand(ArgOffset + Intr->
VAddrStart +
I);
2202 if (SrcOp.
isReg()) {
2221 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2223 MIB.
addImm(IsA16 ? -1 : 0);
2225 if (!Subtarget->hasGFX90AInsts()) {
2237 MIB.
addImm(IsD16 ? -1 : 0);
2239 MI.eraseFromParent();
2241 TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
2247bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2253 MachineBasicBlock *
MBB =
MI.getParent();
2258 unsigned Offset =
MI.getOperand(6).getImm();
2262 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2263 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2264 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2266 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2267 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2269 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2270 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2282 MI.eraseFromParent();
2286bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2289 switch (IntrinsicID) {
2290 case Intrinsic::amdgcn_end_cf:
2291 return selectEndCfIntrinsic(
I);
2292 case Intrinsic::amdgcn_ds_ordered_add:
2293 case Intrinsic::amdgcn_ds_ordered_swap:
2294 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2295 case Intrinsic::amdgcn_ds_gws_init:
2296 case Intrinsic::amdgcn_ds_gws_barrier:
2297 case Intrinsic::amdgcn_ds_gws_sema_v:
2298 case Intrinsic::amdgcn_ds_gws_sema_br:
2299 case Intrinsic::amdgcn_ds_gws_sema_p:
2300 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2301 return selectDSGWSIntrinsic(
I, IntrinsicID);
2302 case Intrinsic::amdgcn_ds_append:
2303 return selectDSAppendConsume(
I,
true);
2304 case Intrinsic::amdgcn_ds_consume:
2305 return selectDSAppendConsume(
I,
false);
2306 case Intrinsic::amdgcn_init_whole_wave:
2307 return selectInitWholeWave(
I);
2308 case Intrinsic::amdgcn_raw_buffer_load_lds:
2309 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2310 case Intrinsic::amdgcn_struct_buffer_load_lds:
2311 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2312 return selectBufferLoadLds(
I);
2317 case Intrinsic::amdgcn_load_to_lds:
2318 case Intrinsic::amdgcn_global_load_lds:
2319 return selectGlobalLoadLds(
I);
2320 case Intrinsic::amdgcn_exp_compr:
2321 if (!STI.hasCompressedExport()) {
2323 F.getContext().diagnose(
2324 DiagnosticInfoUnsupported(
F,
"intrinsic not supported on subtarget",
2329 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2330 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2331 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2332 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2333 return selectDSBvhStackIntrinsic(
I);
2334 case Intrinsic::amdgcn_s_barrier_init:
2335 case Intrinsic::amdgcn_s_barrier_signal_var:
2336 return selectNamedBarrierInit(
I, IntrinsicID);
2337 case Intrinsic::amdgcn_s_barrier_join:
2338 case Intrinsic::amdgcn_s_get_named_barrier_state:
2339 return selectNamedBarrierInst(
I, IntrinsicID);
2340 case Intrinsic::amdgcn_s_get_barrier_state:
2341 return selectSGetBarrierState(
I, IntrinsicID);
2342 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2343 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
2348bool AMDGPUInstructionSelector::selectG_SELECT(
MachineInstr &
I)
const {
2355 Register DstReg =
I.getOperand(0).getReg();
2356 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
2358 const MachineOperand &CCOp =
I.getOperand(1);
2360 if (!isVCC(CCReg, *MRI)) {
2361 unsigned SelectOpcode =
Size == 64 ? AMDGPU::S_CSELECT_B64 :
2362 AMDGPU::S_CSELECT_B32;
2363 MachineInstr *CopySCC =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
2369 if (!MRI->getRegClassOrNull(CCReg))
2370 MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
2372 .
add(
I.getOperand(2))
2373 .
add(
I.getOperand(3));
2378 I.eraseFromParent();
2387 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2389 .
add(
I.getOperand(3))
2391 .
add(
I.getOperand(2))
2392 .
add(
I.getOperand(1));
2395 I.eraseFromParent();
2399bool AMDGPUInstructionSelector::selectG_TRUNC(
MachineInstr &
I)
const {
2400 Register DstReg =
I.getOperand(0).getReg();
2401 Register SrcReg =
I.getOperand(1).getReg();
2402 const LLT DstTy = MRI->getType(DstReg);
2403 const LLT SrcTy = MRI->getType(SrcReg);
2406 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
2407 const RegisterBank *DstRB;
2413 DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
2418 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
2423 const TargetRegisterClass *SrcRC =
2424 TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
2425 const TargetRegisterClass *DstRC =
2426 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
2427 if (!SrcRC || !DstRC)
2430 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
2431 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
2436 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2437 assert(STI.useRealTrue16Insts());
2441 .
addReg(SrcReg, 0, AMDGPU::lo16);
2442 I.eraseFromParent();
2450 Register LoReg = MRI->createVirtualRegister(DstRC);
2451 Register HiReg = MRI->createVirtualRegister(DstRC);
2453 .
addReg(SrcReg, 0, AMDGPU::sub0);
2455 .
addReg(SrcReg, 0, AMDGPU::sub1);
2457 if (IsVALU && STI.hasSDWA()) {
2460 MachineInstr *MovSDWA =
2461 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2471 Register TmpReg0 = MRI->createVirtualRegister(DstRC);
2472 Register TmpReg1 = MRI->createVirtualRegister(DstRC);
2473 Register ImmReg = MRI->createVirtualRegister(DstRC);
2475 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2485 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2486 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2487 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2499 And.setOperandDead(3);
2500 Or.setOperandDead(3);
2504 I.eraseFromParent();
2512 unsigned SubRegIdx = DstSize < 32
2513 ?
static_cast<unsigned>(AMDGPU::sub0)
2514 : TRI.getSubRegFromChannel(0, DstSize / 32);
2515 if (SubRegIdx == AMDGPU::NoSubRegister)
2520 const TargetRegisterClass *SrcWithSubRC
2521 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2525 if (SrcWithSubRC != SrcRC) {
2526 if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
2530 I.getOperand(1).setSubReg(SubRegIdx);
2533 I.setDesc(TII.get(TargetOpcode::COPY));
2540 int SignedMask =
static_cast<int>(Mask);
2541 return SignedMask >= -16 && SignedMask <= 64;
2545const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2554 return &RBI.getRegBankFromRegClass(*RC, LLT());
2558bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
const {
2559 bool InReg =
I.getOpcode() == AMDGPU::G_SEXT_INREG;
2560 bool Signed =
I.getOpcode() == AMDGPU::G_SEXT || InReg;
2563 const Register DstReg =
I.getOperand(0).getReg();
2564 const Register SrcReg =
I.getOperand(1).getReg();
2566 const LLT DstTy = MRI->getType(DstReg);
2567 const LLT SrcTy = MRI->getType(SrcReg);
2568 const unsigned SrcSize =
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2575 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2578 if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
2580 return selectCOPY(
I);
2582 const TargetRegisterClass *SrcRC =
2583 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2584 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
2585 const TargetRegisterClass *DstRC =
2586 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2588 Register UndefReg = MRI->createVirtualRegister(SrcRC);
2589 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2595 I.eraseFromParent();
2597 return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
2598 RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
2601 if (SrcBank->
getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2607 MachineInstr *ExtI =
2611 I.eraseFromParent();
2615 const unsigned BFE =
Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2616 MachineInstr *ExtI =
2621 I.eraseFromParent();
2625 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2626 const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
2627 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2628 if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
2631 if (
Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2632 const unsigned SextOpc = SrcSize == 8 ?
2633 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2636 I.eraseFromParent();
2637 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2642 if (DstSize > 32 && SrcSize == 32) {
2643 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2644 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2659 I.eraseFromParent();
2660 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
2664 const unsigned BFE64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2665 const unsigned BFE32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2668 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2670 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2671 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2672 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2674 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2685 I.eraseFromParent();
2686 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
2701 I.eraseFromParent();
2702 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2736 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2743 assert(Mask.size() == 2);
2745 if (Mask[0] == 1 && Mask[1] <= 1) {
2753bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2754 if (!Subtarget->hasSALUFloatInsts())
2757 Register Dst =
I.getOperand(0).getReg();
2758 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2759 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2762 Register Src =
I.getOperand(1).getReg();
2768 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2770 I.eraseFromParent();
2771 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
2778bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
2791 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2792 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2797 MachineInstr *Fabs =
getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
2801 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
2802 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
2805 MachineBasicBlock *BB =
MI.getParent();
2807 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2808 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2809 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2810 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2812 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2813 .
addReg(Src, 0, AMDGPU::sub0);
2814 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2815 .
addReg(Src, 0, AMDGPU::sub1);
2816 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2820 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
2825 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2830 MI.eraseFromParent();
2835bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
2837 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2838 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2843 MachineBasicBlock *BB =
MI.getParent();
2845 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2846 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2847 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2848 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2850 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
2851 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
2854 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2855 .
addReg(Src, 0, AMDGPU::sub0);
2856 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2857 .
addReg(Src, 0, AMDGPU::sub1);
2858 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2863 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
2867 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2873 MI.eraseFromParent();
2878 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
2881void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
2884 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
2885 const MachineInstr *PtrMI =
2886 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
2890 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
2895 for (
unsigned i = 1; i != 3; ++i) {
2896 const MachineOperand &GEPOp = PtrMI->
getOperand(i);
2897 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.
getReg());
2902 assert(GEPInfo.Imm == 0);
2906 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.
getReg(), MRI, TRI);
2907 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
2908 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
2910 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
2914 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
2917bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
2918 return RBI.getRegBank(
Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
2921bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
2922 if (!
MI.hasOneMemOperand())
2925 const MachineMemOperand *MMO = *
MI.memoperands_begin();
2938 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
2939 return RBI.getRegBank(
MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
2940 AMDGPU::SGPRRegBankID;
2943 return I &&
I->getMetadata(
"amdgpu.uniform");
2947 for (
const GEPInfo &GEPInfo : AddrInfo) {
2948 if (!GEPInfo.VgprParts.empty())
2954void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
2955 const LLT PtrTy = MRI->getType(
I.getOperand(1).getReg());
2958 STI.ldsRequiresM0Init()) {
2962 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2967bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
2974 if (
Reg.isPhysical())
2978 const unsigned Opcode =
MI.getOpcode();
2980 if (Opcode == AMDGPU::COPY)
2983 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
2984 Opcode == AMDGPU::G_XOR)
2989 return GI->is(Intrinsic::amdgcn_class);
2991 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineOperand &CondOp = I.getOperand(0);

  const TargetRegisterClass *ConstrainRC;

  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;

    const bool Is64 = STI.isWave64();
    const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

    Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
    BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));

  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
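// Select G_PTRMASK. If the mask's known-one bits already cover a whole 32-bit
// half, that half can be copied or handled with a single AND; otherwise the
// pointer and mask are split into sub0/sub1, ANDed per half with V_AND_B32 or
// S_AND_B32, and recombined with a REG_SEQUENCE.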
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

  APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);

  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))

         "ptrmask should have been narrowed during legalize");

    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)

    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
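// Fold a constant addend on the index register into a subregister choice: if
// the constant offset fits within the available SubRegs, return the base
// index register paired with SubRegs[Offset]; otherwise fall back to the
// original index register with SubRegs[0].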
static std::pair<Register, unsigned>

  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;

  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);

bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(

  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);

  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();

  if (!STI.useVGPRIndexMode()) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();

  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(

  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);

  const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
  const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);

  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

  const TargetRegisterClass *VecRC =
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
  const TargetRegisterClass *ValRC =
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);

  if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))

  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)

  std::tie(IdxReg, SubReg) =

  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
                         STI.useVGPRIndexMode();

  MachineBasicBlock *BB = MI.getParent();

    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);

    MI.eraseFromParent();

  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);

  MI.eraseFromParent();
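// Select a buffer-load-to-LDS intrinsic: the BUFFER_LOAD_*_LDS_{BOTHEN,IDXEN,
// OFFEN,OFFSET} opcode is chosen from the load size and from whether a VGPR
// index and/or VGPR offset is present, and the original memory operand is
// rewritten into a load MMO plus a store MMO for the LDS destination.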
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())

  unsigned Size = MI.getOperand(3).getImm();

  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();

  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;

  MachineBasicBlock *MBB = MI.getParent();
      .add(MI.getOperand(2));

  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {

  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));

  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();

  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  MachinePointerInfo StorePtrI = LoadPtrI;
  StorePtrI.V = nullptr;

  MachineMemOperand *StoreMMO =

  MI.eraseFromParent();

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Def->getOperand(1).getReg();

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Def->getOperand(1).getReg();

  if (VT->signBitIsZero(Reg))
    return matchZeroExtendFromS32(Reg);

AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
                                 : matchZeroExtendFromS32(Reg);

AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
                                 : matchSignExtendFromS32(Reg);

AMDGPUInstructionSelector::matchExtendFromS32OrS32(Register Reg,
                                                   bool IsSigned) const {
    return matchSignExtendFromS32OrS32(Reg);
  return matchZeroExtendFromS32OrS32(Reg);

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Def->getOperand(1).getReg();
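// Select a global-load-to-LDS intrinsic: pick a GLOBAL_LOAD_LDS_* opcode by
// size, try to move a uniform address (or the SGPR base of a G_PTR_ADD whose
// other operand is a zero-extended 32-bit value) into the SAddr operand, and
// materialize any remaining offset in a VGPR.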
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())

  unsigned Size = MI.getOperand(3).getImm();

    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;

  MachineBasicBlock *MBB = MI.getParent();
      .add(MI.getOperand(2));

  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
        if (Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {

    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MIB.add(MI.getOperand(4))
     .add(MI.getOperand(5));

  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  LoadPtrI.Offset = MI.getOperand(4).getImm();
  MachinePointerInfo StorePtrI = LoadPtrI;

  MachineMemOperand *StoreMMO =
      sizeof(int32_t), Align(4));

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
  unsigned OpcodeOpIdx =
      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
  MI.removeOperand(OpcodeOpIdx);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
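// Map each amdgcn_smfmac_* sparse-MFMA intrinsic to its V_SMFMAC_*_e64
// machine opcode, then rearrange the operands: the accumulator input
// (operand 4) is detached and re-appended after the intrinsic ID operand is
// dropped, so the instruction matches the pseudo's expected operand order.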
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;

  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
  if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
      !Subtarget->hasPermlane16Swap())
  if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
      !Subtarget->hasPermlane32Swap())

  unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                        ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                        : AMDGPU::V_PERMLANE32_SWAP_B32_e64;

  MI.removeOperand(2);
  MI.setDesc(TII.get(Opcode));

  MachineOperand &FI = MI.getOperand(4);

bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  MachineBasicBlock *MBB = MI.getParent();

    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .addImm(Subtarget->getWavefrontSizeLog2())

  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))

  MI.eraseFromParent();
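// Helpers for BITOP3 selection: each source register is assigned a canonical
// truth-table byte (0xf0, 0xcc, 0xaa), and nested G_AND/G_OR/G_XOR nodes
// combine those bytes with &, | and ^ while counting how many logic
// operations have been folded into the resulting table.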
  unsigned NumOpcodes = 0;

  const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };

    for (unsigned I = 0; I < Src.size(); ++I) {

    if (Src.size() == 3) {

    for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src[I] == LHS) {

    Bits = SrcBits[Src.size()];

  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {

  if (!getOperandBits(LHS, LHSBits) ||
      !getOperandBits(RHS, RHSBits)) {
    return std::make_pair(0, 0);

    NumOpcodes += Op.first;
    LHSBits = Op.second;

    NumOpcodes += Op.first;
    RHSBits = Op.second;

    return std::make_pair(0, 0);

  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
    TTbl = LHSBits & RHSBits;
  case TargetOpcode::G_OR:
    TTbl = LHSBits | RHSBits;
  case TargetOpcode::G_XOR:
    TTbl = LHSBits ^ RHSBits;

  return std::make_pair(NumOpcodes + 1, TTbl);

bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
  if (!Subtarget->hasBitOp3Insts())

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  unsigned NumOpcodes;

  std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);

  if (NumOpcodes < 2 || Src.empty())

  const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
  if (NumOpcodes == 2 && IsB32) {
  } else if (NumOpcodes < 4) {

  unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
  if (!IsB32 && STI.hasTrue16BitInsts())
    Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
                                   : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
  unsigned CBL = STI.getConstantBusLimit(Opc);
  MachineBasicBlock *MBB = MI.getParent();

  for (unsigned I = 0; I < Src.size(); ++I) {
    const RegisterBank *RB = RBI.getRegBank(Src[I], *MRI, TRI);
    if (RB->getID() != AMDGPU::SGPRRegBankID)

    Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  while (Src.size() < 3)
    Src.push_back(Src[0]);

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();

  MachineBasicBlock *MBB = MI.getParent();

    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
        .addImm(Subtarget->getWavefrontSizeLog2())

  MI.eraseFromParent();
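// Main GlobalISel entry point for this selector: instructions that are not
// pre-ISel generic opcodes fall back to selectCOPY(), and everything else is
// dispatched on its opcode to the per-opcode select* routines below.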
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectBITOP3(I))
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
    return selectBVHIntersectRayIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
    I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
    return selectPHI(I);
  case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
    return selectCOPY_SCC_VCC(I);
  case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
    return selectCOPY_VCC_SCC(I);
  case AMDGPU::G_AMDGPU_READANYLANE:
    return selectReadAnyLane(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:

AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
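// Compute VOP3 source modifiers: peel G_FNEG (and, when canonicalizing, an
// fsub from zero) into the NEG bit and, when AllowAbs is set, G_FABS into the
// ABS bit, returning the stripped source register together with the
// accumulated modifier mask.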
std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {

  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();

  return std::pair(Src, Mods);

Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
      RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {

                 TII.get(AMDGPU::COPY), VGPRSrc)

AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(

  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), false);

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), true,

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },

  if (MI->getOpcode() != AMDGPU::G_TRUNC)

  unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  return DstSize * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_LSHR)

  std::optional<ValueAndVReg> ShiftAmt;
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  unsigned Shift = ShiftAmt->Value.getZExtValue();
  return Shift * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_SHL)

  std::optional<ValueAndVReg> ShiftAmt;
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  unsigned Shift = ShiftAmt->Value.getZExtValue();
  return Shift * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
  return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
         MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();

static std::optional<std::pair<Register, SrcStatus>>

  unsigned Opc = MI->getOpcode();

  case AMDGPU::G_BITCAST:
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Curr.second});
    if (MI->getOperand(1).getReg().isPhysical())
      return std::nullopt;
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Curr.second});
  case AMDGPU::G_FNEG: {
      return std::nullopt;
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Stat});

  switch (Curr.second) {
    return std::optional<std::pair<Register, SrcStatus>>(
    if (Curr.first == MI->getOperand(0).getReg())
      return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    if (Curr.first == MI->getOperand(0).getReg())
      return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(

  return std::nullopt;

  bool HasNeg = false;
  bool HasOpsel = true;

  unsigned Opc = MI->getOpcode();

  if (Opc < TargetOpcode::GENERIC_OP_END) {
  } else if (Opc == TargetOpcode::G_INTRINSIC) {
    if (IntrinsicID == Intrinsic::amdgcn_fdot2)

  while (Depth <= MaxDepth && Curr.has_value()) {
    Statlist.push_back(Curr.value());

static std::pair<Register, SrcStatus>

  while (Depth <= MaxDepth && Curr.has_value()) {
      LastSameOrNeg = Curr.value();

  return LastSameOrNeg;

  unsigned Width1 = MRI.getType(Reg1).getSizeInBits();
  unsigned Width2 = MRI.getType(Reg2).getSizeInBits();
  return Width1 == Width2;

         IsHalfState(HiStat);
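// Packed (VOP3P) modifier matching: trace the hi and lo halves of a
// G_BUILD_VECTOR source through neg/opsel-preserving operations and, when
// both halves resolve to the same register with compatible status, fold the
// result into op_sel/neg modifier bits instead of keeping the shuffle.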
std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(

    return {RootReg, Mods};

  SearchOptions SO(RootReg, MRI);

  MachineInstr *MI = MRI.getVRegDef(Stat.first);

  if (MI->getOpcode() != AMDGPU::G_BUILD_VECTOR || MI->getNumOperands() != 3 ||
      (IsDOT && Subtarget->hasDOTOpSelHazard())) {
    return {Stat.first, Mods};

  if (StatlistHi.empty()) {
    return {Stat.first, Mods};

  if (StatlistLo.empty()) {
    return {Stat.first, Mods};

  for (int I = StatlistHi.size() - 1; I >= 0; I--) {
    for (int J = StatlistLo.size() - 1; J >= 0; J--) {
      if (StatlistHi[I].first == StatlistLo[J].first &&
                         StatlistHi[I].first, RootReg, TII, MRI))
        return {StatlistHi[I].first,
                updateMods(StatlistHi[I].second, StatlistLo[J].second, Mods)};

  return {Stat.first, Mods};

  return RB->getID() == RBNo;

  if (checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI, TRI) ||

  if (MI->getOpcode() == AMDGPU::COPY && NewReg == MI->getOperand(1).getReg()) {

  Register DstReg = MRI.cloneVirtualRegister(RootReg);

  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

AMDGPUInstructionSelector::selectVOP3PRetHelper(MachineOperand &Root,

  std::tie(Reg, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, IsDOT);

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  return selectVOP3PRetHelper(Root);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  return selectVOP3PRetHelper(Root, true);

AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
         "expected i1 value");
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

  switch (Elts.size()) {
    DstRegClass = &AMDGPU::VReg_256RegClass;
    DstRegClass = &AMDGPU::VReg_128RegClass;
    DstRegClass = &AMDGPU::VReg_64RegClass;

  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {

  if (ModOpcode == TargetOpcode::G_FNEG) {
    for (auto El : Elts) {
    if (Elts.size() != NegAbsElts.size()) {
    assert(ModOpcode == TargetOpcode::G_FABS);

AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  assert(BV->getNumSources() > 0);

  MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
  unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)

  for (unsigned i = 0; i < BV->getNumSources(); ++i) {
    ElF32 = MRI->getVRegDef(BV->getSourceReg(i));

  if (BV->getNumSources() == EltsF32.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};

AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {

  if (CV->getNumSources() == EltsV2F16.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};

AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  assert(CV->getNumSources() > 0);
  MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));

  unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)

  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
    ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));

  if (CV->getNumSources() == EltsV2F16.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};

AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
    if (TII.isInlineConstant(FPValReg->Value)) {
      return {{[=](MachineInstrBuilder &MIB) {
        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());

    if (TII.isInlineConstant(ICst)) {

AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  std::optional<ValueAndVReg> ShiftAmt;
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  std::optional<ValueAndVReg> ShiftAmt;
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectSWMMACIndex32(MachineOperand &Root) const {
    S32 = matchAnyExtendFromS32(Src);

  if (Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
      Src = Def->getOperand(2).getReg();

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },

AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
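// Decide whether an offset operand can use the scaled-offset addressing
// form: it requires Subtarget->hasScaleOffset() and an offset produced by a
// 32-to-64-bit multiply or mad whose signedness matches the addressing
// variant being selected.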
bool AMDGPUInstructionSelector::selectScaleOffset(MachineOperand &Root,
                                                  bool IsSigned) const {
  if (!Subtarget->hasScaleOffset())

  MachineMemOperand *MMO = *MI.memoperands_begin();

    OffsetReg = Def->Reg;

          m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
       (Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
                                      : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
        (IsSigned && Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
         VT->signBitIsZero(Mul->getOperand(2).getReg()))) &&

bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 bool *ScaleOffset) const {
  MachineBasicBlock *MBB = MI->getParent();

  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;

    *ScaleOffset = false;

  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
      Register OffsetReg = GEPI2.SgprParts[1];
          selectScaleOffset(Root, OffsetReg, false);
      OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
        Base = GEPI2.SgprParts[0];
        *SOffset = OffsetReg;

        auto SKnown = VT->getKnownBits(*SOffset);
        if (*Offset + SKnown.getMinValue().getSExtValue() < 0)

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];

  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    Register OffsetReg = GEPI.SgprParts[1];
    *ScaleOffset = selectScaleOffset(Root, OffsetReg, false);
    OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
    Base = GEPI.SgprParts[0];
    *SOffset = OffsetReg;

AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, nullptr, &Offset,
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};

AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }

AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, nullptr,
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};

AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset, &ScaleOffset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};

std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  if (!STI.hasFlatInstOffsets())

  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset, IsInBounds) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 ||
       !isFlatScratchBaseLegal(Root.getReg())) ||

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))

  return std::pair(PtrBase, ConstOffset);

AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },

AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },

AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
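// Select the SADDR form of a global access: split a constant offset off the
// pointer, keep the uniform base in SAddr, and either fold a zero/sign-
// extended 32-bit addend into VOffset or materialize the remainder with
// V_MOV_B32, returning std::nullopt when the base cannot be proven SGPR.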
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
                                             bool NeedIOffset) const {

  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
      ImmOffset = ConstOffset;

      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {

          int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
            std::tie(SplitImmOffset, RemainderOffset) =

          if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
            MachineBasicBlock *MBB = MI->getParent();
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

            BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
                .addImm(RemainderOffset);

                [=](MachineInstrBuilder &MIB) {
                [=](MachineInstrBuilder &MIB) {
                [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
                [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
                [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },
                [=](MachineInstrBuilder &MIB) {
                [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },

        unsigned NumLiterals =
            !TII.isInlineConstant(APInt(32, Lo_32(ConstOffset))) +
            !TII.isInlineConstant(APInt(32, Hi_32(ConstOffset)));
        if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
          return std::nullopt;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {

    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

      bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
                                           Subtarget->hasSignedGVSOffset());
      if (Register VOffset = matchExtendFromS32OrS32(
              PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
          return {{[=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
        return {{[=](MachineInstrBuilder &MIB) {
                 [=](MachineInstrBuilder &MIB) {
                 [=](MachineInstrBuilder &MIB) {

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  MachineBasicBlock *MBB = MI->getParent();
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)

        [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
        [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); }

      [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); }

AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, 0);

AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, PassedCPol);

AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, PassedCPol);

AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
  return selectGlobalSAddr(Root, PassedCPol, false);

AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
  return selectGlobalSAddr(Root, PassedCPol, false);

AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {

  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();

      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)

    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }

bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
  if (!Subtarget->hasFlatScratchSVSSwizzleBug())

  auto VKnown = VT->getKnownBits(VAddr);
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;

AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {

  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  unsigned CPol = selectScaleOffset(Root, RHS, true)

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }

    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }
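// Select the OFFEN form of a MUBUF scratch access: fold a frame index or a
// legal constant offset into the address where possible, otherwise
// materialize the high bits of the offset into a VGPR with V_MOV_B32 and use
// that register as VAddr.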
5897AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
5899 MachineBasicBlock *
MBB =
MI->getParent();
5901 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
5906 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5911 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5915 return {{[=](MachineInstrBuilder &MIB) {
5918 [=](MachineInstrBuilder &MIB) {
5921 [=](MachineInstrBuilder &MIB) {
5926 [=](MachineInstrBuilder &MIB) {
5935 std::optional<int> FI;
5938 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
5940 int64_t ConstOffset;
5941 std::tie(PtrBase, ConstOffset, std::ignore) =
5942 getPtrBaseWithConstantOffset(VAddr, *MRI);
5943 if (ConstOffset != 0) {
5944 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
5945 (!STI.privateMemoryResourceIsRangeChecked() ||
5946 VT->signBitIsZero(PtrBase))) {
5947 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
5948 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
5954 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
5958 return {{[=](MachineInstrBuilder &MIB) {
5961 [=](MachineInstrBuilder &MIB) {
5967 [=](MachineInstrBuilder &MIB) {
5972 [=](MachineInstrBuilder &MIB) {
5977bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
5982 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
5987 return VT->signBitIsZero(
Base);
5990bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
5992 unsigned Size)
const {
5993 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
5998 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6003 return VT->signBitIsZero(
Base);
6008 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6009 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6016bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6024 if (STI.hasSignedScratchOffsets())
6030 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6031 std::optional<ValueAndVReg> RhsValReg =
6037 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6038 RhsValReg->Value.getSExtValue() > -0x40000000)
6042 return VT->signBitIsZero(
LHS);
6047bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6055 if (STI.hasSignedScratchOffsets())
6060 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6065bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6069 if (STI.hasSignedScratchOffsets())
6074 std::optional<DefinitionAndSourceRegister> BaseDef =
6076 std::optional<ValueAndVReg> RHSOffset =
6086 (RHSOffset->Value.getSExtValue() < 0 &&
6087 RHSOffset->Value.getSExtValue() > -0x40000000)))
6090 Register LHS = BaseDef->MI->getOperand(1).getReg();
6091 Register RHS = BaseDef->MI->getOperand(2).getReg();
6092 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6095bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6096 unsigned ShAmtBits)
const {
6097 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6099 std::optional<APInt>
RHS =
6104 if (
RHS->countr_one() >= ShAmtBits)
6107 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6108 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6112AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6115 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6117 std::optional<DefinitionAndSourceRegister>
Def =
6119 assert(Def &&
"this shouldn't be an optional result");
6124 [=](MachineInstrBuilder &MIB) {
6127 [=](MachineInstrBuilder &MIB) {
6130 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6141 if (!TII.isLegalMUBUFImmOffset(
Offset))
6149 [=](MachineInstrBuilder &MIB) {
6152 [=](MachineInstrBuilder &MIB) {
6160 !TII.isLegalMUBUFImmOffset(
Offset))
6164 [=](MachineInstrBuilder &MIB) {
6167 [=](MachineInstrBuilder &MIB) {
6174std::pair<Register, unsigned>
6175AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6176 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6177 int64_t ConstAddr = 0;
6181 std::tie(PtrBase,
Offset, std::ignore) =
6182 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6185 if (isDSOffsetLegal(PtrBase,
Offset)) {
6187 return std::pair(PtrBase,
Offset);
6189 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6198 return std::pair(Root.
getReg(), 0);
6202AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6205 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6207 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6213AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6214 return selectDSReadWrite2(Root, 4);
6218AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6219 return selectDSReadWrite2(Root, 8);
6223AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6224 unsigned Size)
const {
6229 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6231 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6235std::pair<Register, unsigned>
6236AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6237 unsigned Size)
const {
6238 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6239 int64_t ConstAddr = 0;
6243 std::tie(PtrBase,
Offset, std::ignore) =
6244 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6247 int64_t OffsetValue0 =
Offset;
6249 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6251 return std::pair(PtrBase, OffsetValue0 /
Size);
6253 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6261 return std::pair(Root.
getReg(), 0);
6269std::tuple<Register, int64_t, bool>
6270AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6273 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6274 return {Root, 0,
false};
6277 std::optional<ValueAndVReg> MaybeOffset =
6280 return {Root, 0,
false};
6295 Register RSrc2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6296 Register RSrc3 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6297 Register RSrcHi =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6298 Register RSrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6300 B.buildInstr(AMDGPU::S_MOV_B32)
6303 B.buildInstr(AMDGPU::S_MOV_B32)
6310 B.buildInstr(AMDGPU::REG_SEQUENCE)
6313 .addImm(AMDGPU::sub0)
6315 .addImm(AMDGPU::sub1);
6319 RSrcLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6320 B.buildInstr(AMDGPU::S_MOV_B64)
6325 B.buildInstr(AMDGPU::REG_SEQUENCE)
6328 .addImm(AMDGPU::sub0_sub1)
6330 .addImm(AMDGPU::sub2_sub3);
6337 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6346 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6353AMDGPUInstructionSelector::MUBUFAddressData
6354AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6355 MUBUFAddressData
Data;
6361 std::tie(PtrBase,
Offset, std::ignore) =
6362 getPtrBaseWithConstantOffset(Src, *MRI);
6368 if (MachineInstr *InputAdd
6370 Data.N2 = InputAdd->getOperand(1).getReg();
6371 Data.N3 = InputAdd->getOperand(2).getReg();
6386bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6392 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6393 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6399void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6401 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6405 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6406 B.buildInstr(AMDGPU::S_MOV_B32)
6412bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6417 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6420 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6421 if (!shouldUseAddr64(AddrData))
6427 Offset = AddrData.Offset;
6433 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6435 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6448 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6459 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6463bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6468 if (STI.useFlatForGlobal())
6471 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6472 if (shouldUseAddr64(AddrData))
6478 Offset = AddrData.Offset;
6484 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6489AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6495 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6501 [=](MachineInstrBuilder &MIB) {
6504 [=](MachineInstrBuilder &MIB) {
6507 [=](MachineInstrBuilder &MIB) {
6510 else if (STI.hasRestrictedSOffset())
6511 MIB.
addReg(AMDGPU::SGPR_NULL);
6515 [=](MachineInstrBuilder &MIB) {
6525AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6530 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6534 [=](MachineInstrBuilder &MIB) {
6537 [=](MachineInstrBuilder &MIB) {
6540 else if (STI.hasRestrictedSOffset())
6541 MIB.
addReg(AMDGPU::SGPR_NULL);
6553AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
6558 SOffset = AMDGPU::SGPR_NULL;
6560 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
6564static std::optional<uint64_t>
6568 if (!OffsetVal || !
isInt<32>(*OffsetVal))
6569 return std::nullopt;
6570 return Lo_32(*OffsetVal);
6574AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
6575 std::optional<uint64_t> OffsetVal =
6580 std::optional<int64_t> EncodedImm =
6585 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6589AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
6596 std::optional<int64_t> EncodedImm =
6601 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6605AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
6613 return std::nullopt;
6615 std::optional<int64_t> EncodedOffset =
6618 return std::nullopt;
6621 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
6622 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedOffset); }}};
6625std::pair<Register, unsigned>
6626AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
6627 bool &Matched)
const {
6632 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
6642 const auto CheckAbsNeg = [&]() {
6647 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
6678AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
6683 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6688 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
6689 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
      .addImm(I.getOperand(2).getImm());
  // The "is first" result is produced in SCC; copy it to the destination.
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
bool AMDGPUInstructionSelector::selectSGetBarrierState(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = I.getOperand(2);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);

  if (!BarValImm) {
    // No constant barrier id: pass it in M0 instead.
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(BarOp.getReg());
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
                           : AMDGPU::S_GET_BARRIER_STATE_M0;

  auto DstReg = I.getOperand(0).getReg();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc), DstReg);
  if (BarValImm)
    MIB.addImm(*BarValImm);

  I.eraseFromParent();
  return true;
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    default: llvm_unreachable("not a named barrier op");
    }
  }
  switch (IntrID) {
  case Intrinsic::amdgcn_s_barrier_join:
    return AMDGPU::S_BARRIER_JOIN_M0;
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return AMDGPU::S_GET_BARRIER_STATE_M0;
  default: llvm_unreachable("not a named barrier op");
  }
}
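// Typical use of the helper above, as in the named-barrier selectors that
// follow: pick the _IMM form when the barrier operand folded to a constant,
// and the _M0 form otherwise.
//
//   unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);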
bool AMDGPUInstructionSelector::selectNamedBarrierInit(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = I.getOperand(1);
  MachineOperand CntOp = I.getOperand(2);

  // Pack the barrier id and the member count into M0.
  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ... extract the barrier id field from BarOp.
  Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  constexpr unsigned ShAmt = 16;
  // ... mask the member count from CntOp and shift it left by ShAmt.
  Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ... OR the two halves together and copy the result into M0.

  unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
                     ? AMDGPU::S_BARRIER_INIT_M0
                     : AMDGPU::S_BARRIER_SIGNAL_M0;
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);

  if (!BarValImm) {
    // No constant operand: compute the barrier id into scratch SGPRs and pass
    // it in M0.
    Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(TmpReg1);
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
    auto DstReg = I.getOperand(0).getReg();
    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
    if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
      return false;
    MIB = BuildMI(*MBB, &I, DL, TII.get(Opc), DstReg);
  } else {
    MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
  }

  if (BarValImm) {
    auto BarId = ((*BarValImm) >> 4) & 0x3F;
    MIB.addImm(BarId);
  }
  I.eraseFromParent();
  return true;
}
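// Note on the extraction used above: ((Val >> 4) & 0x3F) reads bits [9:4] of
// the named-barrier value, which carry the hardware barrier id. For example,
// a value of 0x250 yields barrier id 0x25.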
  // Immediate renderer: emit a G_CONSTANT as a sign-extended immediate.
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

  // Negated-immediate renderer: emit the negation of a G_CONSTANT.
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

  // FP bitcast renderer: emit a G_FCONSTANT as its raw bit pattern.
  const MachineOperand &Op = MI.getOperand(1);
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

  // Population-count renderer: emit the number of set bits in a G_CONSTANT.
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
  const MachineOperand &Op = MI.getOperand(OpIdx);
  // ...

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
// The op_sel transform renderers below share the same renderer signature and
// each begins by checking that an immediate operand was matched.
void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}
  // Render a power-of-two FP constant as its exponent.
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
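// Worked example for the renderer above: an FP constant of 8.0 has
// getExactLog2Abs() == 3, so the rendered immediate is 3; non-power-of-two
// values never reach this point (the assert guards against INT_MIN).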
  // Source-modifier renderers: when the matched immediate is set, fold the
  // corresponding modifier bits into Mods before emitting it.
  if (MI.getOperand(OpIdx).getImm()) {
    // ... set the relevant modifier bits in Mods.
  }
  MIB.addImm((int64_t)Mods);

  if (MI.getOperand(OpIdx).getImm()) {
    // ... set the relevant modifier bits in Mods.
  }
  MIB.addImm((int64_t)Mods);
  unsigned Val = MI.getOperand(OpIdx).getImm();
  // ... translate Val into the corresponding modifier bits in Mods.
  MIB.addImm((int64_t)Mods);
  uint32_t V = MI.getOperand(2).getImm();
  // ... convert the prefetch locality hint into a cache-policy scope.
  if (!Subtarget->hasSafeCUPrefetch()) {
    // ... widen the scope, since CU-scoped prefetch is not safe here.
  }
void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  unsigned Val = MI.getOperand(OpIdx).getImm();
  // ...
}
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}
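// For reference, AMDGPU inline constants cover small integers (-16..64) and a
// handful of floating-point values (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, and
// 1/(2*pi) on newer subtargets), so for example:
//
//   isInlineImmediate(APInt(32, 64));   // true  - within the inline range
//   isInlineImmediate(APInt(32, 65));   // false - would require a literal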