#include "llvm/IR/IntrinsicsAMDGPU.h"
#ifdef EXPENSIVE_CHECKS
#define DEBUG_TYPE "amdgpu-isel"
  In = stripBitcast(In);
  Out = In.getOperand(0);
  if (ShiftAmt->getZExtValue() == 16) {
  if (Lo->isDivergent()) {
      SL, Lo.getValueType()),
      Src.getValueType(), Ops),
  SDValue Idx = In.getOperand(1);
    return In.getOperand(0);
  SDValue Src = In.getOperand(0);
  if (Src.getValueType().getSizeInBits() == 32)
    return stripBitcast(Src);
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false,
#ifdef EXPENSIVE_CHECKS
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false,
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
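  // Note (added): this predicate reports whether an opcode producing an f16
  // result is known to write zeros into the high 16 bits of the 32-bit
  // register, so later packing can skip a mask. The case list below is an
  // excerpt of the full switch; treat it as a sketch.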
  case ISD::FNEARBYINT:
  case ISD::FROUNDEVEN:
#ifdef EXPENSIVE_CHECKS
    assert(L->isLCSSAForm(DT));
#ifdef EXPENSIVE_CHECKS
  assert(Subtarget->d16PreservesUnusedBits());
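  // Note (added): matchLoadD16FromBuildVector folds a 16-bit load feeding a
  // build_vector into a d16 load that writes only the high or low half of a
  // 32-bit register while preserving the other half (hence the
  // d16PreservesUnusedBits() check above). Summary inferred from the
  // surrounding excerpt.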
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
  if (LdLo && Lo.hasOneUse()) {
      TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
        CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
  if (!Subtarget->d16PreservesUnusedBits())
  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    switch (N->getOpcode()) {
    CurDAG->RemoveDeadNodes();
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
    return TII->isInlineConstant(C->getAPIntValue());
    return TII->isInlineConstant(C->getValueAPF());
                                                 unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (Reg.isVirtual()) {
      return MRI.getRegClass(Reg);
    const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
    return TRI->getPhysRegBaseClass(Reg);
  switch (N->getMachineOpcode()) {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    int RegClass = Desc.operands()[OpIdx].RegClass;
    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = N->getConstantOperandVal(0);
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);
    SDValue SubRegOp = N->getOperand(OpNo + 1);
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
  Ops.push_back(NewChain);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
  return glueCopyToOp(N, M0, M0.getValue(1));
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
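  // Note (added): on subtargets where LDS instructions require M0 to be
  // initialized, M0 is set to -1 (an all-ones bound) before the access;
  // otherwise the GDS size from SIMachineFunctionInfo is copied in below.
  // Inferred from the excerpt.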
  if (Subtarget->ldsRequiresM0Init())
        N, CurDAG->getSignedTargetConstant(-1, SDLoc(N), MVT::i32));
  MachineFunction &MF = CurDAG->getMachineFunction();
  unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
      glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
  SDNode *Hi = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
  EVT VT = N->getValueType(0);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
  if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
    bool AllConst = true;
    for (unsigned I = 0; I < NumVectorElts; ++I) {
        Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
      C |= Val << (EltSize * I);
          CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  if (NOps != NumVectorElts) {
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
  EVT VT = N->getValueType(0);
  if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) ||
         Mask[0] < 4 && Mask[1] < 4);
  SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
  SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
  unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
  unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
    Src0SubReg = Src1SubReg;
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
    Src1SubReg = Src0SubReg;
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
  if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
      Src1SubReg == AMDGPU::sub0) {
    SDValue Src0OpSelVal = CurDAG->getTargetConstant(Src0OpSel, DL, MVT::i32);
    SDValue Src1OpSelVal = CurDAG->getTargetConstant(Src1OpSel, DL, MVT::i32);
    CurDAG->SelectNodeTo(N, AMDGPU::V_PK_MOV_B32, N->getVTList(),
                         {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
      CurDAG->getTargetExtractSubreg(Src0SubReg, DL, EltVT, VSrc0);
      CurDAG->getTargetExtractSubreg(Src1SubReg, DL, EltVT, VSrc1);
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      ResultElt0, CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      ResultElt1, CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
  CurDAG->SelectNodeTo(N, TargetOpcode::REG_SEQUENCE, VT, Ops);
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
  N = glueCopyToM0LDSInit(N);
    if (N->getValueType(0) != MVT::i64)
    SelectADD_SUB_I64(N);
    if (N->getValueType(0) != MVT::i32)
    SelectUADDO_USUBO(N);
    SelectFMUL_W_CHAIN(N);
    SelectFMA_W_CHAIN(N);
    EVT VT = N->getValueType(0);
    unsigned RegClassID =
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    const SDValue Ops[] = {RC, N->getOperand(0), SubReg0,
                           N->getOperand(1), SubReg1};
                                          N->getValueType(0), Ops));
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
        Subtarget->has64BitLiterals())
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
      Imm = C->getZExtValue();
      return SelectMUL_LOHI(N);
    if (N->getValueType(0) != MVT::i32)
    if (N->getValueType(0) == MVT::i32) {
                           {N->getOperand(0), N->getOperand(1)});
    SelectINTRINSIC_W_CHAIN(N);
    SelectINTRINSIC_WO_CHAIN(N);
    SelectINTRINSIC_VOID(N);
    SelectWAVE_ADDRESS(N);
  case ISD::STACKRESTORE: {
    SelectSTACKRESTORE(N);
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  const APInt &RHS = N->getConstantOperandAPInt(1);
  if (RHS.countr_one() >= ShAmtBits)
    N1 = Lo.getOperand(1);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
#ifdef EXPENSIVE_CHECKS
  for (auto &L : LI.getLoopsInPreorder())
    assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
  SDNode *Mov = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Val, DL, MVT::i32));
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);
  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
  if (!ConsumeCarry) {
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
                       MVT::i64, RegSequenceArgs);
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  if (N->isDivergent()) {
                       : AMDGPU::V_SUBB_U32_e64;
        N, Opc, N->getVTList(),
        CurDAG->getTargetConstant(0, {}, MVT::i1)});
                       : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  bool IsVALU = N->isDivergent();
  for (SDNode::user_iterator UI = N->user_begin(), E = N->user_end(); UI != E;
    if (UI.getUse().getResNo() == 1) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1)});
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);
  bool UseFMAC = Subtarget->hasDLInsts() &&
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);
  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  EVT VT = N->getValueType(0);
  assert(VT == MVT::f32 || VT == MVT::f64);
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64
                         : AMDGPU::V_DIV_SCALE_F32_e64;
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && !N->hasAnyUseOfValue(1);
  if (Subtarget->hasMADIntraFwdBug())
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
  else if (UseNoCarry)
    Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),
  MachineSDNode *Mad = CurDAG->getMachineNode(Opc, SL, MVT::i64, Ops);
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
  if (Subtarget->hasMadU64U32NoCarry()) {
    VTList = CurDAG->getVTList(MVT::i64);
    Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
    VTList = CurDAG->getVTList(MVT::i64, MVT::i1);
    if (Subtarget->hasMADIntraFwdBug()) {
      Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                   : AMDGPU::V_MAD_U64_U32_gfx11_e64;
      Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDNode *Mad = CurDAG->getMachineNode(Opc, SL, VTList, Ops);
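  // Note (added): SelectMUL_LOHI lowers the 32x32->64 multiply to a single
  // 64-bit mad (presumably with a zero addend, per the V_MAD_* opcodes chosen
  // above); the lo and hi 32-bit results are then recovered below via
  // EXTRACT_SUBREG on sub0 and sub1.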
  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
  SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                      MVT::i32, SDValue(Mad, 0), Sub0);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
  SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                      MVT::i32, SDValue(Mad, 0), Sub1);
  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        if (isDSOffsetLegal(Sub, ByteOffset)) {
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
                CurDAG->getTargetConstant(0, {}, MVT::i1));
          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
  if (Subtarget->hasSignedScratchOffsets())
  ConstantSDNode *ImmOp = nullptr;
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
  if (Subtarget->hasSignedScratchOffsets())
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
       (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
  auto LHS = Base.getOperand(0);
  auto RHS = Base.getOperand(1);
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
                                              unsigned Size) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
  if (const ConstantSDNode *C =
    unsigned OffsetValue0 = C->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
        if (Subtarget->hasAddNoCarry()) {
          SubOp = AMDGPU::V_SUB_U32_e64;
              CurDAG->getTargetConstant(0, {}, MVT::i1));
        MachineSDNode *MachineSub = CurDAG->getMachineNode(
            CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
            CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i32);
  if (Subtarget->useFlatForGlobal())
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = Subtarget->hasRestrictedSOffset()
                ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
                : CurDAG->getTargetConstant(0, DL, MVT::i32);
  ConstantSDNode *C1 = nullptr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
        AMDGPU::S_MOV_B32, DL, MVT::i32,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
  if (!Subtarget->hasAddr64())
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    if (C->getSExtValue()) {
std::pair<SDValue, SDValue>
AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
  return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
    if (Imm != NullPtr) {
          CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);
      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
    if (TII->isLegalMUBUFImmOffset(C1) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32);
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  if (!Reg.isPhysical())
  const auto *RC = TRI.getPhysRegBaseClass(Reg);
  return RC && TRI.isSGPRClass(RC);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  ConstantSDNode *CAddr;
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
  if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
    SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
  SOffset = ByteOffsetNode;
bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;
  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    if (isBaseWithConstantOffset64(Addr, N0, N1) &&
         isFlatScratchBaseLegal(Addr))) {
      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        OffsetVal = COffsetVal;
        uint64_t RemainderOffset;
        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
              getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
          Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
          SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
                                             MVT::i64, RegSequenceArgs),
  Offset = CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
  if (Op.getValueType() == MVT::i32)
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
                                           bool NeedIOffset) const {
  int64_t ImmOffset = 0;
  ScaleOffset = false;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
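        // Note (added): when the constant offset does not fit the FLAT
        // immediate field, it is split into a legal immediate plus a
        // remainder; the remainder is materialized in a VGPR (the V_MOV_B32
        // below) and becomes the variable offset of the SADDR form.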
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
          SDNode *VMov = CurDAG->getMachineNode(
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
              CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
          Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
      unsigned NumLiterals =
          !TII->isInlineConstant(APInt(32, Lo_32(COffsetVal))) +
          !TII->isInlineConstant(APInt(32, Hi_32(COffsetVal)));
      if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
    if (!LHS->isDivergent()) {
      ScaleOffset = SelectScaleOffset(N, RHS, Subtarget->hasSignedGVSOffset());
                                 RHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
    if (!SAddr && !RHS->isDivergent()) {
      ScaleOffset = SelectScaleOffset(N, LHS, Subtarget->hasSignedGVSOffset());
                                 LHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
        Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
  if (Subtarget->hasScaleOffset() &&
      (Addr.getOpcode() == (Subtarget->hasSignedGVSOffset()
    Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
      CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
                             CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
  Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(SDNode *N, SDValue Addr,
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
      N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(SDNode *N, SDValue Addr,
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
      N->getConstantOperandVal(N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
  CPol = CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(SDNode *N, SDValue Addr,
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
      N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(SDNode *N, SDValue Addr,
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
                                   FI->getValueType(0));
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
  int64_t COffsetVal = 0;
  if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
    int64_t SplitImmOffset, RemainderOffset;
    std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
    COffsetVal = SplitImmOffset;
            ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
            : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);
    SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
  Offset = CurDAG->getSignedTargetConstant(COffsetVal, DL, MVT::i32);
bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
  if (!Subtarget->hasFlatScratchSVSSwizzleBug())
  KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
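  // Note (added): the swizzle bug concerns the carry out of the low two bits
  // of the VAddr + SAddr (+ offset) sum; if the known maxima of the two
  // low 2-bit fields can sum past 3, the add may carry into bit 2 and hit
  // the bug, so the SVS form must be rejected. Inferred from the check below.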
  return (VMax & 3) + (SMax & 3) >= 4;
bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
  int64_t ImmOffset = 0;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {
      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        SDNode *VMov = CurDAG->getMachineNode(
            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
            CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
        if (!isFlatScratchBaseLegal(Addr))
        if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
        Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
        CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
  if (!LHS->isDivergent() && RHS->isDivergent()) {
  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
    if (!isFlatScratchBaseLegalSV(OrigAddr))
  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
  Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
  bool ScaleOffset = SelectScaleOffset(N, VAddr, true);
bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     int64_t ImmOffset) const {
  if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
    KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
                                               bool IsSigned) const {
  bool ScaleOffset = false;
  if (!Subtarget->hasScaleOffset() || !Offset)
      (Offset.isMachineOpcode() &&
       Offset.getMachineOpcode() ==
           (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
                     : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
    ScaleOffset = C->getZExtValue() == Size;
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode,
                                          bool Imm32Only, bool IsBuffer,
                                          bool HasSOffset, int64_t ImmOffset,
                                          bool *ScaleOffset) const {
         "Cannot match both soffset and offset at the same time!");
    *ScaleOffset = SelectScaleOffset(N, ByteOffsetNode, false);
    *SOffset = ByteOffsetNode;
    return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
      return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
  SDLoc SL(ByteOffsetNode);
  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
      *Subtarget, ByteOffset, IsBuffer, HasSOffset);
  if (EncodedOffset && Offset && !Imm32Only) {
    *Offset = CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
  if (EncodedOffset && Offset && Imm32Only) {
    *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
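  // Note (added): a 32-bit address is widened to the 64-bit SMEM base by
  // pairing it with the function's fixed high address bits through a
  // REG_SEQUENCE (sub0 = low 32 bits, sub1 = S_MOV_B32 of the high bits),
  // as built below.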
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
      CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
      CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
      CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDNode *N, SDValue Addr,
                                              bool IsBuffer, bool HasSOffset,
                                              bool *ScaleOffset) const {
    assert(!Imm32Only && !IsBuffer);
    if (!SelectSMRDBaseOffset(N, Addr, B, nullptr, Offset, false, false, true))
      ImmOff = C->getSExtValue();
    return SelectSMRDBaseOffset(N, B, SBase, SOffset, nullptr, false, false,
                                true, ImmOff, ScaleOffset);
  if (SelectSMRDOffset(N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                       ImmOffset, ScaleOffset)) {
  if (SelectSMRDOffset(N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                       ImmOffset, ScaleOffset)) {
                                    bool Imm32Only, bool *ScaleOffset) const {
  if (SelectSMRDBaseOffset(N, Addr, SBase, SOffset, Offset, Imm32Only,
    SBase = Expand32BitAddress(SBase);
  SBase = Expand32BitAddress(Addr);
  *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
  return SelectSMRD(nullptr, Addr, SBase, nullptr,
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
  return SelectSMRD(nullptr, Addr, SBase, nullptr,
  if (!SelectSMRD(N, Addr, SBase, &SOffset, nullptr,
                  false, &ScaleOffset))
                                   SDLoc(N), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDNode *N, SDValue Addr,
  if (!SelectSMRD(N, Addr, SBase, &SOffset, &Offset, false, &ScaleOffset))
                                   SDLoc(N), MVT::i32);
  return SelectSMRDOffset(nullptr, N, nullptr, &Offset,
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
  return SelectSMRDOffset(nullptr, N, nullptr, &Offset,
bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
  return N.getValueType() == MVT::i32 &&
         SelectSMRDBaseOffset(nullptr, N, SOffset,
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
  if (CurDAG->isBaseWithConstantOffset(Index)) {
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
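  // Note (added): the scalar BFE takes a single packed source operand with
  // the bit offset in the low bits and the field width in bits [22:16];
  // e.g. offset 8, width 4 packs to (8 | (4 << 16)) == 0x00040008.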
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  const SDValue &Shl = N->getOperand(0);
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();
    if (0 < BVal && BVal <= CVal && CVal < 32) {
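      // Note (added): this matches (srl/sra (shl x, B), C) with
      // 0 < B <= C < 32, which is a bitfield extract of width 32 - C
      // starting at bit C - B; the signedness of the outer shift picks
      // S_BFE_I32 vs. S_BFE_U32. Sketch inferred from the guard above.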
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      const SDValue &Srl = N->getOperand(0);
      if (Shift && Mask) {
        uint32_t MaskVal = Mask->getZExtValue();
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      if (Shift && Mask) {
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i64) {
           Subtarget->hasScalarCompareEq64();
  if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
  auto Cond = VCMP.getOperand(0);
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  bool AndExec = !UseSCCBr;
  bool Negate = false;
      VCMP.getValueType().getSizeInBits() == Subtarget->getWavefrontSize()) {
    bool NegatedBallot = false;
      UseSCCBr = !BallotCond->isDivergent();
      Negate = Negate ^ NegatedBallot;
      UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
               : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
          Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
          CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
  if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
      !N->isDivergent()) {
    if (Src.getValueType() == MVT::f16) {
      CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ? AMDGPU::DS_APPEND
                                                       : AMDGPU::DS_CONSUME;
  MachineMemOperand *MMO = M->getMemOperand();
  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);
      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
    N = glueCopyToM0(N, Ptr);
    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
      CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N,
                                                   unsigned IntrID) {
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
  SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                   N->getOperand(5), N->getOperand(0)};
  MachineMemOperand *MMO = M->getMemOperand();
  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (!Subtarget->hasGWS() ||
      (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
       !Subtarget->hasGWSSemaReleaseAll())) {
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  MachineMemOperand *MMO = M->getMemOperand();
      glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
    ImmOffset = ConstOffset->getZExtValue();
    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
        = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
        = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                                 CurDAG->getTargetConstant(16, SL, MVT::i32));
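    // Note (added): the GWS hardware reads its base from the high half of
    // M0, so the (readfirstlane'd) base is shifted left by 16 before being
    // glued into M0; the remaining constant part goes into the instruction's
    // offset field below.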
    glueCopyToM0(N, SDValue(M0Base, 0));
  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
    Ops.push_back(N->getOperand(2));
  Ops.push_back(OffsetField);
  Ops.push_back(Chain);
  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
  if (Subtarget->getLDSBankCount() != 16) {
  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
      CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
          CurDAG->getTargetConstant(2, DL, MVT::i32),
  SDNode *InterpP1LV =
      CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
          CurDAG->getTargetConstant(0, DL, MVT::i32),
          CurDAG->getTargetConstant(0, DL, MVT::i32),
          CurDAG->getTargetConstant(0, DL, MVT::i1),
          CurDAG->getTargetConstant(0, DL, MVT::i32),
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {
    if (N->getValueType(0) != MVT::i32)
    SelectDSAppendConsume(N, IntrID);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    SelectDSBvhStackIntrinsic(N, IntrID);
  case Intrinsic::amdgcn_init_whole_wave:
    CurDAG->getMachineFunction()
        .getInfo<SIMachineFunctionInfo>()
        ->setInitWholeWave();
void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(0);
  unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
  SDNode *ConvGlueNode = N->getGluedNode();
        CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
                               MVT::Glue, SDValue(ConvGlueNode, 0));
    ConvGlueNode = nullptr;
  case Intrinsic::amdgcn_wqm:
    Opcode = AMDGPU::WQM;
  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;
  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;
  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap: {
    if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
         !Subtarget->hasPermlane16Swap()) ||
        (IntrID == Intrinsic::amdgcn_permlane32_swap &&
         !Subtarget->hasPermlane32Swap())) {
    Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
      NewOps.push_back(SDValue(ConvGlueNode, 0));
    bool FI = N->getConstantOperandVal(3);
    NewOps[2] = CurDAG->getTargetConstant(
    CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), NewOps);
  if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
    CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
    NewOps.push_back(SDValue(ConvGlueNode, 0));
    CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);
void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
      CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N),
                                MVT::i32);
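  // Note (added): SelectWAVE_ADDRESS emits S_LSHR_B32 by
  // log2(wavefront size), converting a byte-scaled stack address into a
  // wave-relative one; SelectSTACKRESTORE below appears to perform the
  // inverse S_LSHL_B32 before copying back into the stack pointer.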
  CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
                       {N->getOperand(0), Log2WaveSize});
void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
      Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
  if (N->isDivergent()) {
    SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
  CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                                           {SrcVal, Log2WaveSize}),
  SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
  if (Src.getOpcode() == ISD::FNEG) {
    Src = Src.getOperand(0);
  } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = Src.getOperand(1);
  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
    Src = Src.getOperand(0);
  if (IsCanonicalizing)
  unsigned Opc = Src->getOpcode();
  EVT VT = Src.getValueType();
      (VT != MVT::i32 && VT != MVT::i64))
    Src = Src.getOperand(0);
    Src = Src.getOperand(0);
    Src = Src.getOperand(0);
  if (SelectVOP3ModsImpl(In, Src, Mods, true,
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
  if (SelectVOP3ModsImpl(In, Src, Mods, false,
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
  if (SelectVOP3ModsImpl(In, Src, Mods,
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
  if (SelectVOP3ModsImpl(In, Src, Mods,
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
  return SelectVINTERPModsImpl(In, Src, SrcMods, false);
bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
  return SelectVINTERPModsImpl(In, Src, SrcMods, true);
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
  return SelectVOP3Mods(In, Src, SrcMods);
bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
  return SelectVOP3BMods(In, Src, SrcMods);
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
  if (Src.getOpcode() == ISD::FNEG) {
    Src = Src.getOperand(0);
      (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
    unsigned VecMods = Mods;
    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));
    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
    unsigned VecSize = Src.getValueSizeInBits();
    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);
    if (Lo.getValueSizeInBits() > VecSize) {
      Lo = CurDAG->getTargetExtractSubreg(
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
    if (Hi.getValueSizeInBits() > VecSize) {
      Hi = CurDAG->getTargetExtractSubreg(
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
    assert(Lo.getValueSizeInBits() <= VecSize &&
           Hi.getValueSizeInBits() <= VecSize);
    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      if (VecSize == Lo.getValueSizeInBits()) {
      } else if (VecSize == 32) {
        Src = createVOP3PSrc32FromLo16(Lo, Src, CurDAG, Subtarget);
        assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
            CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
                                   Lo.getValueType()), 0);
        auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                    : AMDGPU::SReg_64RegClassID;
            CurDAG->getTargetConstant(RC, SL, MVT::i32),
            Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
            Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
        Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
                                             Src.getValueType(), Ops), 0);
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
                       .bitcastToAPInt().getZExtValue();
      Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      Src.getNumOperands() == 2) {
    ArrayRef<int> Mask = SVN->getMask();
    if (Mask[0] < 2 && Mask[1] < 2) {
      SDValue ShuffleSrc = SVN->getOperand(0);
      if (ShuffleSrc.getOpcode() == ISD::FNEG) {
        SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
  return SelectVOP3PMods(In, Src, SrcMods, true);
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  unsigned SrcVal = C->getZExtValue();
  Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  unsigned DstRegClass;
  switch (Elts.size()) {
    DstRegClass = AMDGPU::VReg_256RegClassID;
    DstRegClass = AMDGPU::VReg_128RegClassID;
    DstRegClass = AMDGPU::VReg_64RegClassID;
  for (unsigned i = 0; i < Elts.size(); ++i) {
    Ops.push_back(Elts[i]);
  assert("unhandled Reg sequence size" &&
         (Elts.size() == 8 || Elts.size() == 16));
  for (unsigned i = 0; i < Elts.size(); i += 2) {
    SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
                                 {Elts[i + 1], Elts[i], PackLoLo});
                                          const SDLoc &DL, unsigned ElementSize) {
  if (ElementSize == 16)
  if (ElementSize == 32)
                                 unsigned ElementSize) {
  if (ModOpcode == ISD::FNEG) {
    for (auto El : Elts) {
      if (El.getOpcode() != ISD::FABS)
      NegAbsElts.push_back(El->getOperand(0));
    if (Elts.size() != NegAbsElts.size()) {
    assert(ModOpcode == ISD::FABS);
    std::function<bool(SDValue)> ModifierCheck) {
    for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
      SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
      if (!ModifierCheck(ElF16))
bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
        if (EltsF16.empty())
          ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
      if (EltsV2F16.empty())
        ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
      unsigned ModOpcode =
          (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
    BitVector UndefElements;
    if (isInlineImmediate(Splat.getNode())) {
        unsigned Imm = C->getAPIntValue().getSExtValue();
        Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
        unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
        Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
  SDValue SplatSrc32 = stripBitcast(In);
    if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
      SDValue SplatSrc16 = stripBitcast(Splat32);
        const SIInstrInfo *TII = Subtarget->getInstrInfo();
        std::optional<APInt> RawValue;
          RawValue = C->getValueAPF().bitcastToAPInt();
          RawValue = C->getAPIntValue();
        if (RawValue.has_value()) {
          EVT VT = In.getValueType().getScalarType();
            if (TII->isInlineConstant(FloatVal)) {
              Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
            if (TII->isInlineConstant(RawValue.value())) {
              Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
    const llvm::SDValue &ShiftSrc = In.getOperand(0);
  IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
    const llvm::SDValue &ShiftSrc = In.getOperand(0);
  IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src,
    const SDValue &ExtendSrc = In.getOperand(0);
  } else if (In->getOpcode() == ISD::BITCAST) {
    const SDValue &CastSrc = In.getOperand(0);
      if (Zero && Zero->getZExtValue() == 0)
    Src = ExtractVecEltSrc;
  IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
  return SelectVOP3Mods(In, Src, SrcMods);
  if (Op.getValueType() != MVT::f32 || Op.getOpcode() != ISD::BITCAST)
  Op = Op.getOperand(0);
  IsExtractHigh = false;
    if (!Low16 || !Low16->isZero())
    Op = stripBitcast(Op.getOperand(1));
    if (Op.getValueType() != MVT::bf16)
  if (Op.getValueType() != MVT::i32)
    if (Mask->getZExtValue() == 0xffff0000) {
      IsExtractHigh = true;
      return Op.getOperand(0);
    return Op.getOperand(0);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
  SelectVOP3ModsImpl(In, Src, Mods);
  bool IsExtractHigh = false;
  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
  } else if (VT == MVT::bf16) {
  if (Src.getValueType() != VT &&
      (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
  Src = stripBitcast(Src);
    SelectVOP3ModsImpl(Src, Src, ModsTmp);
  if (IsExtractHigh ||
      Src.getOperand(0).getValueType() == MVT::i32) {
    Src = Src.getOperand(0);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
  SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(SDValue In, SDValue &Src,
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
  SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  unsigned NumOpcodes = 0;
  const uint8_t SrcBits[3] = {0xf0, 0xcc, 0xaa};
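  // Note (added): BitOp3 matching encodes each expression as an 8-bit truth
  // table over three inputs. The SrcBits table above gives the canonical
  // tables for the raw sources (0xf0, 0xcc, 0xaa); AND/OR/XOR of
  // subexpressions then combine as bitwise &, | and ^ of their tables (see
  // the TTbl switch below), and the resulting byte is emitted as the
  // instruction's table immediate.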
  if (C->isAllOnes()) {
  for (unsigned I = 0; I < Src.size(); ++I) {
  if (Src.size() == 3) {
    if (C->isAllOnes()) {
      for (unsigned I = 0; I < Src.size(); ++I) {
        if (Src[I] == LHS) {
  Bits = SrcBits[Src.size()];
  switch (In.getOpcode()) {
    if (!getOperandBits(LHS, LHSBits) ||
        !getOperandBits(RHS, RHSBits)) {
      return std::make_pair(0, 0);
    NumOpcodes += Op.first;
    LHSBits = Op.second;
    NumOpcodes += Op.first;
    RHSBits = Op.second;
    return std::make_pair(0, 0);
  switch (In.getOpcode()) {
    TTbl = LHSBits & RHSBits;
    TTbl = LHSBits | RHSBits;
    TTbl = LHSBits ^ RHSBits;
  return std::make_pair(NumOpcodes + 1, TTbl);
  unsigned NumOpcodes;
  std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
  if (NumOpcodes < 2 || Src.empty())
  if (NumOpcodes < 4 && !In->isDivergent())
  if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
      (In.getOperand(0).getOpcode() == In.getOpcode() ||
       In.getOperand(1).getOpcode() == In.getOpcode()))
  while (Src.size() < 3)
    Src.push_back(Src[0]);
  Tbl = CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
  return CurDAG->getUNDEF(MVT::i32);
  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
  return CurDAG->getConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  assert(CurDAG->getTarget().getTargetTriple().isAMDGCN());
  const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
  const SIInstrInfo *SII = Subtarget->getInstrInfo();
  bool AllUsesAcceptSReg = true;
       Limit < 10 && U != E; ++U, ++Limit) {
    const TargetRegisterClass *RC =
        getOperandRegClass(U->getUser(), U->getOperandNo());
    if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
      AllUsesAcceptSReg = false;
      SDNode *User = U->getUser();
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        const MCInstrDesc &Desc = SII->get(Opc);
        if (Desc.isCommutable()) {
          unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
            const TargetRegisterClass *CommutedRC =
                getOperandRegClass(U->getUser(), CommutedOpNo);
            if (CommutedRC == &AMDGPU::VS_32RegClass ||
                CommutedRC == &AMDGPU::VS_64RegClass)
              AllUsesAcceptSReg = true;
    if (!AllUsesAcceptSReg)
  return !AllUsesAcceptSReg && (Limit < 10);
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  const MachineMemOperand *MMO = Ld->getMemOperand();
         (Subtarget->getScalarizeGlobalBehavior() &&
              ->isMemOpHasNoClobberedMemOperand(N)));
  bool IsModified = false;
    while (Position != CurDAG->allnodes_end()) {
      if (ResNode != Node) {
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Undef
Value of the register doesn't matter.
@ User
could "use" a pointer
constexpr const char32_t SBase
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
int popcount(T Value) noexcept
Count the number of set bits in a value.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static SDNode * packConstantV2I16(const SDNode *N, SelectionDAG &DAG)
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Implement std::hash so that hash_code can be used in STL containers.
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.