29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
53 In = stripBitcast(In);
59 Out = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
90 if (
Lo->isDivergent()) {
92 SL,
Lo.getValueType()),
100 Src.getValueType(), Ops),
120 return In.getOperand(0);
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
135 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
140#ifdef EXPENSIVE_CHECKS
145 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
166bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
230#ifdef EXPENSIVE_CHECKS
231 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
232 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
234 assert(L->isLCSSAForm(DT));
243#ifdef EXPENSIVE_CHECKS
252 MVT VT =
N->getValueType(0).getSimpleVT();
253 if (VT != MVT::v2i16 && VT != MVT::v2f16)
259 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(
Hi));
296 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(
Lo));
297 if (LdLo &&
Lo.hasOneUse()) {
336 bool MadeChange =
false;
342 switch (
N->getOpcode()) {
359bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
365 return TII->isInlineConstant(
C->getAPIntValue());
368 return TII->isInlineConstant(
C->getValueAPF());
378 unsigned OpNo)
const {
379 if (!
N->isMachineOpcode()) {
381 Register Reg = cast<RegisterSDNode>(
N->getOperand(1))->getReg();
382 if (Reg.isVirtual()) {
384 return MRI.getRegClass(Reg);
388 return TRI->getPhysRegBaseClass(Reg);
394 switch (
N->getMachineOpcode()) {
398 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
401 int RegClass =
Desc.operands()[
OpIdx].RegClass;
407 case AMDGPU::REG_SEQUENCE: {
408 unsigned RCID =
N->getConstantOperandVal(0);
412 SDValue SubRegOp =
N->getOperand(OpNo + 1);
422 SmallVector <SDValue, 8> Ops;
424 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
435 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
438 return glueCopyToOp(
N,
M0,
M0.getValue(1));
441SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
442 unsigned AS = cast<MemSDNode>(
N)->getAddressSpace();
459 AMDGPU::S_MOV_B32,
DL, MVT::i32,
462 AMDGPU::S_MOV_B32,
DL, MVT::i32,
473 EVT VT =
N->getValueType(0);
479 if (NumVectorElts == 1) {
489 bool AllConst =
true;
491 for (
unsigned I = 0;
I < NumVectorElts; ++
I) {
      Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
      Val = cast<ConstantSDNode>(Op)->getZExtValue();
    C |= Val << (EltSize * I);
  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    if (isa<RegisterSDNode>(N->getOperand(i))) {
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
  if (NOps != NumVectorElts) {
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
  EVT VT = N->getValueType(0);
  auto *SVN = cast<ShuffleVectorSDNode>(N);
         Mask[0] < 4 && Mask[1] < 4);
  SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
  SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
  unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
  unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
    Src0SubReg = Src1SubReg;
    Src1SubReg = Src0SubReg;
  if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
      Src1SubReg == AMDGPU::sub0) {
      {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
  N = glueCopyToM0LDSInit(N);
    if (N->getValueType(0) != MVT::i64)
    SelectADD_SUB_I64(N);
    if (N->getValueType(0) != MVT::i32)
    SelectUADDO_USUBO(N);
    SelectFMUL_W_CHAIN(N);
    SelectFMA_W_CHAIN(N);
    EVT VT = N->getValueType(0);
    unsigned RegClassID =
    if (N->getValueType(0) == MVT::i128) {
    } else if (N->getValueType(0) == MVT::i64) {
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
        N->getValueType(0), Ops));
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
      Imm = C->getZExtValue();
      return SelectMUL_LOHI(N);
    if (N->getValueType(0) != MVT::i32)
    if (N->getValueType(0) == MVT::i32) {
          { N->getOperand(0), N->getOperand(1) });
    SelectINTRINSIC_W_CHAIN(N);
    SelectINTRINSIC_WO_CHAIN(N);
    SelectINTRINSIC_VOID(N);
    SelectWAVE_ADDRESS(N);
    SelectSTACKRESTORE(N);
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  const APInt &RHS = N->getConstantOperandAPInt(1);
  if (RHS.countr_one() >= ShAmtBits)
  return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
    N1 = Lo.getOperand(1);
  assert(LHS && RHS && isa<ConstantSDNode>(RHS));
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
#ifdef EXPENSIVE_CHECKS
  for (auto &L : LI.getLoopsInPreorder())
    assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
      AMDGPU::S_MOV_B32, DL, MVT::i32,
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  unsigned Opcode = N->getOpcode();
      DL, MVT::i32, LHS, Sub0);
      DL, MVT::i32, LHS, Sub1);
      DL, MVT::i32, RHS, Sub0);
      DL, MVT::i32, RHS, Sub1);
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
  if (!ConsumeCarry) {
      MVT::i64, RegSequenceArgs);
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  if (N->isDivergent()) {
                       : AMDGPU::V_SUBB_U32_e64;
        N, Opc, N->getVTList(),
        CurDAG->getTargetConstant(0, {}, MVT::i1) });
                       : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  bool IsVALU = N->isDivergent();
    if (UI.getUse().getResNo() == 1) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) });
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);
      cast<ConstantSDNode>(Ops[0])->isZero() &&
      cast<ConstantSDNode>(Ops[2])->isZero() &&
      cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  EVT VT = N->getValueType(0);
  assert(VT == MVT::f32 || VT == MVT::f64);
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
  else if (UseNoCarry)
    Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
    Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
      MVT::i32, SDValue(Mad, 0), Sub0);
      MVT::i32, SDValue(Mad, 0), Sub1);
    int64_t ByteOffset = C->getSExtValue();
    if (isDSOffsetLegal(SDValue(), ByteOffset)) {
          Zero, Addr.getOperand(1));
      if (isDSOffsetLegal(Sub, ByteOffset)) {
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          SubOp = AMDGPU::V_SUB_U32_e64;
    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
          DL, MVT::i32, Zero);
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
          Addr->getFlags().hasNoUnsignedWrap()) ||
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
  auto LHS = Addr.getOperand(0);
  auto RHS = Addr.getOperand(1);
  if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
  auto LHS = Addr.getOperand(0);
  auto RHS = Addr.getOperand(1);
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
  auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
       (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
  auto LHS = Base.getOperand(0);
  auto RHS = Base.getOperand(1);
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
                                          unsigned Size) const {
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
        dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
    unsigned OffsetValue0 = C->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          SubOp = AMDGPU::V_SUB_U32_e64;
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    N0 = Addr.getOperand(0);
      AMDGPU::S_MOV_B32, DL, MVT::i32,
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    if (C->getSExtValue()) {
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  auto *FI = dyn_cast<FrameIndexSDNode>(N);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
    if (Imm != NullPtr) {
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);
    if (TII->isLegalMUBUFImmOffset(C1) &&
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
  if (!Reg.isPhysical())
  const auto *RC = TRI.getPhysRegBaseClass(Reg);
  return RC && TRI.isSGPRClass(RC);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
        maskTrailingOnes<uint64_t>(32);
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
  SOffset = ByteOffsetNode;
  assert(isa<BuildVectorSDNode>(N));
  int64_t OffsetVal = 0;
  bool CanHaveFlatSegmentOffsetBug =
  if (isBaseWithConstantOffset64(Addr, N0, N1) &&
       isFlatScratchBaseLegal(Addr))) {
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
      OffsetVal = COffsetVal;
      std::tie(OffsetVal, RemainderOffset) =
          TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
          getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
      if (Addr.getValueType().getSizeInBits() == 32) {
        unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          AddOp = AMDGPU::V_ADD_U32_e64;
            DL, MVT::i32, N0, Sub0);
            DL, MVT::i32, N0, Sub1);
            getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
            {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
            AMDGPU::V_ADDC_U32_e64, DL, VTs,
            MVT::i64, RegSequenceArgs),
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
  if (Op.getValueType() == MVT::i32)
                                          bool NeedIOffset) const {
  int64_t ImmOffset = 0;
  ScaleOffset = false;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
          std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
                           : isUInt<32>(RemainderOffset)) {
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
  unsigned NumLiterals =
  if (!LHS->isDivergent()) {
  if (!SAddr && !RHS->isDivergent()) {
      Addr.getOperand(0)->isDivergent() &&
      isa<ConstantSDNode>(Addr.getOperand(1)) &&
      !Addr.getOperand(2)->isDivergent()) {
        (unsigned)cast<MemSDNode>(N)->getMemoryVT().getFixedSizeInBits() / 8;
    ScaleOffset = Addr.getConstantOperandVal(1) == Size;
    SAddr = Addr.getOperand(2);
    VOffset = Addr.getOperand(0);
      isa<ConstantSDNode>(Addr))
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
      N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
      N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
  if (auto *FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
             isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
    auto *FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
                                        FI->getValueType(0));
  if (Addr->isDivergent())
  int64_t COffsetVal = 0;
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);
      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
      COffsetVal = SplitImmOffset;
              ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
              : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);
bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
  return (VMax & 3) + (SMax & 3) >= 4;
  int64_t ImmOffset = 0;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {
      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
      if (isUInt<32>(RemainderOffset)) {
            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
        if (!isFlatScratchBaseLegal(Addr))
        if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
  if (!LHS->isDivergent() && RHS->isDivergent()) {
  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
    if (!isFlatScratchBaseLegalSV(OrigAddr))
  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
  bool ScaleOffset = SelectScaleOffset(N, VAddr, true);
bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     int64_t ImmOffset) const {
  if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
                                              bool IsSigned) const {
  bool ScaleOffset = false;
      (unsigned)cast<MemSDNode>(N)->getMemoryVT().getFixedSizeInBits() / 8;
    if (auto *C = dyn_cast<ConstantSDNode>(Off.getOperand(1)))
      (Offset.isMachineOpcode() &&
       Offset.getMachineOpcode() ==
           (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
                     : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
    if (auto *C = dyn_cast<ConstantSDNode>(Offset.getOperand(1)))
      ScaleOffset = C->getZExtValue() == Size;
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode,
                                          bool Imm32Only, bool IsBuffer,
                                          bool HasSOffset, int64_t ImmOffset,
                                          bool *ScaleOffset) const {
         "Cannot match both soffset and offset at the same time!");
    *ScaleOffset = SelectScaleOffset(N, ByteOffsetNode, false);
    *SOffset = ByteOffsetNode;
    return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
    return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
  SDLoc SL(ByteOffsetNode);
  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
      *Subtarget, ByteOffset, IsBuffer, HasSOffset);
  if (EncodedOffset && Offset && !Imm32Only) {
  if (EncodedOffset && Offset && Imm32Only) {
  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
  if (Addr.getValueType() != MVT::i32)
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
                                              bool IsBuffer, bool HasSOffset,
                                              bool *ScaleOffset) const {
    assert(!Imm32Only && !IsBuffer);
    if (!SelectSMRDBaseOffset(N, Addr, B, nullptr, Offset, false, false, true))
      ImmOff = C->getSExtValue();
    return SelectSMRDBaseOffset(N, B, SBase, SOffset, nullptr, false, false,
                                true, ImmOff, ScaleOffset);
      !Addr->getFlags().hasNoUnsignedWrap())
    N0 = Addr.getOperand(0);
    N1 = Addr.getOperand(1);
  assert(N0 && N1 && isa<ConstantSDNode>(N1));
  if (SelectSMRDOffset(N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                       ImmOffset, ScaleOffset)) {
  if (SelectSMRDOffset(N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                       ImmOffset, ScaleOffset)) {
                                 bool Imm32Only, bool *ScaleOffset) const {
  if (SelectSMRDBaseOffset(N, Addr, SBase, SOffset, Offset, Imm32Only,
    SBase = Expand32BitAddress(SBase);
  if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
  return SelectSMRD(nullptr, Addr, SBase, nullptr,
  return SelectSMRD(nullptr, Addr, SBase, nullptr,
  if (!SelectSMRD(N, Addr, SBase, &SOffset, nullptr,
                  false, &ScaleOffset))
  if (!SelectSMRD(N, Addr, SBase, &SOffset, &Offset, false, &ScaleOffset))
  return SelectSMRDOffset(nullptr, N, nullptr, &Offset,
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
  return SelectSMRDOffset(nullptr, N, nullptr, &Offset,
bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
  return N.getValueType() == MVT::i32 &&
         SelectSMRDBaseOffset(nullptr, N, SOffset,
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
  if (isa<ConstantSDNode>(Index))
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  const SDValue &Shl = N->getOperand(0);
    if (0 < BVal && BVal <= CVal && CVal < 32) {
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      const SDValue &Srl = N->getOperand(0);
      if (Shift && Mask) {
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      if (Shift && Mask) {
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
      unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  if (!N->hasOneUse())
  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i64) {
  auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
  auto Cond = VCMP.getOperand(0);
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  if (Cond.isUndef()) {
                         N->getOperand(2), N->getOperand(0));
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  bool AndExec = !UseSCCBr;
  bool Negate = false;
    auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
      bool NegatedBallot = false;
        UseSCCBr = !BallotCond->isDivergent();
        Negate = Negate ^ NegatedBallot;
      UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
               : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
            Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
      !N->isDivergent()) {
    if (Src.getValueType() == MVT::f16) {
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);
    N = glueCopyToM0(N, Ptr);
    N->getOperand(N->getNumOperands() - 1)
void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID) {
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
  SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                   N->getOperand(5), N->getOperand(0)};
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (!Subtarget->hasGWS() ||
      (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
    ImmOffset = ConstOffset->getZExtValue();
  glueCopyToM0(N, SDValue(M0Base, 0));
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {
    if (N->getValueType(0) != MVT::i32)
    SelectDSAppendConsume(N, IntrID);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    SelectDSBvhStackIntrinsic(N, IntrID);
  case Intrinsic::amdgcn_init_whole_wave:
        ->setInitWholeWave();
void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(0);
  unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
  SDNode *ConvGlueNode = N->getGluedNode();
        MVT::Glue, SDValue(ConvGlueNode, 0));
    ConvGlueNode = nullptr;
  case Intrinsic::amdgcn_wqm:
    Opcode = AMDGPU::WQM;
  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;
  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;
  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap: {
    if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
        (IntrID == Intrinsic::amdgcn_permlane32_swap &&
    Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
      NewOps.push_back(SDValue(ConvGlueNode, 0));
    bool FI = N->getConstantOperandVal(3);
  if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
      NewOps.push_back(SDValue(ConvGlueNode, 0));
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);
void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
                       {N->getOperand(0), Log2WaveSize});
void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
  if (N->isDivergent()) {
                                 {SrcVal, Log2WaveSize}),
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
    Src = Src.getOperand(0);
  } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
    auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
    if (LHS && LHS->isZero()) {
      Src = Src.getOperand(1);
  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
    Src = Src.getOperand(0);
  if (IsCanonicalizing)
  unsigned Opc = Src->getOpcode();
  EVT VT = Src.getValueType();
      (VT != MVT::i32 && VT != MVT::i64))
  ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Src->getOperand(1));
    Src = Src.getOperand(0);
    Src = Src.getOperand(0);
    Src = Src.getOperand(0);
  if (SelectVOP3ModsImpl(In, Src, Mods, true,
bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
  if (SelectVOP3ModsImpl(In, Src, Mods, false,
bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
  if (SelectVOP3ModsImpl(In, Src, Mods,
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
  if (SelectVOP3ModsImpl(In, Src, Mods,
bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
  return SelectVINTERPModsImpl(In, Src, SrcMods, false);
bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
  return SelectVINTERPModsImpl(In, Src, SrcMods, true);
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
  return SelectVOP3Mods(In, Src, SrcMods);
bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
  return SelectVOP3BMods(In, Src, SrcMods);
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
    Src = Src.getOperand(0);
  unsigned VecMods = Mods;
    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));
      Lo = stripBitcast(Lo.getOperand(0));
      Hi = stripBitcast(Hi.getOperand(0));
    unsigned VecSize = Src.getValueSizeInBits();
    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);
    if (Lo.getValueSizeInBits() > VecSize) {
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
    if (Hi.getValueSizeInBits() > VecSize) {
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
    assert(Lo.getValueSizeInBits() <= VecSize &&
           Hi.getValueSizeInBits() <= VecSize);
    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      if (VecSize == Lo.getValueSizeInBits()) {
      } else if (VecSize == 32) {
        Src = createVOP3PSrc32FromLo16(Lo, Src, CurDAG, Subtarget);
        assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
            Lo.getValueType()), 0);
        auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                    : AMDGPU::SReg_64RegClassID;
            Src.getValueType(), Ops), 0);
    if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
      uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                         .bitcastToAPInt().getZExtValue();
      Src.getNumOperands() == 2) {
    auto *SVN = cast<ShuffleVectorSDNode>(Src);
    if (Mask[0] < 2 && Mask[1] < 2) {
bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
  return SelectVOP3PMods(In, Src, SrcMods, true);
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  unsigned SrcVal = C->getZExtValue();
  unsigned DstRegClass;
  switch (Elts.size()) {
    DstRegClass = AMDGPU::VReg_256RegClassID;
    DstRegClass = AMDGPU::VReg_128RegClassID;
    DstRegClass = AMDGPU::VReg_64RegClassID;
  for (unsigned i = 0; i < Elts.size(); ++i) {
  assert("unhandled Reg sequence size" &&
         (Elts.size() == 8 || Elts.size() == 16));
  for (unsigned i = 0; i < Elts.size(); i += 2) {
    SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
        {Elts[i + 1], Elts[i], PackLoLo});
                                   const SDLoc &DL, unsigned ElementSize) {
  if (ElementSize == 16)
  if (ElementSize == 32)
                                   unsigned ElementSize) {
  for (auto El : Elts) {
      NegAbsElts.push_back(El->getOperand(0));
  if (Elts.size() != NegAbsElts.size()) {
    std::function<bool(SDValue)> ModifierCheck) {
            dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
      for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
        SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
        if (!ModifierCheck(ElF16))
bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    if (EltsF16.empty())
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    if (EltsV2F16.empty())
bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
  if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
    unsigned ModOpcode =
bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
  if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
    if (isInlineImmediate(Splat.getNode())) {
        unsigned Imm = C->getAPIntValue().getSExtValue();
        unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
  SDValue SplatSrc32 = stripBitcast(In);
  if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
    if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
      SDValue SplatSrc16 = stripBitcast(Splat32);
      if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
          std::optional<APInt> RawValue;
            RawValue = C->getValueAPF().bitcastToAPInt();
            RawValue = C->getAPIntValue();
          if (RawValue.has_value()) {
            EVT VT = In.getValueType().getScalarType();
              if (TII->isInlineConstant(FloatVal)) {
              if (TII->isInlineConstant(RawValue.value())) {
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src,
  const SDValue &ExtendSrc = In.getOperand(0);
    const SDValue &CastSrc = In.getOperand(0);
      if (Zero && Zero->getZExtValue() == 0)
    Src = ExtractVecEltSrc;
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
  return SelectVOP3Mods(In, Src, SrcMods);
    Op = Op.getOperand(0);
  IsExtractHigh = false;
    auto Low16 = dyn_cast<ConstantSDNode>(Op.getOperand(0));
    if (!Low16 || !Low16->isZero())
    Op = stripBitcast(Op.getOperand(1));
    if (Op.getValueType() != MVT::bf16)
  if (Op.getValueType() != MVT::i32)
    if (auto Mask = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      if (Mask->getZExtValue() == 0xffff0000) {
        IsExtractHigh = true;
        return Op.getOperand(0);
    if (auto Amt = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        return Op.getOperand(0);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
  SelectVOP3ModsImpl(In, Src, Mods);
  bool IsExtractHigh = false;
    Src = Src.getOperand(0);
  } else if (VT == MVT::bf16) {
  if (Src.getValueType() != VT &&
      (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
    Src = stripBitcast(Src);
    SelectVOP3ModsImpl(Src, Src, ModsTmp);
    if (IsExtractHigh ||
        Src.getOperand(0).getValueType() == MVT::i32) {
      Src = Src.getOperand(0);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
  SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(SDValue In, SDValue &Src,
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
  SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
  unsigned NumOpcodes = 0;
  const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->isAllOnes()) {
    for (unsigned I = 0; I < Src.size(); ++I) {
    if (Src.size() == 3) {
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        if (C->isAllOnes()) {
          for (unsigned I = 0; I < Src.size(); ++I) {
            if (Src[I] == LHS) {
    Bits = SrcBits[Src.size()];
  switch (In.getOpcode()) {
  if (!getOperandBits(LHS, LHSBits) ||
      !getOperandBits(RHS, RHSBits)) {
    return std::make_pair(0, 0);
    NumOpcodes += Op.first;
    LHSBits = Op.second;
    NumOpcodes += Op.first;
    RHSBits = Op.second;
    return std::make_pair(0, 0);
  switch (In.getOpcode()) {
    TTbl = LHSBits & RHSBits;
    TTbl = LHSBits | RHSBits;
    TTbl = LHSBits ^ RHSBits;
  return std::make_pair(NumOpcodes + 1, TTbl);
  unsigned NumOpcodes;
  std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
  if (NumOpcodes < 2 || Src.empty())
  if (NumOpcodes < 4 && !In->isDivergent())
  if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
        (In.getOperand(0).getOpcode() == In.getOpcode() ||
         In.getOperand(1).getOpcode() == In.getOpcode()))
  while (Src.size() < 3)
    Src.push_back(Src[0]);
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  bool AllUsesAcceptSReg = true;
       Limit < 10 && U != E; ++U, ++Limit) {
        getOperandRegClass(U->getUser(), U->getOperandNo());
    if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
      AllUsesAcceptSReg = false;
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        if (Desc.isCommutable()) {
          unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
                getOperandRegClass(U->getUser(), CommutedOpNo);
            if (CommutedRC == &AMDGPU::VS_32RegClass ||
                CommutedRC == &AMDGPU::VS_64RegClass)
              AllUsesAcceptSReg = true;
    if (!AllUsesAcceptSReg)
  return !AllUsesAcceptSReg && (Limit < 10);
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  const auto *Ld = cast<LoadSDNode>(N);
            ->isMemOpHasNoClobberedMemOperand(N)));
  bool IsModified = false;
    SDNode *Node = &*Position++;
      if (ResNode != Node) {
  } while (IsModified);