#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc(
        "Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
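  // Trailing glue operands only model scheduling dependencies, so they are
  // not counted as real operands of the node.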
  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
         all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
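  // Accept only instructions whose implicit operands are exactly those listed
  // in the instruction descriptor (no extra implicit defs were added later)
  // and that cannot raise a floating-point exception, i.e. nothing here has
  // hidden side effects.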
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
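      // Comparisons whose result is only ANDed with EXEC (a plain S_AND that
      // reads EXEC, or S_AND_SAVEEXEC) are treated as not depending on EXEC,
      // which allows such compares to be hoisted or sunk; any other user
      // makes the result depend on the exec mask.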
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

  int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
  int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
  if (Offset0Idx == -1 || Offset1Idx == -1)

  Offset0Idx -= get(Opc0).NumDefs;
  Offset1Idx -= get(Opc1).NumDefs;

  if (!Load0Offset || !Load1Offset)

  int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
  int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

  if (OffIdx0 == -1 || OffIdx1 == -1)

  OffIdx0 -= get(Opc0).NumDefs;
  OffIdx1 -= get(Opc1).NumDefs;
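  // getNamedOperandIdx() returns an index into the MachineInstr operand list,
  // which still includes the result registers; the SDNode operand list does
  // not, so the number of defs is subtracted before the offset operands are
  // looked up on the nodes.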
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
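      // DS_READ2/DS_WRITE2 encode two 8-bit offsets in units of EltSize.
      // Only the back-to-back case (Offset1 == Offset0 + 1) is reported as a
      // single access, with byte offset EltSize * Offset0.  For example, with
      // EltSize = 4, offset0 = 10 and offset1 = 11 describe one 8-byte access
      // at byte offset 40.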
    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    AMDGPU::OpName RsrcOpName =
        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
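    // For each memory-instruction class (buffer, MIMG, SMEM, FLAT) the
    // base/address operands and the data operand (vdst, vdata or sdst) are
    // identified so callers can reason about the address and access width;
    // for MIMG, every register from vaddr0 up to srsrc is part of the address.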
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
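  // Clustering is capped by an estimated dword count: NumBytes is the total
  // for the would-be cluster, so LoadSize approximates the bytes per load and
  // is rounded up to whole dwords.  E.g. NumBytes = 32 with ClusterSize = 4
  // gives LoadSize = 8 and NumDWords = 2 * 4 = 8, which is allowed as long as
  // 8 <= MaxMemoryClusterDWords.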
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");
         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

    bool SafeToPropagate = true;

    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;

    if (!SafeToPropagate)

    for (auto I = Def; I != MI; ++I)
      I->clearRegisterKills(DefOp.getReg(), &RI);

  if (ImpUseSuperReg) {
    Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
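      // When both the source and destination 32-bit pieces start on an even
      // SGPR and at least one more piece follows, two consecutive SGPRs can
      // be moved with a single S_MOV_B64 instead of two S_MOV_B32s; e.g. a
      // copy of s[4:7] becomes two 64-bit moves rather than four 32-bit ones.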
  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);

                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());
    if (DestReg == SrcReg) {
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                   AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
859 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
860 RC == &AMDGPU::SReg_32RegClass) {
861 if (SrcReg == AMDGPU::SCC) {
868 if (DestReg == AMDGPU::VCC_LO) {
869 if (AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
883 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
893 if (RC == &AMDGPU::SReg_64RegClass) {
894 if (SrcReg == AMDGPU::SCC) {
901 if (DestReg == AMDGPU::VCC) {
902 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
916 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
926 if (DestReg == AMDGPU::SCC) {
929 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
933 assert(ST.hasScalarCompareEq64());
947 if (RC == &AMDGPU::AGPR_32RegClass) {
948 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
949 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
955 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
964 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
971 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
972 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
974 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
975 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
976 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
977 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
980 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
981 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
994 if (IsAGPRDst || IsAGPRSrc) {
995 if (!DstLow || !SrcLow) {
997 "Cannot use hi16 subreg with an AGPR!");
1004 if (ST.useRealTrue16Insts()) {
1010 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
1011 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
1023 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
1024 if (!DstLow || !SrcLow) {
1026 "Cannot use hi16 subreg on VI!");
1049 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1050 if (ST.hasMovB64()) {
1055 if (ST.hasPkMovB32()) {
1071 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1072 if (RI.isSGPRClass(RC)) {
1073 if (!RI.isSGPRClass(SrcRC)) {
1077 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1083 unsigned EltSize = 4;
1084 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1085 if (RI.isAGPRClass(RC)) {
1086 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1087 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1088 else if (RI.hasVGPRs(SrcRC) ||
1089 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1090 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1092 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1093 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1094 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1095 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1096 (RI.isProperlyAlignedRC(*RC) &&
1097 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1099 if (ST.hasMovB64()) {
1100 Opcode = AMDGPU::V_MOV_B64_e32;
1102 }
else if (ST.hasPkMovB32()) {
1103 Opcode = AMDGPU::V_PK_MOV_B32;
1113 std::unique_ptr<RegScavenger> RS;
1114 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1115 RS = std::make_unique<RegScavenger>();
1121 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1122 const bool CanKillSuperReg = KillSrc && !Overlap;
  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
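    // Only the last piece of the copy may kill the source super-register, and
    // the traversal direction (Forward vs. reversed SubIndices) is chosen so
    // that an overlapping copy never overwrites source lanes it still needs
    // to read.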
1137 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1141 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1142 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1188 return &AMDGPU::VGPR_32RegClass;
1200 assert(
MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
1201 "Not a VGPR32 reg");
1203 if (
Cond.size() == 1) {
1204 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1213 }
else if (
Cond.size() == 2) {
1214 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1216 case SIInstrInfo::SCC_TRUE: {
1217 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1227 case SIInstrInfo::SCC_FALSE: {
1228 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1238 case SIInstrInfo::VCCNZ: {
1241 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1252 case SIInstrInfo::VCCZ: {
1255 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1266 case SIInstrInfo::EXECNZ: {
1267 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1268 Register SReg2 =
MRI.createVirtualRegister(RI.getBoolRC());
1279 case SIInstrInfo::EXECZ: {
1280 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1281 Register SReg2 =
MRI.createVirtualRegister(RI.getBoolRC());
1306 Register Reg =
MRI.createVirtualRegister(RI.getBoolRC());
1319 Register Reg =
MRI.createVirtualRegister(RI.getBoolRC());
1329 int64_t &ImmVal)
const {
1330 switch (
MI.getOpcode()) {
1331 case AMDGPU::V_MOV_B32_e32:
1332 case AMDGPU::S_MOV_B32:
1333 case AMDGPU::S_MOVK_I32:
1334 case AMDGPU::S_MOV_B64:
1335 case AMDGPU::V_MOV_B64_e32:
1336 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1337 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1338 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1339 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1340 case AMDGPU::V_MOV_B64_PSEUDO: {
1344 return MI.getOperand(0).getReg() == Reg;
1349 case AMDGPU::S_BREV_B32:
1350 case AMDGPU::V_BFREV_B32_e32:
1351 case AMDGPU::V_BFREV_B32_e64: {
1355 return MI.getOperand(0).getReg() == Reg;
1360 case AMDGPU::S_NOT_B32:
1361 case AMDGPU::V_NOT_B32_e32:
1362 case AMDGPU::V_NOT_B32_e64: {
1365 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1366 return MI.getOperand(0).getReg() == Reg;
1378 if (RI.isAGPRClass(DstRC))
1379 return AMDGPU::COPY;
1380 if (RI.getRegSizeInBits(*DstRC) == 16) {
1383 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1385 if (RI.getRegSizeInBits(*DstRC) == 32)
1386 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1387 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1388 return AMDGPU::S_MOV_B64;
1389 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1390 return AMDGPU::V_MOV_B64_PSEUDO;
1391 return AMDGPU::COPY;
1396 bool IsIndirectSrc)
const {
1397 if (IsIndirectSrc) {
1399 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1401 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1403 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1405 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1407 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1409 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1411 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1413 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1415 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1417 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1419 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1420 if (VecSize <= 1024)
1421 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1427 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1429 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1431 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1433 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1435 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1437 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1439 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1441 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1443 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1445 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1447 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1448 if (VecSize <= 1024)
1449 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1456 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1458 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1460 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1462 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1464 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1466 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1468 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1470 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1472 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1474 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1476 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1477 if (VecSize <= 1024)
1478 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1485 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1487 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1489 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1491 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1493 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1495 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1497 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1499 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1501 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1503 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1505 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1506 if (VecSize <= 1024)
1507 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1518 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1520 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1521 if (VecSize <= 1024)
1522 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1529 bool IsSGPR)
const {
1541 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1548 return AMDGPU::SI_SPILL_S32_SAVE;
1550 return AMDGPU::SI_SPILL_S64_SAVE;
1552 return AMDGPU::SI_SPILL_S96_SAVE;
1554 return AMDGPU::SI_SPILL_S128_SAVE;
1556 return AMDGPU::SI_SPILL_S160_SAVE;
1558 return AMDGPU::SI_SPILL_S192_SAVE;
1560 return AMDGPU::SI_SPILL_S224_SAVE;
1562 return AMDGPU::SI_SPILL_S256_SAVE;
1564 return AMDGPU::SI_SPILL_S288_SAVE;
1566 return AMDGPU::SI_SPILL_S320_SAVE;
1568 return AMDGPU::SI_SPILL_S352_SAVE;
1570 return AMDGPU::SI_SPILL_S384_SAVE;
1572 return AMDGPU::SI_SPILL_S512_SAVE;
1574 return AMDGPU::SI_SPILL_S1024_SAVE;
1583 return AMDGPU::SI_SPILL_V16_SAVE;
1585 return AMDGPU::SI_SPILL_V32_SAVE;
1587 return AMDGPU::SI_SPILL_V64_SAVE;
1589 return AMDGPU::SI_SPILL_V96_SAVE;
1591 return AMDGPU::SI_SPILL_V128_SAVE;
1593 return AMDGPU::SI_SPILL_V160_SAVE;
1595 return AMDGPU::SI_SPILL_V192_SAVE;
1597 return AMDGPU::SI_SPILL_V224_SAVE;
1599 return AMDGPU::SI_SPILL_V256_SAVE;
1601 return AMDGPU::SI_SPILL_V288_SAVE;
1603 return AMDGPU::SI_SPILL_V320_SAVE;
1605 return AMDGPU::SI_SPILL_V352_SAVE;
1607 return AMDGPU::SI_SPILL_V384_SAVE;
1609 return AMDGPU::SI_SPILL_V512_SAVE;
1611 return AMDGPU::SI_SPILL_V1024_SAVE;
1620 return AMDGPU::SI_SPILL_AV32_SAVE;
1622 return AMDGPU::SI_SPILL_AV64_SAVE;
1624 return AMDGPU::SI_SPILL_AV96_SAVE;
1626 return AMDGPU::SI_SPILL_AV128_SAVE;
1628 return AMDGPU::SI_SPILL_AV160_SAVE;
1630 return AMDGPU::SI_SPILL_AV192_SAVE;
1632 return AMDGPU::SI_SPILL_AV224_SAVE;
1634 return AMDGPU::SI_SPILL_AV256_SAVE;
1636 return AMDGPU::SI_SPILL_AV288_SAVE;
1638 return AMDGPU::SI_SPILL_AV320_SAVE;
1640 return AMDGPU::SI_SPILL_AV352_SAVE;
1642 return AMDGPU::SI_SPILL_AV384_SAVE;
1644 return AMDGPU::SI_SPILL_AV512_SAVE;
1646 return AMDGPU::SI_SPILL_AV1024_SAVE;
1653 bool IsVectorSuperClass) {
1658 if (IsVectorSuperClass)
1659 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1661 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1667 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1674 if (ST.hasMAIInsts())
1694 FrameInfo.getObjectAlign(FrameIndex));
1695 unsigned SpillSize =
TRI->getSpillSize(*RC);
1698 if (RI.isSGPRClass(RC)) {
1700 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1701 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1702 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1710 if (SrcReg.
isVirtual() && SpillSize == 4) {
1711 MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1720 if (RI.spillSGPRToVGPR())
1740 return AMDGPU::SI_SPILL_S32_RESTORE;
1742 return AMDGPU::SI_SPILL_S64_RESTORE;
1744 return AMDGPU::SI_SPILL_S96_RESTORE;
1746 return AMDGPU::SI_SPILL_S128_RESTORE;
1748 return AMDGPU::SI_SPILL_S160_RESTORE;
1750 return AMDGPU::SI_SPILL_S192_RESTORE;
1752 return AMDGPU::SI_SPILL_S224_RESTORE;
1754 return AMDGPU::SI_SPILL_S256_RESTORE;
1756 return AMDGPU::SI_SPILL_S288_RESTORE;
1758 return AMDGPU::SI_SPILL_S320_RESTORE;
1760 return AMDGPU::SI_SPILL_S352_RESTORE;
1762 return AMDGPU::SI_SPILL_S384_RESTORE;
1764 return AMDGPU::SI_SPILL_S512_RESTORE;
1766 return AMDGPU::SI_SPILL_S1024_RESTORE;
1775 return AMDGPU::SI_SPILL_V16_RESTORE;
1777 return AMDGPU::SI_SPILL_V32_RESTORE;
1779 return AMDGPU::SI_SPILL_V64_RESTORE;
1781 return AMDGPU::SI_SPILL_V96_RESTORE;
1783 return AMDGPU::SI_SPILL_V128_RESTORE;
1785 return AMDGPU::SI_SPILL_V160_RESTORE;
1787 return AMDGPU::SI_SPILL_V192_RESTORE;
1789 return AMDGPU::SI_SPILL_V224_RESTORE;
1791 return AMDGPU::SI_SPILL_V256_RESTORE;
1793 return AMDGPU::SI_SPILL_V288_RESTORE;
1795 return AMDGPU::SI_SPILL_V320_RESTORE;
1797 return AMDGPU::SI_SPILL_V352_RESTORE;
1799 return AMDGPU::SI_SPILL_V384_RESTORE;
1801 return AMDGPU::SI_SPILL_V512_RESTORE;
1803 return AMDGPU::SI_SPILL_V1024_RESTORE;
1812 return AMDGPU::SI_SPILL_AV32_RESTORE;
1814 return AMDGPU::SI_SPILL_AV64_RESTORE;
1816 return AMDGPU::SI_SPILL_AV96_RESTORE;
1818 return AMDGPU::SI_SPILL_AV128_RESTORE;
1820 return AMDGPU::SI_SPILL_AV160_RESTORE;
1822 return AMDGPU::SI_SPILL_AV192_RESTORE;
1824 return AMDGPU::SI_SPILL_AV224_RESTORE;
1826 return AMDGPU::SI_SPILL_AV256_RESTORE;
1828 return AMDGPU::SI_SPILL_AV288_RESTORE;
1830 return AMDGPU::SI_SPILL_AV320_RESTORE;
1832 return AMDGPU::SI_SPILL_AV352_RESTORE;
1834 return AMDGPU::SI_SPILL_AV384_RESTORE;
1836 return AMDGPU::SI_SPILL_AV512_RESTORE;
1838 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1845 bool IsVectorSuperClass) {
1850 if (IsVectorSuperClass)
1851 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1853 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1859 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1866 if (ST.hasMAIInsts())
1869 assert(!RI.isAGPRClass(RC));
1884 unsigned SpillSize =
TRI->getSpillSize(*RC);
1891 FrameInfo.getObjectAlign(FrameIndex));
1893 if (RI.isSGPRClass(RC)) {
1895 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1896 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1897 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1902 if (DestReg.
isVirtual() && SpillSize == 4) {
1904 MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1907 if (RI.spillSGPRToVGPR())
                             unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);
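    // S_NOP waits for Imm + 1 cycles and its immediate field is small, so a
    // large request is split into a chain of S_NOPs covering at most
    // MaxSNopCount cycles each until Quantity is exhausted.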
1944 auto *MF =
MBB.getParent();
1947 assert(Info->isEntryFunction());
1949 if (
MBB.succ_empty()) {
1950 bool HasNoTerminator =
MBB.getFirstTerminator() ==
MBB.end();
1951 if (HasNoTerminator) {
1952 if (Info->returnsVoid()) {
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
  Register DoorbellRegMasked =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
  Register SetWaveAbortBit =
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
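  // The sequence above isolates the doorbell ID with DoorbellIDMask, ORs in
  // the queue-wave-abort bit (ECQueueWaveAbort), and hands the result to the
  // trap handler through M0; the earlier copy into TTMP2 suggests M0 is saved
  // before this and restored by the final S_MOV_B32 back into M0.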
2020 switch (
MI.getOpcode()) {
2022 if (
MI.isMetaInstruction())
2027 return MI.getOperand(0).getImm() + 1;
2037 switch (
MI.getOpcode()) {
2039 case AMDGPU::S_MOV_B64_term:
2042 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2045 case AMDGPU::S_MOV_B32_term:
2048 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2051 case AMDGPU::S_XOR_B64_term:
2054 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2057 case AMDGPU::S_XOR_B32_term:
2060 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2062 case AMDGPU::S_OR_B64_term:
2065 MI.setDesc(
get(AMDGPU::S_OR_B64));
2067 case AMDGPU::S_OR_B32_term:
2070 MI.setDesc(
get(AMDGPU::S_OR_B32));
2073 case AMDGPU::S_ANDN2_B64_term:
2076 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2079 case AMDGPU::S_ANDN2_B32_term:
2082 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2085 case AMDGPU::S_AND_B64_term:
2088 MI.setDesc(
get(AMDGPU::S_AND_B64));
2091 case AMDGPU::S_AND_B32_term:
2094 MI.setDesc(
get(AMDGPU::S_AND_B32));
2097 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2100 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2103 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2106 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2109 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2110 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2113 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2114 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2116 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2120 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2123 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2126 int64_t Imm =
MI.getOperand(1).getImm();
2128 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2129 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2136 MI.eraseFromParent();
2142 case AMDGPU::V_MOV_B64_PSEUDO: {
2144 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2145 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2150 if (ST.hasMovB64()) {
2151 MI.setDesc(
get(AMDGPU::V_MOV_B64_e32));
2156 if (
SrcOp.isImm()) {
2158 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2159 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2181 if (ST.hasPkMovB32() &&
2202 MI.eraseFromParent();
2205 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2209 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2213 if (ST.has64BitLiterals()) {
2214 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2220 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2225 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2226 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2228 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2229 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2236 MI.eraseFromParent();
2239 case AMDGPU::V_SET_INACTIVE_B32: {
2243 .
add(
MI.getOperand(3))
2244 .
add(
MI.getOperand(4))
2245 .
add(
MI.getOperand(1))
2246 .
add(
MI.getOperand(2))
2247 .
add(
MI.getOperand(5));
2248 MI.eraseFromParent();
2251 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2252 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2253 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2254 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2255 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2256 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2257 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2258 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2259 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2260 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2261 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2262 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2263 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2264 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2265 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2266 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2267 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2268 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2269 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2270 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2271 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2272 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2273 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2274 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2275 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2276 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2277 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2278 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2279 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2283 if (RI.hasVGPRs(EltRC)) {
2284 Opc = AMDGPU::V_MOVRELD_B32_e32;
2286 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2287 : AMDGPU::S_MOVRELD_B32;
2292 bool IsUndef =
MI.getOperand(1).isUndef();
2293 unsigned SubReg =
MI.getOperand(3).getImm();
2294 assert(VecReg ==
MI.getOperand(1).getReg());
2299 .
add(
MI.getOperand(2))
2303 const int ImpDefIdx =
2305 const int ImpUseIdx = ImpDefIdx + 1;
2307 MI.eraseFromParent();
2310 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2311 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2312 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2313 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2314 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2315 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2316 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2317 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2318 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2319 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2320 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2321 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2322 assert(ST.useVGPRIndexMode());
2324 bool IsUndef =
MI.getOperand(1).isUndef();
2333 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2337 .
add(
MI.getOperand(2))
2342 const int ImpDefIdx =
2344 const int ImpUseIdx = ImpDefIdx + 1;
2351 MI.eraseFromParent();
2354 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2355 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2356 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2357 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2358 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2359 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2360 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2361 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2362 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2363 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2364 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2365 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2366 assert(ST.useVGPRIndexMode());
2369 bool IsUndef =
MI.getOperand(1).isUndef();
2387 MI.eraseFromParent();
2390 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2393 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2394 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2413 if (ST.hasGetPCZeroExtension()) {
2417 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2424 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2434 MI.eraseFromParent();
2437 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2447 Op.setOffset(
Op.getOffset() + 4);
2449 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2453 MI.eraseFromParent();
2456 case AMDGPU::ENTER_STRICT_WWM: {
2462 case AMDGPU::ENTER_STRICT_WQM: {
2469 MI.eraseFromParent();
2472 case AMDGPU::EXIT_STRICT_WWM:
2473 case AMDGPU::EXIT_STRICT_WQM: {
2479 case AMDGPU::SI_RETURN: {
2493 MI.eraseFromParent();
2497 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2498 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2499 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2502 case AMDGPU::S_GETPC_B64_pseudo:
2503 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2504 if (ST.hasGetPCZeroExtension()) {
2506 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2515 case AMDGPU::V_MAX_BF16_PSEUDO_e64:
2516 assert(ST.hasBF16PackedInsts());
2517 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2541 case AMDGPU::S_LOAD_DWORDX16_IMM:
2542 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2555 for (
auto &CandMO :
I->operands()) {
2556 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2564 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2568 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2572 assert(
MRI.use_nodbg_empty(DestReg) &&
"DestReg should have no users yet.");
2574 unsigned NewOpcode = -1;
2575 if (SubregSize == 256)
2576 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2577 else if (SubregSize == 128)
2578 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2585 MRI.setRegClass(DestReg, NewRC);
2588 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2593 MI->getOperand(0).setReg(DestReg);
2594 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2598 OffsetMO->
setImm(FinalOffset);
2604 MI->setMemRefs(*MF, NewMMOs);
2617std::pair<MachineInstr*, MachineInstr*>
2619 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2621 if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2624 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2625 return std::pair(&
MI,
nullptr);
2636 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2638 if (Dst.isPhysical()) {
2639 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2642 auto Tmp =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2646 for (
unsigned I = 1;
I <= 2; ++
I) {
2649 if (
SrcOp.isImm()) {
2651 Imm.ashrInPlace(Part * 32);
2652 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2656 if (Src.isPhysical())
2657 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2664 MovDPP.addImm(MO.getImm());
2666 Split[Part] = MovDPP;
2670 if (Dst.isVirtual())
2677 MI.eraseFromParent();
2678 return std::pair(Split[0], Split[1]);
2681std::optional<DestSourcePair>
2683 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2686 return std::nullopt;
2690 AMDGPU::OpName Src0OpName,
2692 AMDGPU::OpName Src1OpName)
const {
2699 "All commutable instructions have both src0 and src1 modifiers");
2701 int Src0ModsVal = Src0Mods->
getImm();
2702 int Src1ModsVal = Src1Mods->
getImm();
2704 Src1Mods->
setImm(Src0ModsVal);
2705 Src0Mods->
setImm(Src1ModsVal);
2714 bool IsKill = RegOp.
isKill();
2716 bool IsUndef = RegOp.
isUndef();
2717 bool IsDebug = RegOp.
isDebug();
2719 if (NonRegOp.
isImm())
2721 else if (NonRegOp.
isFI())
2742 int64_t NonRegVal = NonRegOp1.
getImm();
2745 NonRegOp2.
setImm(NonRegVal);
2752 unsigned OpIdx1)
const {
2757 unsigned Opc =
MI.getOpcode();
2758 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2768 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2771 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2776 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2782 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2797 unsigned Src1Idx)
const {
2798 assert(!NewMI &&
"this should never be used");
2800 unsigned Opc =
MI.getOpcode();
2802 if (CommutedOpcode == -1)
2805 if (Src0Idx > Src1Idx)
2808 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2809 static_cast<int>(Src0Idx) &&
2810 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2811 static_cast<int>(Src1Idx) &&
2812 "inconsistency with findCommutedOpIndices");
2837 Src1, AMDGPU::OpName::src1_modifiers);
2840 AMDGPU::OpName::src1_sel);
2852 unsigned &SrcOpIdx0,
2853 unsigned &SrcOpIdx1)
const {
2858 unsigned &SrcOpIdx0,
2859 unsigned &SrcOpIdx1)
const {
2860 if (!
Desc.isCommutable())
2863 unsigned Opc =
Desc.getOpcode();
2864 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2868 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2872 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2876 int64_t BrOffset)
const {
2893 return MI.getOperand(0).getMBB();
2898 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2899 MI.getOpcode() == AMDGPU::SI_LOOP)
2911 "new block should be inserted for expanding unconditional branch");
2914 "restore block should be inserted for restoring clobbered registers");
2922 if (ST.hasAddPC64Inst()) {
2924 MCCtx.createTempSymbol(
"offset",
true);
2928 MCCtx.createTempSymbol(
"post_addpc",
true);
2929 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
2933 Offset->setVariableValue(OffsetExpr);
2937 assert(RS &&
"RegScavenger required for long branching");
2941 Register PCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2945 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
2946 ST.hasVALUReadSGPRHazard();
2947 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
2948 if (FlushSGPRWrites)
2956 ApplyHazardWorkarounds();
2959 MCCtx.createTempSymbol(
"post_getpc",
true);
2963 MCCtx.createTempSymbol(
"offset_lo",
true);
2965 MCCtx.createTempSymbol(
"offset_hi",
true);
2968 .
addReg(PCReg, 0, AMDGPU::sub0)
2972 .
addReg(PCReg, 0, AMDGPU::sub1)
2974 ApplyHazardWorkarounds();
3015 if (LongBranchReservedReg) {
3017 Scav = LongBranchReservedReg;
3026 MRI.replaceRegWith(PCReg, Scav);
3027 MRI.clearVirtRegs();
3033 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3034 MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
3035 MRI.clearVirtRegs();
3050unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3052 case SIInstrInfo::SCC_TRUE:
3053 return AMDGPU::S_CBRANCH_SCC1;
3054 case SIInstrInfo::SCC_FALSE:
3055 return AMDGPU::S_CBRANCH_SCC0;
3056 case SIInstrInfo::VCCNZ:
3057 return AMDGPU::S_CBRANCH_VCCNZ;
3058 case SIInstrInfo::VCCZ:
3059 return AMDGPU::S_CBRANCH_VCCZ;
3060 case SIInstrInfo::EXECNZ:
3061 return AMDGPU::S_CBRANCH_EXECNZ;
3062 case SIInstrInfo::EXECZ:
3063 return AMDGPU::S_CBRANCH_EXECZ;
3069SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3071 case AMDGPU::S_CBRANCH_SCC0:
3073 case AMDGPU::S_CBRANCH_SCC1:
3075 case AMDGPU::S_CBRANCH_VCCNZ:
3077 case AMDGPU::S_CBRANCH_VCCZ:
3079 case AMDGPU::S_CBRANCH_EXECNZ:
3081 case AMDGPU::S_CBRANCH_EXECZ:
3093 bool AllowModify)
const {
3094 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3096 TBB =
I->getOperand(0).getMBB();
3100 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3101 if (Pred == INVALID_BR)
3106 Cond.push_back(
I->getOperand(1));
3110 if (
I ==
MBB.end()) {
3116 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3118 FBB =
I->getOperand(0).getMBB();
3128 bool AllowModify)
const {
3136 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3137 switch (
I->getOpcode()) {
3138 case AMDGPU::S_MOV_B64_term:
3139 case AMDGPU::S_XOR_B64_term:
3140 case AMDGPU::S_OR_B64_term:
3141 case AMDGPU::S_ANDN2_B64_term:
3142 case AMDGPU::S_AND_B64_term:
3143 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3144 case AMDGPU::S_MOV_B32_term:
3145 case AMDGPU::S_XOR_B32_term:
3146 case AMDGPU::S_OR_B32_term:
3147 case AMDGPU::S_ANDN2_B32_term:
3148 case AMDGPU::S_AND_B32_term:
3149 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3152 case AMDGPU::SI_ELSE:
3153 case AMDGPU::SI_KILL_I1_TERMINATOR:
3154 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3171 int *BytesRemoved)
const {
3173 unsigned RemovedSize = 0;
3176 if (
MI.isBranch() ||
MI.isReturn()) {
3178 MI.eraseFromParent();
3184 *BytesRemoved = RemovedSize;
3201 int *BytesAdded)
const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

    *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
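    // Size bookkeeping for branch relaxation: a single branch is 4 bytes and
    // the conditional-plus-unconditional pair is 8; on subtargets with the
    // offset-0x3f branch bug, space for a workaround instruction is reserved
    // after each branch, which is why the estimates double.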
3250 if (
Cond.size() != 2) {
3254 if (
Cond[0].isImm()) {
3265 Register FalseReg,
int &CondCycles,
3266 int &TrueCycles,
int &FalseCycles)
const {
3272 if (
MRI.getRegClass(FalseReg) != RC)
3276 CondCycles = TrueCycles = FalseCycles = NumInsts;
3279 return RI.hasVGPRs(RC) && NumInsts <= 6;
3287 if (
MRI.getRegClass(FalseReg) != RC)
3293 if (NumInsts % 2 == 0)
3296 CondCycles = TrueCycles = FalseCycles = NumInsts;
3297 return RI.isSGPRClass(RC);
3308 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3309 if (Pred == VCCZ || Pred == SCC_FALSE) {
3310 Pred =
static_cast<BranchPredicate
>(-Pred);
3316 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3318 if (DstSize == 32) {
3320 if (Pred == SCC_TRUE) {
3335 if (DstSize == 64 && Pred == SCC_TRUE) {
3345 static const int16_t Sub0_15[] = {
3346 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3347 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3348 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3349 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3352 static const int16_t Sub0_15_64[] = {
3353 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3354 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3355 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3356 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3359 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3361 const int16_t *SubIndices = Sub0_15;
3362 int NElts = DstSize / 32;
3366 if (Pred == SCC_TRUE) {
3368 SelOp = AMDGPU::S_CSELECT_B32;
3369 EltRC = &AMDGPU::SGPR_32RegClass;
3371 SelOp = AMDGPU::S_CSELECT_B64;
3372 EltRC = &AMDGPU::SGPR_64RegClass;
3373 SubIndices = Sub0_15_64;
3379 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3384 for (
int Idx = 0; Idx != NElts; ++Idx) {
3385 Register DstElt =
MRI.createVirtualRegister(EltRC);
3388 unsigned SubIdx = SubIndices[Idx];
3391 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3394 .
addReg(FalseReg, 0, SubIdx)
3395 .
addReg(TrueReg, 0, SubIdx);
3399 .
addReg(TrueReg, 0, SubIdx)
3400 .
addReg(FalseReg, 0, SubIdx);
3412 switch (
MI.getOpcode()) {
3413 case AMDGPU::V_MOV_B16_t16_e32:
3414 case AMDGPU::V_MOV_B16_t16_e64:
3415 case AMDGPU::V_MOV_B32_e32:
3416 case AMDGPU::V_MOV_B32_e64:
3417 case AMDGPU::V_MOV_B64_PSEUDO:
3418 case AMDGPU::V_MOV_B64_e32:
3419 case AMDGPU::V_MOV_B64_e64:
3420 case AMDGPU::S_MOV_B32:
3421 case AMDGPU::S_MOV_B64:
3422 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3424 case AMDGPU::WWM_COPY:
3425 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3426 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3427 case AMDGPU::V_ACCVGPR_MOV_B32:
3428 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3429 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3437 switch (
MI.getOpcode()) {
3438 case AMDGPU::V_MOV_B16_t16_e32:
3439 case AMDGPU::V_MOV_B16_t16_e64:
3441 case AMDGPU::V_MOV_B32_e32:
3442 case AMDGPU::V_MOV_B32_e64:
3443 case AMDGPU::V_MOV_B64_PSEUDO:
3444 case AMDGPU::V_MOV_B64_e32:
3445 case AMDGPU::V_MOV_B64_e64:
3446 case AMDGPU::S_MOV_B32:
3447 case AMDGPU::S_MOV_B64:
3448 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3450 case AMDGPU::WWM_COPY:
3451 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3452 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3453 case AMDGPU::V_ACCVGPR_MOV_B32:
3454 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3455 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3463 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3464 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3465 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3468 unsigned Opc =
MI.getOpcode();
3470 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3472 MI.removeOperand(Idx);
3477 unsigned SubRegIndex) {
3478 switch (SubRegIndex) {
3479 case AMDGPU::NoSubRegister:
3489 case AMDGPU::sub1_lo16:
3491 case AMDGPU::sub1_hi16:
3494 return std::nullopt;
3502 case AMDGPU::V_MAC_F16_e32:
3503 case AMDGPU::V_MAC_F16_e64:
3504 case AMDGPU::V_MAD_F16_e64:
3505 return AMDGPU::V_MADAK_F16;
3506 case AMDGPU::V_MAC_F32_e32:
3507 case AMDGPU::V_MAC_F32_e64:
3508 case AMDGPU::V_MAD_F32_e64:
3509 return AMDGPU::V_MADAK_F32;
3510 case AMDGPU::V_FMAC_F32_e32:
3511 case AMDGPU::V_FMAC_F32_e64:
3512 case AMDGPU::V_FMA_F32_e64:
3513 return AMDGPU::V_FMAAK_F32;
3514 case AMDGPU::V_FMAC_F16_e32:
3515 case AMDGPU::V_FMAC_F16_e64:
3516 case AMDGPU::V_FMAC_F16_t16_e64:
3517 case AMDGPU::V_FMAC_F16_fake16_e64:
3518 case AMDGPU::V_FMA_F16_e64:
3519 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3520 ? AMDGPU::V_FMAAK_F16_t16
3521 : AMDGPU::V_FMAAK_F16_fake16
3522 : AMDGPU::V_FMAAK_F16;
3523 case AMDGPU::V_FMAC_F64_e32:
3524 case AMDGPU::V_FMAC_F64_e64:
3525 case AMDGPU::V_FMA_F64_e64:
3526 return AMDGPU::V_FMAAK_F64;
3534 case AMDGPU::V_MAC_F16_e32:
3535 case AMDGPU::V_MAC_F16_e64:
3536 case AMDGPU::V_MAD_F16_e64:
3537 return AMDGPU::V_MADMK_F16;
3538 case AMDGPU::V_MAC_F32_e32:
3539 case AMDGPU::V_MAC_F32_e64:
3540 case AMDGPU::V_MAD_F32_e64:
3541 return AMDGPU::V_MADMK_F32;
3542 case AMDGPU::V_FMAC_F32_e32:
3543 case AMDGPU::V_FMAC_F32_e64:
3544 case AMDGPU::V_FMA_F32_e64:
3545 return AMDGPU::V_FMAMK_F32;
3546 case AMDGPU::V_FMAC_F16_e32:
3547 case AMDGPU::V_FMAC_F16_e64:
3548 case AMDGPU::V_FMAC_F16_t16_e64:
3549 case AMDGPU::V_FMAC_F16_fake16_e64:
3550 case AMDGPU::V_FMA_F16_e64:
3551 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3552 ? AMDGPU::V_FMAMK_F16_t16
3553 : AMDGPU::V_FMAMK_F16_fake16
3554 : AMDGPU::V_FMAMK_F16;
3555 case AMDGPU::V_FMAC_F64_e32:
3556 case AMDGPU::V_FMAC_F64_e64:
3557 case AMDGPU::V_FMA_F64_e64:
3558 return AMDGPU::V_FMAMK_F64;
3570 const bool HasMultipleUses = !
MRI->hasOneNonDBGUse(Reg);
3572 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3575 if (
Opc == AMDGPU::COPY) {
3576 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3583 if (HasMultipleUses) {
3586 unsigned ImmDefSize = RI.getRegSizeInBits(*
MRI->getRegClass(Reg));
3589 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3597 if (ImmDefSize == 32 &&
3602 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3603 RI.getSubRegIdxSize(UseSubReg) == 16;
3606 if (RI.hasVGPRs(DstRC))
3609 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3615 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3622 for (
unsigned MovOp :
3623 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3624 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3632 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3636 if (MovDstPhysReg) {
3640 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3647 if (MovDstPhysReg) {
3648 if (!MovDstRC->
contains(MovDstPhysReg))
3650 }
else if (!
MRI->constrainRegClass(DstReg, MovDstRC)) {
3664 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3672 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3676 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3678 UseMI.getOperand(0).setReg(MovDstPhysReg);
3683 UseMI.setDesc(NewMCID);
3684 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3685 UseMI.addImplicitDefUseOperands(*MF);
3689 if (HasMultipleUses)
3692 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3693 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3694 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3695 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3696 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3697 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3698 Opc == AMDGPU::V_FMAC_F64_e64) {
3707 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3722 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3723 if (!RegSrc->
isReg())
3725 if (RI.isSGPRClass(
MRI->getRegClass(RegSrc->
getReg())) &&
3726 ST.getConstantBusLimit(
Opc) < 2)
3729 if (!Src2->
isReg() || RI.isSGPRClass(
MRI->getRegClass(Src2->
getReg())))
3741 if (Def && Def->isMoveImmediate() &&
3752 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3753 NewOpc == AMDGPU::V_FMAMK_F16_fake16)
3763 unsigned SrcSubReg = RegSrc->
getSubReg();
3768 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3769 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3770 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3771 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3772 UseMI.untieRegOperand(
3773 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3780 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3782 DefMI.eraseFromParent();
3789 if (ST.getConstantBusLimit(
Opc) < 2) {
3792 bool Src0Inlined =
false;
3793 if (Src0->
isReg()) {
3798 if (Def && Def->isMoveImmediate() &&
3803 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
3810 if (Src1->
isReg() && !Src0Inlined) {
3813 if (Def && Def->isMoveImmediate() &&
3815 MRI->hasOneNonDBGUse(Src1->
getReg()) && commuteInstruction(
UseMI))
3817 else if (RI.isSGPRReg(*
MRI, Src1->
getReg()))
3830 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3831 NewOpc == AMDGPU::V_FMAAK_F16_fake16)
3837 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3838 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3839 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3840 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3841 UseMI.untieRegOperand(
3842 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3844 const std::optional<int64_t> SubRegImm =
3858 bool DeleteDef =
MRI->use_nodbg_empty(Reg);
3860 DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
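  // Two accesses off the same base are disjoint when the lower one ends at or
  // before the higher one starts, e.g. an 8-byte access at offset 0 and any
  // access at offset 16 cannot overlap because 0 + 8 <= 16.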
3890bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
3893 int64_t Offset0, Offset1;
3896 bool Offset0IsScalable, Offset1IsScalable;
3910 LocationSize Width0 = MIa.
memoperands().front()->getSize();
3911 LocationSize Width1 = MIb.
memoperands().front()->getSize();
3918 "MIa must load from or modify a memory location");
3920 "MIb must load from or modify a memory location");
3939 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3946 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3956 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3970 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3981 if (
Reg.isPhysical())
3983 auto *Def =
MRI.getUniqueVRegDef(
Reg);
3985 Imm = Def->getOperand(1).getImm();
4005 unsigned NumOps =
MI.getNumOperands();
4008 if (
Op.isReg() &&
Op.isKill())
4016 case AMDGPU::V_MAC_F16_e32:
4017 case AMDGPU::V_MAC_F16_e64:
4018 return AMDGPU::V_MAD_F16_e64;
4019 case AMDGPU::V_MAC_F32_e32:
4020 case AMDGPU::V_MAC_F32_e64:
4021 return AMDGPU::V_MAD_F32_e64;
4022 case AMDGPU::V_MAC_LEGACY_F32_e32:
4023 case AMDGPU::V_MAC_LEGACY_F32_e64:
4024 return AMDGPU::V_MAD_LEGACY_F32_e64;
4025 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4026 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4027 return AMDGPU::V_FMA_LEGACY_F32_e64;
4028 case AMDGPU::V_FMAC_F16_e32:
4029 case AMDGPU::V_FMAC_F16_e64:
4030 case AMDGPU::V_FMAC_F16_t16_e64:
4031 case AMDGPU::V_FMAC_F16_fake16_e64:
4032 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4033 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4034 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4035 : AMDGPU::V_FMA_F16_gfx9_e64;
4036 case AMDGPU::V_FMAC_F32_e32:
4037 case AMDGPU::V_FMAC_F32_e64:
4038 return AMDGPU::V_FMA_F32_e64;
4039 case AMDGPU::V_FMAC_F64_e32:
4040 case AMDGPU::V_FMAC_F64_e64:
4041 return AMDGPU::V_FMA_F64_e64;
4051 unsigned Opc =
MI.getOpcode();
4055 if (NewMFMAOpc != -1) {
4058 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
4059 MIB.
add(
MI.getOperand(
I));
4065 if (Def.isEarlyClobber() && Def.isReg() &&
4070 auto UpdateDefIndex = [&](
LiveRange &LR) {
4071 auto *S = LR.find(OldIndex);
4072 if (S != LR.end() && S->start == OldIndex) {
4073 assert(S->valno && S->valno->def == OldIndex);
4074 S->start = NewIndex;
4075 S->valno->def = NewIndex;
4079 for (
auto &SR : LI.subranges())
4090 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I)
4100 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4101 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4102 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4106 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4107 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4108 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4109 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4110 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4111 bool Src0Literal =
false;
4116 case AMDGPU::V_MAC_F16_e64:
4117 case AMDGPU::V_FMAC_F16_e64:
4118 case AMDGPU::V_FMAC_F16_t16_e64:
4119 case AMDGPU::V_FMAC_F16_fake16_e64:
4120 case AMDGPU::V_MAC_F32_e64:
4121 case AMDGPU::V_MAC_LEGACY_F32_e64:
4122 case AMDGPU::V_FMAC_F32_e64:
4123 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4124 case AMDGPU::V_FMAC_F64_e64:
4126 case AMDGPU::V_MAC_F16_e32:
4127 case AMDGPU::V_FMAC_F16_e32:
4128 case AMDGPU::V_MAC_F32_e32:
4129 case AMDGPU::V_MAC_LEGACY_F32_e32:
4130 case AMDGPU::V_FMAC_F32_e32:
4131 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4132 case AMDGPU::V_FMAC_F64_e32: {
4133 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4134 AMDGPU::OpName::src0);
4161 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4162 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4164 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4165 !RI.isSGPRReg(
MBB.getParent()->getRegInfo(), Src0->
getReg()))) {
4167 const auto killDef = [&]() ->
void {
4172 if (
MRI.hasOneNonDBGUse(DefReg)) {
4174 DefMI->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4175 DefMI->getOperand(0).setIsDead(
true);
4176 for (
unsigned I =
DefMI->getNumOperands() - 1;
I != 0; --
I)
4189 Register DummyReg =
MRI.cloneVirtualRegister(DefReg);
4191 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4192 MIOp.setIsUndef(
true);
4193 MIOp.setReg(DummyReg);
4242 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4263 if (Src0Literal && !ST.hasVOP3Literal())
4283 MIB.
addImm(OpSel ? OpSel->getImm() : 0);
4294 switch (
MI.getOpcode()) {
4295 case AMDGPU::S_SET_GPR_IDX_ON:
4296 case AMDGPU::S_SET_GPR_IDX_MODE:
4297 case AMDGPU::S_SET_GPR_IDX_OFF:
4315 if (
MI.isTerminator() ||
MI.isPosition())
4319 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4322 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4328 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4329 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4330 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4331 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4332 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4337 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4338 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4339 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4347 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4356 if (
MI.memoperands_empty())
4361 unsigned AS = Memop->getAddrSpace();
4362 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4363 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4364 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4365 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4380 if (
MI.memoperands_empty())
4389 unsigned AS = Memop->getAddrSpace();
4406 if (ST.isTgSplitEnabled())
4411 if (
MI.memoperands_empty())
4416 unsigned AS = Memop->getAddrSpace();
4432 unsigned Opcode =
MI.getOpcode();
4447 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4448 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4449 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
4452 if (
MI.isCall() ||
MI.isInlineAsm())
4468 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4469 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4470 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4471 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4479 if (
MI.isMetaInstruction())
4483 if (
MI.isCopyLike()) {
4484 if (!RI.isSGPRReg(
MRI,
MI.getOperand(0).getReg()))
4488 return MI.readsRegister(AMDGPU::EXEC, &RI);
4499 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4503 switch (Imm.getBitWidth()) {
4509 ST.hasInv2PiInlineImm());
4512 ST.hasInv2PiInlineImm());
4514 return ST.has16BitInsts() &&
4516 ST.hasInv2PiInlineImm());
4523 APInt IntImm = Imm.bitcastToAPInt();
4525 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4533 return ST.has16BitInsts() &&
4536 return ST.has16BitInsts() &&
4546 switch (OperandType) {
4556 int32_t Trunc =
static_cast<int32_t
>(Imm);
4596 int16_t Trunc =
static_cast<int16_t
>(Imm);
4597 return ST.has16BitInsts() &&
4606 int16_t Trunc =
static_cast<int16_t
>(Imm);
4607 return ST.has16BitInsts() &&
4658 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4664 return ST.hasVOP3Literal();
4668 int64_t ImmVal)
const {
4671 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4672 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4673 AMDGPU::OpName::src2))
4675 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4687 "unexpected imm-like operand kind");
4700 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4718 AMDGPU::OpName
OpName)
const {
4720 return Mods && Mods->
getImm();
4733 switch (
MI.getOpcode()) {
4734 default:
return false;
4736 case AMDGPU::V_ADDC_U32_e64:
4737 case AMDGPU::V_SUBB_U32_e64:
4738 case AMDGPU::V_SUBBREV_U32_e64: {
4746 case AMDGPU::V_MAC_F16_e64:
4747 case AMDGPU::V_MAC_F32_e64:
4748 case AMDGPU::V_MAC_LEGACY_F32_e64:
4749 case AMDGPU::V_FMAC_F16_e64:
4750 case AMDGPU::V_FMAC_F16_t16_e64:
4751 case AMDGPU::V_FMAC_F16_fake16_e64:
4752 case AMDGPU::V_FMAC_F32_e64:
4753 case AMDGPU::V_FMAC_F64_e64:
4754 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4760 case AMDGPU::V_CNDMASK_B32_e64:
4766 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(
MRI, Src1->
getReg()) ||
4796 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)

  return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;

  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);
  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int RegClass = Desc.operands()[i].RegClass;
    switch (OpInfo.OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Expected inline constant for operand.";
      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
      if (OpInfo.isGenericType())

    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
      if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
              RI.getSubRegisterClass(RC, MO.getSubReg())) {
          RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
        ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())
          ErrInfo = "Operand has incorrect register class.";
    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";

    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
        int64_t Imm = MO->getImm();
          ErrInfo = "Invalid SDWA selection";

    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
      if (!ST.hasSDWAScalar()) {
        if (!MO.isReg() ||
            !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
            "Only reg allowed as operands in SDWA instructions on GFX9+";

    if (!ST.hasSDWAOmod()) {
      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
      } else if (!ST.hasSDWAOutModsVOPC()) {
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";
        ErrInfo = "Dst register should use same physical register as preserved";
  if (isImage(Opcode) && !MI.mayStore()) {
    if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "Image instruction returns too many registers for dst "

  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

    int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
        } else if (!MO.isFI()) {
            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

        if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
              return !RI.regsOverlap(SGPRUsed, SGPR);

    if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

    if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)
    if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
      ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

      ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

  if (Desc.isBranch()) {
      ErrInfo = "invalid branch target for SOPK instruction";
      ErrInfo = "invalid immediate for SOPK instruction";
      ErrInfo = "invalid immediate for SOPK instruction";
  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

  if (MI.mayStore() &&
    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";

  if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

  if (isDS(MI) && !ST.hasGDS()) {
    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr0);
    AMDGPU::OpName RSrcOpName =
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);

      ErrInfo = "dim is out of range";

      if (ST.hasR128A16()) {
        IsA16 = R128A16->getImm() != 0;
      } else if (ST.hasA16()) {
        IsA16 = A16->getImm() != 0;

      bool IsNSA = RsrcIdx - VAddr0Idx > 1;

      unsigned AddrWords =

      unsigned VAddrWords;
        VAddrWords = RsrcIdx - VAddr0Idx;
        if (ST.hasPartialNSAEncoding() &&
          unsigned LastVAddrIdx = RsrcIdx - 1;
          VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

      if (VAddrWords != AddrWords) {
                          << " but got " << VAddrWords << "\n");
        ErrInfo = "bad vaddr size";
    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
        if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
            DC <= DppCtrl::ROW_NEWBCAST_LAST && !ST.hasGFX90AInsts()) {
          ErrInfo = "Invalid dpp_ctrl value: "
                    "row_newbroadcast/row_share is not supported before "
        if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
          ErrInfo = "Invalid dpp_ctrl value: "
                    "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
  AMDGPU::OpName DataName =
      isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;

    if (ST.hasGFX90AInsts()) {
      if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
          (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";

      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
          (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";

  if (ST.needsAlignedVGPRs()) {
    const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
      if (Reg.isPhysical())
        return !(RI.getHWRegIndex(Reg) & 1);
      return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
             !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);

    if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
        Opcode == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";

      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";

  if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";

    if (!ST.hasScaleOffset()) {
      ErrInfo = "Subtarget does not support offset scaling";
      ErrInfo = "Instruction does not support offset scaling";

  for (unsigned I = 0; I < 3; ++I) {
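// The switch below is the scalar-to-vector opcode map used when SALU
// instructions have to be rewritten into VALU equivalents. Opcodes with no
// vector counterpart return AMDGPU::INSTRUCTION_LIST_END (see the
// "Unexpected scalar opcode without corresponding vector one!" message at the
// end of the switch).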
  switch (MI.getOpcode()) {
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE:
    return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY:
    return AMDGPU::COPY;
  case AMDGPU::PHI:
    return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG:
    return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM:
    return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM:
    return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM:
    return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM:
    return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
                   RI.isAGPR(MRI, MI.getOperand(0).getReg())
               ? AMDGPU::COPY
               : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32:
    return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32:
    return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32:
    return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32:
    return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32:
    return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32:
    return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32:
    return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32:
    return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32:
    return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32:
    return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32:
    return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32:
    return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64:
    return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32:
    return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64:
    return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32:
    return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64:
    return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32:
    return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32:
    return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32:
    return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32:
    return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32:
    return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64:
    return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32:
    return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32:
    return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32:
    return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32:
    return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32:
    return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32:
    return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32:
    return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32:
    return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32:
    return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32:
    return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32:
    return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32:
    return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64:
    return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64:
    return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32:
    return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32:
    return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32:
    return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32:
    return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0:
    return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32:
    return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32:
    return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32:
    return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32:
    return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16:
  case AMDGPU::S_CVT_HI_F32_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
                                   : AMDGPU::V_CVT_F32_F16_fake16_e64;
  case AMDGPU::S_CVT_F16_F32:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
                                   : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32:
    return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32:
    return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32:
    return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32:
    return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
                                   : AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
                                   : AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32:
    return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32:
    return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32:
    return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32:
    return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32:
    return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32:
    return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32:
    return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
                                   : AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
                                   : AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
                                   : AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
                                   : AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
                                   : AMDGPU::V_MINIMUM_F16_fake16_e64;
  case AMDGPU::S_MAXIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
                                   : AMDGPU::V_MAXIMUM_F16_fake16_e64;
  case AMDGPU::S_MUL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
                                   : AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32:
    return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32:
    return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
                                   : AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32:
    return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32:
    return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32:
    return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32:
    return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32:
    return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32:
    return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32:
    return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32:
    return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32:
    return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32:
    return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32:
    return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32:
    return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32:
    return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32:
    return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
                                   : AMDGPU::V_CMP_LT_F16_fake16_e64;
  case AMDGPU::S_CMP_EQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
                                   : AMDGPU::V_CMP_EQ_F16_fake16_e64;
  case AMDGPU::S_CMP_LE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
                                   : AMDGPU::V_CMP_LE_F16_fake16_e64;
  case AMDGPU::S_CMP_GT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
                                   : AMDGPU::V_CMP_GT_F16_fake16_e64;
  case AMDGPU::S_CMP_LG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
                                   : AMDGPU::V_CMP_LG_F16_fake16_e64;
  case AMDGPU::S_CMP_GE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
                                   : AMDGPU::V_CMP_GE_F16_fake16_e64;
  case AMDGPU::S_CMP_O_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
                                   : AMDGPU::V_CMP_O_F16_fake16_e64;
  case AMDGPU::S_CMP_U_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
                                   : AMDGPU::V_CMP_U_F16_fake16_e64;
  case AMDGPU::S_CMP_NGE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
                                   : AMDGPU::V_CMP_NGE_F16_fake16_e64;
  case AMDGPU::S_CMP_NLG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
                                   : AMDGPU::V_CMP_NLG_F16_fake16_e64;
  case AMDGPU::S_CMP_NGT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
                                   : AMDGPU::V_CMP_NGT_F16_fake16_e64;
  case AMDGPU::S_CMP_NLE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
                                   : AMDGPU::V_CMP_NLE_F16_fake16_e64;
  case AMDGPU::S_CMP_NEQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
                                   : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
  case AMDGPU::S_CMP_NLT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
                                   : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64:
    return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
                                   : AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64:
    return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
                                   : AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64:
    return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
                                   : AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64:
    return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
                                   : AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64:
    return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
                                   : AMDGPU::V_SQRT_F16_fake16_e64;

      "Unexpected scalar opcode without corresponding vector one!");
6024 "Not a whole wave func");
6027 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6028 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6039 case AMDGPU::AV_32RegClassID:
6040 RCID = AMDGPU::VGPR_32RegClassID;
6042 case AMDGPU::AV_64RegClassID:
6043 RCID = AMDGPU::VReg_64RegClassID;
6045 case AMDGPU::AV_96RegClassID:
6046 RCID = AMDGPU::VReg_96RegClassID;
6048 case AMDGPU::AV_128RegClassID:
6049 RCID = AMDGPU::VReg_128RegClassID;
6051 case AMDGPU::AV_160RegClassID:
6052 RCID = AMDGPU::VReg_160RegClassID;
6054 case AMDGPU::AV_512RegClassID:
6055 RCID = AMDGPU::VReg_512RegClassID;
6070 auto RegClass = TID.
operands()[OpNum].RegClass;
6073 return RI.getRegClass(RegClass);
6079 unsigned OpNo)
const {
6081 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6082 Desc.operands()[OpNo].RegClass == -1) {
6085 if (Reg.isVirtual()) {
6087 MI.getParent()->getParent()->getRegInfo();
6088 return MRI.getRegClass(Reg);
6090 return RI.getPhysRegBaseClass(Reg);
6093 unsigned RCID =
Desc.operands()[OpNo].RegClass;
6102 unsigned RCID =
get(
MI.getOpcode()).operands()[
OpIdx].RegClass;
6104 unsigned Size = RI.getRegSizeInBits(*RC);
6105 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6106 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6107 : AMDGPU::V_MOV_B32_e32;
6109 Opcode = AMDGPU::COPY;
6110 else if (RI.isSGPRClass(RC))
6111 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6125 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6131 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
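  // The mov-opcode selection above picks V_MOV_B64_PSEUDO, V_MOV_B16_t16_e64,
  // or V_MOV_B32_e32 by operand size for vector classes, falls back to COPY
  // when a plain copy suffices, and uses S_MOV_B64 / S_MOV_B32 for SGPR
  // classes.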
  if (SubIdx == AMDGPU::sub0)
  if (SubIdx == AMDGPU::sub1)

void SIInstrInfo::swapOperands(MachineInstr &Inst) const {

  if (Reg.isPhysical())

    return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr;

  return RI.getCommonSubClass(DRC, RC) != nullptr;

  unsigned Opc = MI.getOpcode();

    constexpr const AMDGPU::OpName OpNames[] = {
        AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};

      int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
      if (static_cast<unsigned>(SrcIdx) == OpIdx &&

  bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
  if (IsAGPR && !ST.hasMAIInsts())
  if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&

  const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  const int DataIdx = AMDGPU::getNamedOperandIdx(
      Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
  if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
      MI.getOperand(DataIdx).isReg() &&
      RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)

  if ((int)OpIdx == DataIdx) {
    if (VDstIdx != -1 &&
        RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)

    const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
        RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)

  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
      (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&

  constexpr const unsigned NumOps = 3;
  constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
      AMDGPU::OpName::src0,           AMDGPU::OpName::src1,
      AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,
      AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};

    int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
      MO = &MI.getOperand(SrcIdx);

        AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
    unsigned Mods = MI.getOperand(ModsIdx).getImm();

  return !OpSel && !OpSelHi;
      OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;

  int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
  int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
    if (!LiteralLimit--)

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      if (--ConstantBusLimit <= 0)
      if (!LiteralLimit--)
      if (--ConstantBusLimit <= 0)

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
        !Op.isIdenticalTo(*MO))

  } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&

  bool Is64BitOp = Is64BitFPOp ||
        (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
  if (!Is64BitFPOp && (int32_t)Imm < 0 &&

  bool IsGFX950Only = ST.hasGFX950Insts();
  bool IsGFX940Only = ST.hasGFX940Insts();

  if (!IsGFX950Only && !IsGFX940Only)

  unsigned Opcode = MI.getOpcode();

  case AMDGPU::V_CVT_PK_BF8_F32_e64:
  case AMDGPU::V_CVT_PK_FP8_F32_e64:
  case AMDGPU::V_MQSAD_PK_U16_U8_e64:
  case AMDGPU::V_MQSAD_U32_U8_e64:
  case AMDGPU::V_PK_ADD_F16:
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_I16:
  case AMDGPU::V_PK_ADD_U16:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_FMA_F16:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMAC_F16_e32:
  case AMDGPU::V_PK_FMAC_F16_e64:
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_MAD_I16:
  case AMDGPU::V_PK_MAD_U16:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_PK_MAX_I16:
  case AMDGPU::V_PK_MAX_U16:
  case AMDGPU::V_PK_MIN_F16:
  case AMDGPU::V_PK_MIN_I16:
  case AMDGPU::V_PK_MIN_U16:
  case AMDGPU::V_PK_MOV_B32:
  case AMDGPU::V_PK_MUL_F16:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_LO_U16:
  case AMDGPU::V_PK_SUB_I16:
  case AMDGPU::V_PK_SUB_U16:
  case AMDGPU::V_QSAD_PK_U16_U8_e64:
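  // The opcode list above appears to gate a GFX940/GFX950-specific legality
  // check (note the IsGFX940Only/IsGFX950Only flags computed just before it);
  // it enumerates packed (V_PK_*) ops plus a few QSAD/MQSAD/CVT_PK opcodes.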
  unsigned Opc = MI.getOpcode();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&

  if (Opc == AMDGPU::V_WRITELANE_B32) {
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))

  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  if (HasImplicitSGPR || !MI.isCommutable()) {

  if (CommutedOpc == -1) {

  MI.setDesc(get(CommutedOpc));

  bool Src0Kill = Src0.isKill();
  else if (Src1.isReg()) {

  unsigned Opc = MI.getOpcode();
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)

  if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
      Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
    if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    if (VOP3Idx[2] != -1) {
      if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
        Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  int ConstantBusLimit = ST.getConstantBusLimit(Opc);
  int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
  Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
    SGPRsUsed.insert(SGPRReg);

  for (int Idx : VOP3Idx) {
      if (LiteralLimit > 0 && ConstantBusLimit > 0) {

      if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))

      if (ConstantBusLimit > 0) {

  if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
      !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))

  for (unsigned I = 0; I < 3; ++I) {

  SRC = RI.getCommonSubClass(SRC, DstRC);

  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;

  if (RI.hasAGPRs(VRC)) {
    VRC = RI.getEquivalentVGPRClass(VRC);
    Register NewSrcReg = MRI.createVirtualRegister(VRC);
            get(TargetOpcode::COPY), NewSrcReg)
            get(AMDGPU::V_READFIRSTLANE_B32), DstReg)

  for (unsigned i = 0; i < SubRegs; ++i) {
    Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
            get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
        .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));

          get(AMDGPU::REG_SEQUENCE), DstReg);
  for (unsigned i = 0; i < SubRegs; ++i) {
    MIB.addImm(RI.getSubRegFromChannel(i));

  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
    SBase->setReg(SGPR);

  if (SOff && !RI.isSGPRReg(MRI, SOff->getReg())) {

  int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
  if (OldSAddrIdx < 0)

  int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
  if (NewVAddrIdx < 0)

  int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);

  if (OldVAddrIdx >= 0) {
      VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());

  if (OldVAddrIdx == NewVAddrIdx) {
    MRI.removeRegOperandFromUseList(&NewVAddr);
    MRI.moveOperands(&NewVAddr, &SAddr, 1);
    MRI.removeRegOperandFromUseList(&NewVAddr);
    MRI.addRegOperandToUseList(&NewVAddr);
    assert(OldSAddrIdx == NewVAddrIdx);

    if (OldVAddrIdx >= 0) {
      int NewVDstIn =
          AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);

      if (NewVDstIn != -1) {
        int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
      if (NewVDstIn != -1) {
        int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);

  if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
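  // Summary (inferred from the operand-index bookkeeping above): a FLAT
  // instruction whose saddr operand is not uniform gets its saddr value moved
  // into the vaddr slot of an equivalent opcode (NewOpc), and vdst_in/vdst
  // tied operands are re-established when the new form requires them.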
  unsigned OpSubReg = Op.getSubReg();

      RI.getRegClassForReg(MRI, OpReg), OpSubReg);

  Register DstReg = MRI.createVirtualRegister(DstRC);

  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)

  bool ImpDef = Def->isImplicitDef();
  while (!ImpDef && Def && Def->isCopy()) {
    if (Def->getOperand(1).getReg().isPhysical())
    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
    ImpDef = Def && Def->isImplicitDef();
  if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&

  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();

  unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
  unsigned NumSubRegs = RegSize / 32;
  Register VScalarOp = ScalarOp->getReg();

  if (NumSubRegs == 1) {
    Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)

    Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);

    BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)

      CondReg = NewCondReg;

      Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    ScalarOp->setReg(CurReg);
    ScalarOp->setIsKill();

    assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
           "Unhandled register size");

    for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
          .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
          .addReg(VScalarOp, VScalarOpUndef,
                  TRI->getSubRegFromChannel(Idx + 1));

      Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)

      Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
      auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),

      if (NumSubRegs <= 2)
        Cmp.addReg(VScalarOp);
        Cmp.addReg(VScalarOp, VScalarOpUndef,
                   TRI->getSubRegFromChannel(Idx, 2));

        CondReg = NewCondReg;

        Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    const auto *SScalarOpRC =
        TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
    Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);

        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
    unsigned Channel = 0;
    for (Register Piece : ReadlanePieces) {
      Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));

    ScalarOp->setReg(SScalarOp);
    ScalarOp->setIsKill();

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  MRI.setSimpleHint(SaveExec, CondReg);
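  // This is the standard AMDGPU "waterfall" lowering for a divergent operand
  // that must be scalar: V_READFIRSTLANE_B32 picks one lane's value,
  // V_CMP_EQ_U32/U64 builds the mask of lanes sharing it, that mask is ANDed
  // into exec, and the loop repeats until every lane has been serviced.
#if 0 // Conceptual sketch only -- not part of SIInstrInfo.cpp.
// Scalar simulation of the waterfall idea; names and types are illustrative.
#include <cstdint>
#include <cstdio>

static void waterfallSim(const uint32_t Vals[], unsigned NumLanes) {
  uint64_t Exec = (NumLanes >= 64) ? ~0ull : ((1ull << NumLanes) - 1);
  while (Exec) {
    // "readfirstlane": take the value of the lowest active lane.
    unsigned First = __builtin_ctzll(Exec);
    uint32_t Cur = Vals[First];
    // "v_cmp_eq + and with exec": mask of active lanes with the same value.
    uint64_t Mask = 0;
    for (unsigned L = 0; L < NumLanes; ++L)
      if (((Exec >> L) & 1) && Vals[L] == Cur)
        Mask |= 1ull << L;
    std::printf("issue once for value 0x%x covering lanes 0x%llx\n", Cur,
                (unsigned long long)Mask);
    Exec &= ~Mask; // retire the serviced lanes and loop.
  }
}
#endif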
  if (!Begin.isValid())
  if (!End.isValid()) {

  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();

      MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
                                  std::numeric_limits<unsigned>::max()) !=

    SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);

  for (auto I = Begin; I != AfterMI; I++) {
    for (auto &MO : I->all_uses())
      MRI.clearKillFlags(MO.getReg());

  MBB.addSuccessor(LoopBB);

  for (auto &Succ : RemainderBB->successors()) {

static std::tuple<unsigned, unsigned>
      TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
                             AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);

  Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();

      .addImm(AMDGPU::sub0_sub1)

  return std::tuple(RsrcPtr, NewSRsrc);
  if (MI.getOpcode() == AMDGPU::PHI) {
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
      if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
          MRI.getRegClass(MI.getOperand(i).getReg());
      if (RI.hasVectorRegisters(OpRC)) {

      VRC = &AMDGPU::VReg_1RegClass;
                ? RI.getEquivalentAGPRClass(SRC)
                : RI.getEquivalentVGPRClass(SRC);
                ? RI.getEquivalentAGPRClass(VRC)
                : RI.getEquivalentVGPRClass(VRC);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      if (!Op.isReg() || !Op.getReg().isVirtual())

  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
    if (RI.hasVGPRs(DstRC)) {
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
        if (!Op.isReg() || !Op.getReg().isVirtual())

  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
    if (DstRC != Src0RC) {

  if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))

  if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
      MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
      MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
      MI.getOpcode() == AMDGPU::S_WQM_B32 ||
      MI.getOpcode() == AMDGPU::S_WQM_B64 ||
      MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
      MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))

                            ? AMDGPU::OpName::rsrc
                            : AMDGPU::OpName::srsrc;
    if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))

    AMDGPU::OpName SampOpName =
        isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
    if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))

  if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
    if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {

      unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
      unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();

      while (Start->getOpcode() != FrameSetupOpcode)
      while (End->getOpcode() != FrameDestroyOpcode)

      while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
             MI.definesRegister(End->getOperand(1).getReg(), nullptr))

  if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
      MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
      MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
      MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))

  bool isSoffsetLegal = true;
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
  if (SoffsetIdx != -1) {
        !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
      isSoffsetLegal = false;

  bool isRsrcLegal = true;
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
  if (RsrcIdx != -1) {
      isRsrcLegal = false;

  if (isRsrcLegal && isSoffsetLegal)
    Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

    const auto *BoolXExecRC = RI.getWaveMaskRegClass();
    Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
    Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);

    unsigned RsrcPtr, NewSRsrc;

        .addReg(RsrcPtr, 0, AMDGPU::sub0)
        .addReg(RsrcPtr, 0, AMDGPU::sub1)

  } else if (!VAddr && ST.hasAddr64()) {
           "FIXME: Need to emit flat atomics here");

    unsigned RsrcPtr, NewSRsrc;
    Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

      MIB.addImm(CPol->getImm());
      MIB.addImm(TFE->getImm());

    MI.removeFromParent();

        .addReg(RsrcPtr, 0, AMDGPU::sub0)
        .addReg(RsrcPtr, 0, AMDGPU::sub1)

  if (!isSoffsetLegal) {
  if (!isSoffsetLegal) {

      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
  if (RsrcIdx != -1) {
    DeferredList.insert(MI);

  return DeferredList.contains(MI);

  if (!ST.useRealTrue16Insts())

  unsigned Opcode = MI.getOpcode();
      OpIdx >= get(Opcode).getNumOperands() ||
      get(Opcode).operands()[OpIdx].RegClass == -1)

  if (!Op.isReg() || !Op.getReg().isVirtual())

  if (!RI.isVGPRClass(CurrRC))

  unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;

  if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
    Op.setSubReg(AMDGPU::lo16);
  } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
    Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
    Op.setReg(NewDstReg);

  while (!Worklist.empty()) {
           "Deferred MachineInstr are not supposed to re-populate worklist");
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_SUB_I32: {
    std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);

  case AMDGPU::S_MUL_U64:
    if (ST.hasVectorMulU64()) {
      NewOpcode = AMDGPU::V_MUL_U64_e64;
      splitScalarSMulU64(Worklist, Inst, MDT);

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    splitScalarSMulPseudo(Worklist, Inst, MDT);

  case AMDGPU::S_AND_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);

  case AMDGPU::S_OR_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);

  case AMDGPU::S_XOR_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);

  case AMDGPU::S_NAND_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);

  case AMDGPU::S_NOR_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);

  case AMDGPU::S_XNOR_B64:
    if (ST.hasDLInsts())
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
      splitScalar64BitXnor(Worklist, Inst, MDT);

  case AMDGPU::S_ANDN2_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);

  case AMDGPU::S_ORN2_B64:
    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);

  case AMDGPU::S_BREV_B64:
    splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);

  case AMDGPU::S_NOT_B64:
    splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);

  case AMDGPU::S_BCNT1_I32_B64:
    splitScalar64BitBCNT(Worklist, Inst);

  case AMDGPU::S_BFE_I64:
    splitScalar64BitBFE(Worklist, Inst);

  case AMDGPU::S_FLBIT_I32_B64:
    splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
  case AMDGPU::S_FF1_I32_B64:
    splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);

  case AMDGPU::S_LSHL_B32:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
  case AMDGPU::S_ASHR_I32:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
  case AMDGPU::S_LSHR_B32:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
  case AMDGPU::S_LSHL_B64:
    if (ST.hasOnlyRevVALUShifts()) {
                      ? AMDGPU::V_LSHLREV_B64_pseudo_e64
                      : AMDGPU::V_LSHLREV_B64_e64;
  case AMDGPU::S_ASHR_I64:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
  case AMDGPU::S_LSHR_B64:
    if (ST.hasOnlyRevVALUShifts()) {
      NewOpcode = AMDGPU::V_LSHRREV_B64_e64;

  case AMDGPU::S_ABS_I32:
    lowerScalarAbs(Worklist, Inst);

  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1: {
    bool IsSCC = CondReg == AMDGPU::SCC;

  case AMDGPU::S_BFE_U64:
  case AMDGPU::S_BFM_B64:

  case AMDGPU::S_PACK_LL_B32_B16:
  case AMDGPU::S_PACK_LH_B32_B16:
  case AMDGPU::S_PACK_HL_B32_B16:
  case AMDGPU::S_PACK_HH_B32_B16:
    movePackToVALU(Worklist, MRI, Inst);

  case AMDGPU::S_XNOR_B32:
    lowerScalarXnor(Worklist, Inst);

  case AMDGPU::S_NAND_B32:
    splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);

  case AMDGPU::S_NOR_B32:
    splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);

  case AMDGPU::S_ANDN2_B32:
    splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);

  case AMDGPU::S_ORN2_B32:
    splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
  case AMDGPU::S_ADD_CO_PSEUDO:
  case AMDGPU::S_SUB_CO_PSEUDO: {
    unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
                       ? AMDGPU::V_ADDC_U32_e64
                       : AMDGPU::V_SUBB_U32_e64;
    const auto *CarryRC = RI.getWaveMaskRegClass();

    if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
      Register NewCarryReg = MRI.createVirtualRegister(CarryRC);

    Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(

    addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);

  case AMDGPU::S_UADDO_PSEUDO:
  case AMDGPU::S_USUBO_PSEUDO: {
    unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
                       ? AMDGPU::V_ADD_CO_U32_e64
                       : AMDGPU::V_SUB_CO_U32_e64;
        RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
    Register DestReg = MRI.createVirtualRegister(NewRC);

    MRI.replaceRegWith(Dest0.getReg(), DestReg);

  case AMDGPU::S_CSELECT_B32:
  case AMDGPU::S_CSELECT_B64:
    lowerSelect(Worklist, Inst, MDT);
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMP_LT_I32:
  case AMDGPU::S_CMP_LE_I32:
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMP_LT_U32:
  case AMDGPU::S_CMP_LE_U32:
  case AMDGPU::S_CMP_EQ_U64:
  case AMDGPU::S_CMP_LG_U64:
  case AMDGPU::S_CMP_LT_F32:
  case AMDGPU::S_CMP_EQ_F32:
  case AMDGPU::S_CMP_LE_F32:
  case AMDGPU::S_CMP_GT_F32:
  case AMDGPU::S_CMP_LG_F32:
  case AMDGPU::S_CMP_GE_F32:
  case AMDGPU::S_CMP_O_F32:
  case AMDGPU::S_CMP_U_F32:
  case AMDGPU::S_CMP_NGE_F32:
  case AMDGPU::S_CMP_NLG_F32:
  case AMDGPU::S_CMP_NGT_F32:
  case AMDGPU::S_CMP_NLE_F32:
  case AMDGPU::S_CMP_NEQ_F32:
  case AMDGPU::S_CMP_NLT_F32: {
    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
    if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);

  case AMDGPU::S_CMP_LT_F16:
  case AMDGPU::S_CMP_EQ_F16:
  case AMDGPU::S_CMP_LE_F16:
  case AMDGPU::S_CMP_GT_F16:
  case AMDGPU::S_CMP_LG_F16:
  case AMDGPU::S_CMP_GE_F16:
  case AMDGPU::S_CMP_O_F16:
  case AMDGPU::S_CMP_U_F16:
  case AMDGPU::S_CMP_NGE_F16:
  case AMDGPU::S_CMP_NLG_F16:
  case AMDGPU::S_CMP_NGT_F16:
  case AMDGPU::S_CMP_NLE_F16:
  case AMDGPU::S_CMP_NEQ_F16:
  case AMDGPU::S_CMP_NLT_F16: {
    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);

  case AMDGPU::S_CVT_HI_F32_F16: {
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    if (ST.useRealTrue16Insts()) {
          .addReg(TmpReg, 0, AMDGPU::hi16)

    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);

  case AMDGPU::S_MINIMUM_F32:
  case AMDGPU::S_MAXIMUM_F32: {
    Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);

  case AMDGPU::S_MINIMUM_F16:
  case AMDGPU::S_MAXIMUM_F16: {
    Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
                                                    ? &AMDGPU::VGPR_16RegClass
                                                    : &AMDGPU::VGPR_32RegClass);
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);

  case AMDGPU::V_S_EXP_F16_e64:
  case AMDGPU::V_S_LOG_F16_e64:
  case AMDGPU::V_S_RCP_F16_e64:
  case AMDGPU::V_S_RSQ_F16_e64:
  case AMDGPU::V_S_SQRT_F16_e64: {
    Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
                                                    ? &AMDGPU::VGPR_16RegClass
                                                    : &AMDGPU::VGPR_32RegClass);
    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);

  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
    if (NewOpcode == Opcode) {

      Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
              get(AMDGPU::V_READFIRSTLANE_B32), NewDst)

      addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
      MRI.replaceRegWith(DstReg, NewDstReg);
      MRI.clearKillFlags(NewDstReg);

    if (ST.useRealTrue16Insts() && Inst.isCopy() &&
      if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
        Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
        Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
                get(AMDGPU::IMPLICIT_DEF), Undef);
                get(AMDGPU::REG_SEQUENCE), NewDstReg)
        MRI.replaceRegWith(DstReg, NewDstReg);
        addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
      } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
        Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
        MRI.replaceRegWith(DstReg, NewDstReg);
        addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);

    Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);

    if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                   AMDGPU::OpName::src0_modifiers) >= 0)
    NewInstr->addOperand(Src);

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      NewInstr.addImm(Size);
    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
    } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
             "Scalar BFE is only implemented for constant width and offset");

    if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                   AMDGPU::OpName::src1_modifiers) >= 0)
    if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
    if (AMDGPU::getNamedOperandIdx(NewOpcode,
                                   AMDGPU::OpName::src2_modifiers) >= 0)
    if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
    if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
    if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
    if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)

      NewInstr->addOperand(Op);

    if (Op.getReg() == AMDGPU::SCC) {
      if (Op.isDef() && !Op.isDead())
        addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
      addSCCDefsToVALUWorklist(NewInstr, Worklist);

  if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
    Register DstReg = NewInstr->getOperand(0).getReg();
    NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);
    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);

std::pair<bool, MachineBasicBlock *>
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);

  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? AMDGPU::V_ADD_U32_e64
                                             : AMDGPU::V_SUB_U32_e64;

  MRI.replaceRegWith(OldDstReg, ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  return std::pair(true, NewBB);

  return std::pair(false, nullptr);
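// moveScalarAddSub above replaces S_ADD_I32/S_SUB_I32 with V_ADD_U32_e64 /
// V_SUB_U32_e64 when the subtarget has carry-less adds, returning (Changed,
// NewBB) so the caller can fall back to the generic lowering path otherwise.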
  bool IsSCC = (CondReg == AMDGPU::SCC);

    MRI.replaceRegWith(Dest.getReg(), CondReg);

    const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
    NewCondReg = MRI.createVirtualRegister(TC);

    bool CopyFound = false;
    for (MachineInstr &CandI :
      if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
        if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
              .addReg(CandI.getOperand(1).getReg());

          ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;

      RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
  MachineInstr *NewInst;
  if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
    NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)

  MRI.replaceRegWith(Dest.getReg(), NewDestReg);

  addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);

  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  unsigned SubOp = ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32
                                      : AMDGPU::V_SUB_CO_U32_e32;

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);

  if (ST.hasDLInsts()) {
    Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    MRI.replaceRegWith(Dest.getReg(), NewDest);
    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);

    bool Src0IsSGPR = Src0.isReg() &&
                      RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
    bool Src1IsSGPR = Src1.isReg() &&
                      RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
    Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    } else if (Src1IsSGPR) {

    MRI.replaceRegWith(Dest.getReg(), NewDest);
    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);

                                      unsigned Opcode) const {
  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);

                                     unsigned Opcode) const {
  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8591 const MCInstrDesc &InstDesc =
get(Opcode);
8592 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8594 &AMDGPU::SGPR_32RegClass;
8596 const TargetRegisterClass *Src0SubRC =
8597 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8600 AMDGPU::sub0, Src0SubRC);
8602 const TargetRegisterClass *DestRC =
MRI.getRegClass(Dest.
getReg());
8603 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8604 const TargetRegisterClass *NewDestSubRC =
8605 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8607 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8608 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8611 AMDGPU::sub1, Src0SubRC);
8613 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8614 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
8619 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8626 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8628 Worklist.
insert(&LoHalf);
8629 Worklist.
insert(&HiHalf);
8635 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8646 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8647 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8648 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8656 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8657 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8658 const TargetRegisterClass *Src0SubRC =
8659 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8660 if (RI.isSGPRClass(Src0SubRC))
8661 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8662 const TargetRegisterClass *Src1SubRC =
8663 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8664 if (RI.isSGPRClass(Src1SubRC))
8665 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8669 MachineOperand Op0L =
8671 MachineOperand Op1L =
8673 MachineOperand Op0H =
8675 MachineOperand Op1H =
8693 Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8694 MachineInstr *Op1L_Op0H =
8699 Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8700 MachineInstr *Op1H_Op0L =
8705 Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8706 MachineInstr *Carry =
8711 MachineInstr *LoHalf =
8716 Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8721 MachineInstr *HiHalf =
8732 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8744 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8755 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8756 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8757 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8765 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8766 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8767 const TargetRegisterClass *Src0SubRC =
8768 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8769 if (RI.isSGPRClass(Src0SubRC))
8770 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8771 const TargetRegisterClass *Src1SubRC =
8772 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8773 if (RI.isSGPRClass(Src1SubRC))
8774 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8778 MachineOperand Op0L =
8780 MachineOperand Op1L =
8784 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8785 ? AMDGPU::V_MUL_HI_U32_e64
8786 : AMDGPU::V_MUL_HI_I32_e64;
8787 MachineInstr *HiHalf =
8790 MachineInstr *LoHalf =
8801 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8809 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8825 const MCInstrDesc &InstDesc = get(Opcode);
8826 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8828 &AMDGPU::SGPR_32RegClass;
8830 const TargetRegisterClass *Src0SubRC =
8831 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8832 const TargetRegisterClass *Src1RC = Src1.isReg() ?
8834 &AMDGPU::SGPR_32RegClass;
8836 const TargetRegisterClass *Src1SubRC =
8837 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8840 AMDGPU::sub0, Src0SubRC);
8842 AMDGPU::sub0, Src1SubRC);
8844 AMDGPU::sub1, Src0SubRC);
8846 AMDGPU::sub1, Src1SubRC);
8848 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8849 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8850 const TargetRegisterClass *NewDestSubRC =
8851 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8853 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8854 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
8858 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8859 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
8863 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8870 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8872 Worklist.insert(&LoHalf);
8873 Worklist.insert(&HiHalf);
8876 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8892 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8894 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8896 MachineOperand* Op0;
8897 MachineOperand* Op1;
8910 Register NewDest = MRI.createVirtualRegister(DestRC);
8916 MRI.replaceRegWith(Dest.getReg(), NewDest);
8932 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
8933 const TargetRegisterClass *SrcRC = Src.isReg() ?
8934 MRI.getRegClass(Src.getReg()) :
8935 &AMDGPU::SGPR_32RegClass;
8937 Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8938 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8940 const TargetRegisterClass *SrcSubRC =
8941 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8944 AMDGPU::sub0, SrcSubRC);
8946 AMDGPU::sub1, SrcSubRC);
8952 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8956 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8975 Offset == 0 && "Not implemented");
8978 Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8979 Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8980 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8997 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8998 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9003 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9004 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9008 .addReg(Src.getReg(), 0, AMDGPU::sub0);
9011 .addReg(Src.getReg(), 0, AMDGPU::sub0)
9016 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9017 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9036 const MCInstrDesc &InstDesc = get(Opcode);
9038 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9039 unsigned OpcodeAdd =
9040 ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
9042 const TargetRegisterClass *SrcRC =
9043 Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9044 const TargetRegisterClass *SrcSubRC =
9045 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9047 MachineOperand SrcRegSub0 =
9049 MachineOperand SrcRegSub1 =
9052 Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9053 Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9054 Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9055 Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9062 .addReg(IsCtlz ? MidReg1 : MidReg2)
9068 .addReg(IsCtlz ? MidReg2 : MidReg1);
9070 MRI.replaceRegWith(Dest.getReg(), MidReg4);
9072 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9075void SIInstrInfo::addUsersToMoveToVALUWorklist(
9079 MachineInstr &UseMI = *MO.getParent();
9083 switch (UseMI.getOpcode()) {
9086 case AMDGPU::SOFT_WQM:
9087 case AMDGPU::STRICT_WWM:
9088 case AMDGPU::STRICT_WQM:
9089 case AMDGPU::REG_SEQUENCE:
9091 case AMDGPU::INSERT_SUBREG:
9094 OpNo = MO.getOperandNo();
9099 MRI.constrainRegClass(DstReg, OpRC);
9101 if (!RI.hasVectorRegisters(OpRC))
9112 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9119 case AMDGPU::S_PACK_LL_B32_B16: {
9120 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9121 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9138 case AMDGPU::S_PACK_LH_B32_B16: {
9139 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9148 case AMDGPU::S_PACK_HL_B32_B16: {
9149 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9159 case AMDGPU::S_PACK_HH_B32_B16: {
9160 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9161 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9178 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9179 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9188 assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9189 !Op.isDead() && Op.getParent() == &SCCDefInst);
9190 SmallVector<MachineInstr *, 4> CopyToDelete;
9193 for (MachineInstr &MI :
9197 int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9200 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9201 Register DestReg = MI.getOperand(0).getReg();
9203 MRI.replaceRegWith(DestReg, NewCond);
9208 MI.getOperand(SCCIdx).setReg(NewCond);
9214 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9217 for (auto &Copy : CopyToDelete)
9218 Copy->eraseFromParent();
9226void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9232 for (MachineInstr &MI :
9235 if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9237 if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9246 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9254 case AMDGPU::REG_SEQUENCE:
9255 case AMDGPU::INSERT_SUBREG:
9257 case AMDGPU::SOFT_WQM:
9258 case AMDGPU::STRICT_WWM:
9259 case AMDGPU::STRICT_WQM: {
9261 if (RI.isAGPRClass(SrcRC)) {
9262 if (RI.isAGPRClass(NewDstRC))
9267 case AMDGPU::REG_SEQUENCE:
9268 case AMDGPU::INSERT_SUBREG:
9269 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9272 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9278 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9281 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9295 int OpIndices[3]) const {
9296 const MCInstrDesc &Desc = MI.getDesc();
9312 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9314 for (unsigned i = 0; i < 3; ++i) {
9315 int Idx = OpIndices[i];
9319 const MachineOperand &MO = MI.getOperand(Idx);
9325 const TargetRegisterClass *OpRC =
9326 RI.getRegClass(Desc.operands()[Idx].RegClass);
9327 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9333 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9334 if (RI.isSGPRClass(RegRC))
9352 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9353 SGPRReg = UsedSGPRs[0];
9356 if (!SGPRReg && UsedSGPRs[1]) {
9357 if (UsedSGPRs[1] == UsedSGPRs[2])
9358 SGPRReg = UsedSGPRs[1];
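// [Editorial note, not part of the original source] The two checks above
// appear to pick an SGPR that is already read by more than one of the three
// scanned operands: if any two of UsedSGPRs[0..2] match, that register is
// reused, presumably so a single SGPR read can satisfy the constant-bus
// restriction instead of forcing additional copies.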
9365 AMDGPU::OpName OperandName) const {
9366 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9369 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9373 return &MI.getOperand(Idx);
9387 if (ST.isAmdHsaOS()) {
9390 RsrcDataFormat |= (1ULL << 56);
9395 RsrcDataFormat |= (2ULL << 59);
9398 return RsrcDataFormat;
9408 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9413 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9420 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9426 unsigned Opc = MI.getOpcode();
9432 return get(Opc).mayLoad() &&
9437 int &FrameIndex) const {
9439 if (!Addr || !Addr->isFI())
9450 int &FrameIndex) const {
9458 int &FrameIndex) const {
9472 int &FrameIndex) const {
9489 while (++I != E && I->isInsideBundle()) {
9490 assert(!I->isBundle() && "No nested bundle!");
9498 unsigned Opc = MI.getOpcode();
9500 unsigned DescSize = Desc.getSize();
9505 unsigned Size = DescSize;
9509 if (MI.isBranch() && ST.hasOffset3fBug())
9520 bool HasLiteral = false;
9521 unsigned LiteralSize = 4;
9522 for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9527 if (ST.has64BitLiterals()) {
9528 switch (OpInfo.OperandType) {
9544 return HasLiteral ? DescSize + LiteralSize : DescSize;
9549 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9553 int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9554 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
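// [Editorial note, not part of the original source] With N address operands
// between vaddr0 and srsrc (N == RSrcIdx - VAddr0Idx), the expression above
// evaluates to 8 + 4 * ((N + 2) / 4) bytes: an 8-byte base encoding plus one
// extra dword for each group of up to four additional addresses, consistent
// with an NSA-style image encoding (an inference from the formula alone).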
9558 case TargetOpcode::BUNDLE:
9560 case TargetOpcode::INLINEASM:
9561 case TargetOpcode::INLINEASM_BR: {
9563 const char *AsmStr = MI.getOperand(0).getSymbolName();
9567 if (MI.isMetaInstruction())
9571 const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9574 unsigned LoInstOpcode = D16Info->LoOp;
9576 DescSize = Desc.getSize();
9580 if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9583 DescSize = Desc.getSize();
9594 if (MI.memoperands_empty())
9606 static const std::pair<int, const char *> TargetIndices[] = {
9644std::pair<unsigned, unsigned>
9651 static const std::pair<unsigned, const char *> TargetFlags[] = {
9669 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9684 return AMDGPU::WWM_COPY;
9686 return AMDGPU::COPY;
9698 bool IsNullOrVectorRegister = true;
9701 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9706 return IsNullOrVectorRegister &&
9708 (Opcode == AMDGPU::IMPLICIT_DEF &&
9710 (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
9711 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9719 if (ST.hasAddNoCarry())
9723 Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9724 MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9735 if (ST.hasAddNoCarry())
9742 *RI.getBoolRC(), I, false,
9755 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9756 case AMDGPU::SI_KILL_I1_TERMINATOR:
9765 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9766 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9767 case AMDGPU::SI_KILL_I1_PSEUDO:
9768 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9780 const unsigned OffsetBits =
9782 return (1 << OffsetBits) - 1;
9789 if (MI.isInlineAsm())
9792 for (auto &Op : MI.implicit_operands()) {
9793 if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9794 Op.setReg(AMDGPU::VCC_LO);
9803 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
9807 const auto RCID = MI.getDesc().operands()[Idx].RegClass;
9808 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
9825 if (Imm <= MaxImm + 64) {
9827 Overflow = Imm - MaxImm;
9854 if (ST.hasRestrictedSOffset())
9897 if (!ST.hasFlatInstOffsets())
9905 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9917std::pair<int64_t, int64_t>
9920 int64_t RemainderOffset = COffsetVal;
9921 int64_t ImmField = 0;
9926 if (AllowNegative) {
9928 int64_t D = 1LL << NumBits;
9929 RemainderOffset = (COffsetVal / D) * D;
9930 ImmField = COffsetVal - RemainderOffset;
9932 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9934 (ImmField % 4) != 0) {
9936 RemainderOffset += ImmField % 4;
9937 ImmField -= ImmField % 4;
9939 } else if (COffsetVal >= 0) {
9941 RemainderOffset = COffsetVal - ImmField;
9945 assert(RemainderOffset + ImmField == COffsetVal);
9946 return {ImmField, RemainderOffset};
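// [Editorial sketch, not part of SIInstrInfo.cpp] A minimal standalone model
// of the AllowNegative branch above, assuming <cstdint> and <utility> are
// available: the remainder is COffsetVal truncated toward zero to a multiple
// of 1 << NumBits, and the immediate field keeps what is left, so
// ImmField + RemainderOffset == COffsetVal by construction. The helper name
// is hypothetical.
static std::pair<int64_t, int64_t> splitOffsetSketch(int64_t COffsetVal,
                                                     unsigned NumBits) {
  int64_t D = 1LL << NumBits;
  int64_t RemainderOffset = (COffsetVal / D) * D; // multiple of D, toward zero
  int64_t ImmField = COffsetVal - RemainderOffset; // magnitude < D
  return {ImmField, RemainderOffset};
}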
9950 if (ST.hasNegativeScratchOffsetBug() &&
9958 switch (ST.getGeneration()) {
9984 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9985 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9986 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9987 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9988 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9989 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9990 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9991 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9998#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9999 case OPCODE##_dpp: \
10000 case OPCODE##_e32: \
10001 case OPCODE##_e64: \
10002 case OPCODE##_e64_dpp: \
10003 case OPCODE##_sdwa:
10017 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10018 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10019 case AMDGPU::V_FMA_F16_gfx9_e64:
10020 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10021 case AMDGPU::V_INTERP_P2_F16:
10022 case AMDGPU::V_MAD_F16_e64:
10023 case AMDGPU::V_MAD_U16_e64:
10024 case AMDGPU::V_MAD_I16_e64:
10046 switch (ST.getGeneration()) {
10059 if (isMAI(Opcode)) {
10067 if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10074 if (ST.hasGFX90AInsts()) {
10076 if (ST.hasGFX940Insts())
10107 for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10108 if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10109 auto &RegOp = MI.getOperand(1 + 2 * I);
10121 switch (MI.getOpcode()) {
10123 case AMDGPU::REG_SEQUENCE:
10127 case AMDGPU::INSERT_SUBREG:
10128 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10145 if (!P.Reg.isVirtual())
10149 auto *DefInst = MRI.getVRegDef(RSR.Reg);
10150 while (auto *MI = DefInst) {
10152 switch (MI->getOpcode()) {
10154 case AMDGPU::V_MOV_B32_e32: {
10155 auto &Op1 = MI->getOperand(1);
10160 DefInst = MRI.getVRegDef(RSR.Reg);
10168 DefInst = MRI.getVRegDef(RSR.Reg);
10181 assert(MRI.isSSA() && "Must be run on SSA");
10183 auto *TRI = MRI.getTargetRegisterInfo();
10184 auto *DefBB = DefMI.getParent();
10188 if (UseMI.getParent() != DefBB)
10191 const int MaxInstScan = 20;
10195 auto E = UseMI.getIterator();
10196 for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10197 if (I->isDebugInstr())
10200 if (++NumInst > MaxInstScan)
10203 if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10213 assert(MRI.isSSA() && "Must be run on SSA");
10215 auto *TRI = MRI.getTargetRegisterInfo();
10216 auto *DefBB = DefMI.getParent();
10218 const int MaxUseScan = 10;
10221 for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10222 auto &UseInst = *Use.getParent();
10225 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10228 if (++NumUse > MaxUseScan)
10235 const int MaxInstScan = 20;
10239 for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10242 if (I->isDebugInstr())
10245 if (++NumInst > MaxInstScan)
10258 if (Reg == VReg && --NumUse == 0)
10260 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10269 auto Cur = MBB.begin();
10270 if (Cur != MBB.end())
10272 if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10275 } while (Cur != MBB.end() && Cur != LastPHIIt);
10284 if (InsPt != MBB.end() &&
10285 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10286 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10287 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10288 InsPt->definesRegister(Src, nullptr)) {
10292 .addReg(Src, 0, SrcSubReg)
10317 if (isFullCopyInstr(MI)) {
10318 Register DstReg = MI.getOperand(0).getReg();
10319 Register SrcReg = MI.getOperand(1).getReg();
10326 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10330 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10341 unsigned *PredCost) const {
10342 if (MI.isBundle()) {
10345 unsigned Lat = 0, Count = 0;
10346 for (++I; I != E && I->isBundledWithPred(); ++I) {
10348 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10350 return Lat + Count - 1;
10353 return SchedModel.computeInstrLatency(&MI);
10359 unsigned Opcode = MI.getOpcode();
10364 : MI.getOperand(1).getReg();
10365 LLT DstTy = MRI.getType(Dst);
10366 LLT SrcTy = MRI.getType(Src);
10368 unsigned SrcAS = SrcTy.getAddressSpace();
10371 ST.hasGloballyAddressableScratch()
10379 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10380 return HandleAddrSpaceCast(MI);
10383 auto IID = GI->getIntrinsicID();
10390 case Intrinsic::amdgcn_addrspacecast_nonnull:
10391 return HandleAddrSpaceCast(MI);
10392 case Intrinsic::amdgcn_if:
10393 case Intrinsic::amdgcn_else:
10407 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10408 Opcode == AMDGPU::G_SEXTLOAD) {
10409 if (MI.memoperands_empty())
10413 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10414 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10422 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10423 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10424 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10437 unsigned opcode = MI.getOpcode();
10438 if (opcode == AMDGPU::V_READLANE_B32 ||
10439 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10440 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10443 if (isCopyInstr(MI)) {
10447 RI.getPhysRegBaseClass(srcOp.getReg());
10455 if (MI.isPreISelOpcode())
10470 if (MI.memoperands_empty())
10474 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10475 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10490 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10492 if (!SrcOp.isReg())
10496 if (!Reg || !SrcOp.readsReg())
10502 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10529 F, "ds_ordered_count unsupported for this calling conv"));
10543 Register &SrcReg2, int64_t &CmpMask,
10544 int64_t &CmpValue) const {
10545 if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10548 switch (MI.getOpcode()) {
10551 case AMDGPU::S_CMP_EQ_U32:
10552 case AMDGPU::S_CMP_EQ_I32:
10553 case AMDGPU::S_CMP_LG_U32:
10554 case AMDGPU::S_CMP_LG_I32:
10555 case AMDGPU::S_CMP_LT_U32:
10556 case AMDGPU::S_CMP_LT_I32:
10557 case AMDGPU::S_CMP_GT_U32:
10558 case AMDGPU::S_CMP_GT_I32:
10559 case AMDGPU::S_CMP_LE_U32:
10560 case AMDGPU::S_CMP_LE_I32:
10561 case AMDGPU::S_CMP_GE_U32:
10562 case AMDGPU::S_CMP_GE_I32:
10563 case AMDGPU::S_CMP_EQ_U64:
10564 case AMDGPU::S_CMP_LG_U64:
10565 SrcReg = MI.getOperand(0).getReg();
10566 if (MI.getOperand(1).isReg()) {
10567 if (MI.getOperand(1).getSubReg())
10569 SrcReg2 = MI.getOperand(1).getReg();
10571 } else if (MI.getOperand(1).isImm()) {
10573 CmpValue = MI.getOperand(1).getImm();
10579 case AMDGPU::S_CMPK_EQ_U32:
10580 case AMDGPU::S_CMPK_EQ_I32:
10581 case AMDGPU::S_CMPK_LG_U32:
10582 case AMDGPU::S_CMPK_LG_I32:
10583 case AMDGPU::S_CMPK_LT_U32:
10584 case AMDGPU::S_CMPK_LT_I32:
10585 case AMDGPU::S_CMPK_GT_U32:
10586 case AMDGPU::S_CMPK_GT_I32:
10587 case AMDGPU::S_CMPK_LE_U32:
10588 case AMDGPU::S_CMPK_LE_I32:
10589 case AMDGPU::S_CMPK_GE_U32:
10590 case AMDGPU::S_CMPK_GE_I32:
10591 SrcReg = MI.getOperand(0).getReg();
10593 CmpValue = MI.getOperand(1).getImm();
10602 Register SrcReg2, int64_t CmpMask,
10611 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10612 this](int64_t ExpectedValue, unsigned SrcSize,
10613 bool IsReversible, bool IsSigned) -> bool {
10638 if (!Def || Def->getParent() != CmpInstr.getParent())
10641 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10642 Def->getOpcode() != AMDGPU::S_AND_B64)
10646 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10657 SrcOp = &Def->getOperand(2);
10658 else if (isMask(&Def->getOperand(2)))
10659 SrcOp = &Def->getOperand(1);
10667 if (IsSigned && BitNo == SrcSize - 1)
10670 ExpectedValue <<= BitNo;
10672 bool IsReversedCC = false;
10673 if (CmpValue != ExpectedValue) {
10676 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10681 Register DefReg = Def->getOperand(0).getReg();
10682 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10685 for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10687 if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10688 I->killsRegister(AMDGPU::SCC, &RI))
10693 Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
10697 if (!MRI->use_nodbg_empty(DefReg)) {
10705 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10706 : AMDGPU::S_BITCMP1_B32
10707 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10708 : AMDGPU::S_BITCMP1_B64;
10713 Def->eraseFromParent();
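// [Editorial note, not part of the original source] Reading the fragments
// above: optimizeCmpAnd appears to match an S_AND_B32/S_AND_B64 with a
// single-bit mask feeding this compare, keep the AND's SCC def alive so the
// compare itself can be dropped, and, once the AND's register result has no
// remaining non-debug uses, replace the AND with an S_BITCMP0/S_BITCMP1 of
// the tested bit (the BITCMP0 forms covering the reversed condition).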
10721 case AMDGPU::S_CMP_EQ_U32:
10722 case AMDGPU::S_CMP_EQ_I32:
10723 case AMDGPU::S_CMPK_EQ_U32:
10724 case AMDGPU::S_CMPK_EQ_I32:
10725 return optimizeCmpAnd(1, 32, true, false);
10726 case AMDGPU::S_CMP_GE_U32:
10727 case AMDGPU::S_CMPK_GE_U32:
10728 return optimizeCmpAnd(1, 32, false, false);
10729 case AMDGPU::S_CMP_GE_I32:
10730 case AMDGPU::S_CMPK_GE_I32:
10731 return optimizeCmpAnd(1, 32, false, true);
10732 case AMDGPU::S_CMP_EQ_U64:
10733 return optimizeCmpAnd(1, 64, true, false);
10734 case AMDGPU::S_CMP_LG_U32:
10735 case AMDGPU::S_CMP_LG_I32:
10736 case AMDGPU::S_CMPK_LG_U32:
10737 case AMDGPU::S_CMPK_LG_I32:
10738 return optimizeCmpAnd(0, 32, true, false);
10739 case AMDGPU::S_CMP_GT_U32:
10740 case AMDGPU::S_CMPK_GT_U32:
10741 return optimizeCmpAnd(0, 32, false, false);
10742 case AMDGPU::S_CMP_GT_I32:
10743 case AMDGPU::S_CMPK_GT_I32:
10744 return optimizeCmpAnd(0, 32, false, true);
10745 case AMDGPU::S_CMP_LG_U64:
10746 return optimizeCmpAnd(0, 64, true, false);
10753 AMDGPU::OpName OpName) const {
10754 if (!ST.needsAlignedVGPRs())
10757 int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10769 bool IsAGPR = RI.isAGPR(MRI, DataReg);
10771 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10774 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10775 : &AMDGPU::VReg_64_Align2RegClass);
10777 .addReg(DataReg, 0, Op.getSubReg())
10782 Op.setSubReg(AMDGPU::sub0);
10804 unsigned Opcode = MI.getOpcode();
10810 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10811 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
10814 if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
bool isSpill(uint16_t Opcode) const
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (PostRASchedulerList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the inline values intended for floating-point operands.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
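A quick illustration of the formula above, as a compile-time sketch assuming these helpers live in llvm/Support/MathExtras.h as usual (maxUIntN(N) == 2^N - 1):
#include "llvm/Support/MathExtras.h"
// 2^8 - 1 and 2^16 - 1, respectively.
static_assert(llvm::maxUIntN(8) == 255, "8-bit maximum");
static_assert(llvm::maxUIntN(16) == 65535, "16-bit maximum");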
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
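For example, an illustrative compile-time check assuming the usual MathExtras.h template form isInt<N>(x):
#include "llvm/Support/MathExtras.h"
// 32767 is the largest value representable in a signed 16-bit field.
static_assert(llvm::isInt<16>(32767), "fits in 16 bits");
static_assert(!llvm::isInt<16>(32768), "one past the signed 16-bit range");
static_assert(llvm::isInt<16>(-32768), "most negative 16-bit value still fits");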
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
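A small sketch of the rounding behaviour described above, assuming the usual MathExtras.h definition (the Skew argument defaults to 0):
#include "llvm/Support/MathExtras.h"
// Round down to the nearest multiple of 8; aligned values are unchanged.
static_assert(llvm::alignDown(17u, 8u) == 16, "17 rounds down to 16");
static_assert(llvm::alignDown(16u, 8u) == 16, "aligned values are preserved");
// With a skew, the result is the largest value <= 17 that is 3 mod 8.
static_assert(llvm::alignDown(17u, 8u, 3u) == 11, "skewed alignment");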
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0s from the least significant bit towards the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair, skipping copy-like instructions and subre...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
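The unsigned counterpart of isInt; again a compile-time sketch under the usual MathExtras.h semantics:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isUInt<8>(255), "255 fits in an unsigned 8-bit field");
static_assert(!llvm::isUInt<8>(256), "256 does not");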
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
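Together with Hi_32 above, this splits a 64-bit value into its two halves; an illustrative check, assuming the MathExtras.h definitions:
#include "llvm/Support/MathExtras.h"
// The high and low halves of a 64-bit constant.
static_assert(llvm::Hi_32(0x1122334455667788ULL) == 0x11223344u, "upper half");
static_assert(llvm::Lo_32(0x1122334455667788ULL) == 0x55667788u, "lower half");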
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
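For instance, a compile-time sketch of the ceiling division described above (assuming MathExtras.h):
#include "llvm/Support/MathExtras.h"
static_assert(llvm::divideCeil(10u, 4u) == 3, "ceil(10/4) == 3");
static_assert(llvm::divideCeil(8u, 4u) == 2, "exact division is unchanged");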
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
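Unlike isInt, the bit width here is a runtime argument; a minimal illustration, assuming the usual MathExtras.h behaviour:
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isIntN(8, 127), "127 fits in a signed 8-bit field");
static_assert(!llvm::isIntN(8, 128), "128 does not");
static_assert(llvm::isIntN(8, -128), "-128 is the most negative 8-bit value");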
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
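A short sketch of the sign extension described here, assuming the MathExtras.h template form SignExtend64<B>(x):
#include "llvm/Support/MathExtras.h"
// All-ones in the bottom B bits sign-extends to -1; a clear sign bit stays positive.
static_assert(llvm::SignExtend64<8>(0xFF) == -1, "0xFF in 8 bits is -1");
static_assert(llvm::SignExtend64<8>(0x7F) == 127, "sign bit clear, value unchanged");
static_assert(llvm::SignExtend64<12>(0x800) == -2048, "minimum signed 12-bit value");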
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
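For example, illustrative compile-time checks assuming the MathExtras.h template maskTrailingOnes<T>(N):
#include "llvm/Support/MathExtras.h"
static_assert(llvm::maskTrailingOnes<uint32_t>(4) == 0xFu, "four low bits set");
static_assert(llvm::maskTrailingOnes<uint8_t>(8) == 0xFF, "full-width mask");
static_assert(llvm::maskTrailingOnes<uint64_t>(0) == 0, "empty mask");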
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
int popcount(T Value) noexcept
Count the number of set bits in a value.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.