32#include "llvm/IR/IntrinsicsAMDGPU.h"
39#define DEBUG_TYPE "si-instr-info"
41#define GET_INSTRINFO_CTOR_DTOR
42#include "AMDGPUGenInstrInfo.inc"
45#define GET_D16ImageDimIntrinsics_IMPL
46#define GET_ImageDimIntrinsicTable_IMPL
47#define GET_RsrcIntrinsics_IMPL
48#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),

  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
           return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {

  if (MI.isCompare()) {

      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
239 int64_t &Offset1)
const {
247 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
251 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
267 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
268 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
269 if (Offset0Idx == -1 || Offset1Idx == -1)
276 Offset0Idx -=
get(Opc0).NumDefs;
277 Offset1Idx -=
get(Opc1).NumDefs;
307 if (!Load0Offset || !Load1Offset)
324 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
325 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
327 if (OffIdx0 == -1 || OffIdx1 == -1)
333 OffIdx0 -=
get(Opc0).NumDefs;
334 OffIdx1 -=
get(Opc1).NumDefs;
353 case AMDGPU::DS_READ2ST64_B32:
354 case AMDGPU::DS_READ2ST64_B64:
355 case AMDGPU::DS_WRITE2ST64_B32:
356 case AMDGPU::DS_WRITE2ST64_B64:
371 OffsetIsScalable =
false;
388 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
390 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
391 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
404 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
405 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
406 if (Offset0 + 1 != Offset1)
417 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
425 Offset = EltSize * Offset0;
427 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
428 if (DataOpIdx == -1) {
429 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
431 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
447 if (BaseOp && !BaseOp->
isFI())
455 if (SOffset->
isReg())
461 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
463 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
472 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
473 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
475 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
476 if (VAddr0Idx >= 0) {
478 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
485 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
500 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
517 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
519 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
536 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
544 if (MO1->getAddrSpace() != MO2->getAddrSpace())
547 const auto *Base1 = MO1->getValue();
548 const auto *Base2 = MO2->getValue();
549 if (!Base1 || !Base2)
557 return Base1 == Base2;
                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

    for (auto Def = MI, E = MBB.begin(); Def != E; ) {

      if (!Def->modifiesRegister(SrcReg, &RI))

      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
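      // When both the source and destination 32-bit subregisters start on an
      // even SGPR boundary and another subregister follows, the two copies can
      // be merged into a single aligned S_MOV_B64, as selected above.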
  assert(FirstMI && LastMI);

    LastMI->addRegisterKilled(SrcReg, &RI);

                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {

  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {

    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();
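  // INSTRUCTION_LIST_END is used as a sentinel meaning "no single mov opcode
  // can perform this copy"; in that case a RegScavenger is created so the
  // per-subregister expansion can scavenge a temporary register when needed.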
  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                              *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                             : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                             : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
                             : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                             : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
                             : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                             : AMDGPU::S_CSELECT_B64), SReg)

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
1343 int64_t &ImmVal)
const {
1344 switch (
MI.getOpcode()) {
1345 case AMDGPU::V_MOV_B32_e32:
1346 case AMDGPU::S_MOV_B32:
1347 case AMDGPU::S_MOVK_I32:
1348 case AMDGPU::S_MOV_B64:
1349 case AMDGPU::V_MOV_B64_e32:
1350 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1351 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1352 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1353 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1354 case AMDGPU::V_MOV_B64_PSEUDO: {
1358 return MI.getOperand(0).getReg() == Reg;
1363 case AMDGPU::S_BREV_B32:
1364 case AMDGPU::V_BFREV_B32_e32:
1365 case AMDGPU::V_BFREV_B32_e64: {
1369 return MI.getOperand(0).getReg() == Reg;
1374 case AMDGPU::S_NOT_B32:
1375 case AMDGPU::V_NOT_B32_e32:
1376 case AMDGPU::V_NOT_B32_e64: {
1379 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1380 return MI.getOperand(0).getReg() == Reg;
1392 if (RI.isAGPRClass(DstRC))
1393 return AMDGPU::COPY;
1394 if (RI.getRegSizeInBits(*DstRC) == 16) {
1397 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1399 if (RI.getRegSizeInBits(*DstRC) == 32)
1400 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1401 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1402 return AMDGPU::S_MOV_B64;
1403 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1404 return AMDGPU::V_MOV_B64_PSEUDO;
1405 return AMDGPU::COPY;
                                                     bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                       bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");

    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
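// The spill pseudo opcode is selected purely from the spill size and the kind
// of register class being spilled (SGPR, VGPR, or AGPR/AV superclass); the
// actual lowering of these pseudos happens in later passes.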
                                           bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())

    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                              bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = TRI->getSpillSize(*RC);

      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
                                  unsigned Quantity) const {

  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);

  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
    MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
                                               &AMDGPU::SReg_32_XM0RegClass);

  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {

    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {

    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {

      Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
                                 : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

        RI.getAllocatableClass(getRegClass(TID, 0, &RI, *MF));
    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
2634std::pair<MachineInstr*, MachineInstr*>
2636 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2638 if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2641 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2642 return std::pair(&
MI,
nullptr);
2653 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2655 if (Dst.isPhysical()) {
2656 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2659 auto Tmp =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2663 for (
unsigned I = 1;
I <= 2; ++
I) {
2666 if (
SrcOp.isImm()) {
2668 Imm.ashrInPlace(Part * 32);
2669 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2673 if (Src.isPhysical())
2674 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2681 MovDPP.addImm(MO.getImm());
2683 Split[Part] = MovDPP;
2687 if (Dst.isVirtual())
2694 MI.eraseFromParent();
2695 return std::pair(Split[0], Split[1]);
2698std::optional<DestSourcePair>
2700 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2703 return std::nullopt;
                                        AMDGPU::OpName Src0OpName,
                                        AMDGPU::OpName Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);

                                  unsigned OpIdx1) const {

  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);
                        AMDGPU::OpName::src1_sel);

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                        int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
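// getBranchOpcode and getBranchPredicate (below) are inverse mappings between
// the BranchPredicate enum and the concrete S_CBRANCH_* opcodes.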
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {

  if (Cond[0].isImm()) {
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

  if (MRI.getRegClass(FalseReg) != RC)

  if (NumInsts % 2 == 0)

  CondCycles = TrueCycles = FalseCycles = NumInsts;
  return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

                                              unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
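// The two switches above pick the constant-carrying MAD/FMA forms used when
// folding an immediate: roughly, the *AK opcodes fold the constant into the
// addend operand and the *MK opcodes into one multiplicand, with the true16
// and fake16 variants chosen from the subtarget's 16-bit instruction support.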
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

    if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {
    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())
    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {

        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
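      // The defining move is only deleted once the folded-into user has been
      // rewritten and the register has no remaining non-debug uses.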
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

      if (Def.isEarlyClobber() && Def.isReg() &&

      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto *S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;

      for (auto &SR : LI.subranges())

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
       !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {

    const auto killDef = [&]() -> void {
      if (MRI.hasOneNonDBGUse(DefReg)) {
        DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
        DefMI->getOperand(0).setIsDead(true);
        for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)

        Register DummyReg = MRI.cloneVirtualRegister(DefReg);
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

          MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())

  MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {
                                        ST.hasInv2PiInlineImm());
                                        ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                            ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();
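  // As the final check shows, an operand that cannot be encoded as an inline
  // constant is only legal here when the operand accepts a literal and the
  // subtarget supports literals in VOP3 encodings.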
4606 int64_t ImmVal)
const {
4609 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4610 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4611 AMDGPU::OpName::src2))
4613 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4625 "unexpected imm-like operand kind");
4638 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4656 AMDGPU::OpName
OpName)
const {
4658 return Mods && Mods->
getImm();
4671 switch (
MI.getOpcode()) {
4672 default:
return false;
4674 case AMDGPU::V_ADDC_U32_e64:
4675 case AMDGPU::V_SUBB_U32_e64:
4676 case AMDGPU::V_SUBBREV_U32_e64: {
4684 case AMDGPU::V_MAC_F16_e64:
4685 case AMDGPU::V_MAC_F32_e64:
4686 case AMDGPU::V_MAC_LEGACY_F32_e64:
4687 case AMDGPU::V_FMAC_F16_e64:
4688 case AMDGPU::V_FMAC_F16_t16_e64:
4689 case AMDGPU::V_FMAC_F16_fake16_e64:
4690 case AMDGPU::V_FMAC_F32_e64:
4691 case AMDGPU::V_FMAC_F64_e64:
4692 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4698 case AMDGPU::V_CNDMASK_B32_e64:
4704 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(
MRI, Src1->
getReg()) ||
4734 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4743 unsigned Op32)
const {
4757 Inst32.
add(
MI.getOperand(
I));
4761 int Idx =
MI.getNumExplicitDefs();
4763 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
4768 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
4790 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
4798 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
4801 return AMDGPU::SReg_32RegClass.contains(Reg) ||
4802 AMDGPU::SReg_64RegClass.contains(Reg);
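// Constant-bus accounting (above): literal immediates and SGPR operands share
// the constant bus; SGPR_NULL is free, while implicit reads of M0/VCC/VCC_LO
// and explicit SReg_32/SReg_64 operands all count against the limit.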
4808 return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
4820 return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
4830 switch (MO.getReg()) {
4832 case AMDGPU::VCC_LO:
4833 case AMDGPU::VCC_HI:
4835 case AMDGPU::FLAT_SCR:
4848 switch (MI.getOpcode()) {
4849 case AMDGPU::V_READLANE_B32:
4850 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4851 case AMDGPU::V_WRITELANE_B32:
4852 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4859 if (MI.isPreISelOpcode() ||
4860 SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
4875 if (SubReg.getReg().isPhysical())
4878 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4889 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
4890 ErrInfo = "illegal copy from vector register to SGPR";
4908 if (!MRI.isSSA() && MI.isCopy())
4909 return verifyCopy(MI, MRI, ErrInfo);
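// verifyInstruction: machine-verifier hook. The checks that follow validate
// operand counts and register classes, immediate/inline-constant operands,
// SDWA, image, constant-bus, SOPK, movrel, DPP and subtarget-specific
// constraints, setting ErrInfo and returning false on the first violation.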
4911 if (SIInstrInfo::isGenericOpcode(Opcode))
4914 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4915 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4916 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4918 if (Src0Idx == -1) {
4920 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
4921 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
4922 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
4923 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
4928 if (!Desc.isVariadic() &&
4929 Desc.getNumOperands() != MI.getNumExplicitOperands()) {
4930 ErrInfo = "Instruction has wrong number of operands.";
4934 if (MI.isInlineAsm()) {
4947 if (!Reg.isVirtual() && !RC->contains(Reg)) {
4948 ErrInfo = "inlineasm operand has incorrect register class.";
4956 if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
4957 ErrInfo = "missing memory operand from image instruction.";
4962 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
4965 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
4966 "all fp values to integers.";
4970 int RegClass = Desc.operands()[i].RegClass;
4973 switch (OpInfo.OperandType) {
4975 if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
4976 ErrInfo = "Illegal immediate value for operand.";
5010 ErrInfo = "Illegal immediate value for operand.";
5017 ErrInfo = "Expected inline constant for operand.";
5032 if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
5033 ErrInfo = "Expected immediate, but got non-immediate";
5042 if (OpInfo.isGenericType())
5057 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5059 if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
5061 RI.getSubRegisterClass(RC, MO.getSubReg())) {
5062 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
5069 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5070 ErrInfo = "Subtarget requires even aligned vector registers";
5075 if (RegClass != -1) {
5076 if (Reg.isVirtual())
5081 ErrInfo = "Operand has incorrect register class.";
5089 if (!ST.hasSDWA()) {
5090 ErrInfo = "SDWA is not supported on this target";
5094 for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5095 AMDGPU::OpName::dst_sel}) {
5099 int64_t Imm = MO->getImm();
5101 ErrInfo = "Invalid SDWA selection";
5106 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5108 for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5113 if (!ST.hasSDWAScalar()) {
5115 if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
5116 ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
5123 "Only reg allowed as operands in SDWA instructions on GFX9+";
5129 if (!ST.hasSDWAOmod()) {
5132 if (OMod != nullptr &&
5134 ErrInfo = "OMod not allowed in SDWA instructions on VI";
5139 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5140 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5141 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5142 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5145 unsigned Mods = Src0ModsMO->getImm();
5148 ErrInfo = "sext, abs and neg are not allowed on this instruction";
5154 if (isVOPC(BasicOpcode)) {
5155 if (!ST.hasSDWASdst() && DstIdx != -1) {
5158 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5159 ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
5162 } else if (!ST.hasSDWAOutModsVOPC()) {
5165 if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
5166 ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
5172 if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
5173 ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
5180 if (DstUnused && DstUnused->isImm() &&
5183 if (!Dst.isReg() || !Dst.isTied()) {
5184 ErrInfo = "Dst register should have tied register";
5189 MI.getOperand(MI.findTiedOperandIdx(DstIdx));
5192 "Dst register should be tied to implicit use of preserved register";
5196 ErrInfo = "Dst register should use same physical register as preserved";
5203 if (isImage(Opcode) && !MI.mayStore()) {
5215 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5223 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5227 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5228 if (RegCount > DstSize) {
5229 ErrInfo = "Image instruction returns too many registers for dst "
5238 if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5239 unsigned ConstantBusCount = 0;
5240 bool UsesLiteral = false;
5243 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5247 LiteralVal = &MI.getOperand(ImmIdx);
5256 for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5267 } else if (!MO.isFI()) {
5274 ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
5284 if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
5285 return !RI.regsOverlap(SGPRUsed, SGPR);
5294 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5295 Opcode != AMDGPU::V_WRITELANE_B32) {
5296 ErrInfo = "VOP* instruction violates constant bus restriction";
5300 if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5301 ErrInfo = "VOP3 instruction uses literal";
5308 if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5309 unsigned SGPRCount = 0;
5312 for (int OpIdx : {Src0Idx, Src1Idx}) {
5320 if (MO.getReg() != SGPRUsed)
5325 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5326 ErrInfo = "WRITELANE instruction violates constant bus restriction";
5333 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5334 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5341 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
5351 ErrInfo = "ABS not allowed in VOP3B instructions";
5364 ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
5371 if (Desc.isBranch()) {
5373 ErrInfo = "invalid branch target for SOPK instruction";
5380 ErrInfo = "invalid immediate for SOPK instruction";
5385 ErrInfo = "invalid immediate for SOPK instruction";
5392 if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5393 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5394 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5395 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5396 const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5397 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5399 const unsigned StaticNumOps =
5400 Desc.getNumOperands() + Desc.implicit_uses().size();
5401 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5406 if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5407 ErrInfo = "missing implicit register operands";
5413 if (!Dst->isUse()) {
5414 ErrInfo = "v_movreld_b32 vdst should be a use operand";
5419 if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5420 UseOpIdx != StaticNumOps + 1) {
5421 ErrInfo = "movrel implicit operands should be tied";
5428 = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5430 !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5431 ErrInfo = "src0 should be subreg of implicit vector use";
5439 if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5440 ErrInfo = "VALU instruction does not implicitly read exec mask";
5446 if (MI.mayStore() &&
5451 if (Soff && Soff->getReg() != AMDGPU::M0) {
5452 ErrInfo = "scalar stores must use m0 as offset register";
5458 if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
5460 if (Offset->getImm() != 0) {
5461 ErrInfo = "subtarget does not support offsets in flat instructions";
5466 if (isDS(MI) && !ST.hasGDS()) {
5468 if (GDSOp && GDSOp->getImm() != 0) {
5469 ErrInfo = "GDS is not supported on this subtarget";
5477 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5478 AMDGPU::OpName::vaddr0);
5479 AMDGPU::OpName RSrcOpName =
5480 isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5481 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5489 ErrInfo = "dim is out of range";
5494 if (ST.hasR128A16()) {
5496 IsA16 = R128A16->getImm() != 0;
5497 } else if (ST.hasA16()) {
5499 IsA16 = A16->getImm() != 0;
5502 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5504 unsigned AddrWords =
5507 unsigned VAddrWords;
5509 VAddrWords = RsrcIdx - VAddr0Idx;
5510 if (ST.hasPartialNSAEncoding() &&
5512 unsigned LastVAddrIdx = RsrcIdx - 1;
5513 VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
5521 if (VAddrWords != AddrWords) {
5523 << " but got " << VAddrWords << "\n");
5524 ErrInfo = "bad vaddr size";
5534 unsigned DC = DppCt->getImm();
5535 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5536 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5537 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5538 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5539 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5540 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5541 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5542 ErrInfo = "Invalid dpp_ctrl value";
5545 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5547 ErrInfo = "Invalid dpp_ctrl value: "
5548 "wavefront shifts are not supported on GFX10+";
5551 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5553 ErrInfo = "Invalid dpp_ctrl value: "
5554 "broadcasts are not supported on GFX10+";
5557 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5559 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5560 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5561 !ST.hasGFX90AInsts()) {
5562 ErrInfo = "Invalid dpp_ctrl value: "
5563 "row_newbroadcast/row_share is not supported before "
5567 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5568 ErrInfo = "Invalid dpp_ctrl value: "
5569 "row_share and row_xmask are not supported before GFX10";
5574 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5577 ErrInfo = "Invalid dpp_ctrl value: "
5578 "DP ALU dpp only support row_newbcast";
5585 AMDGPU::OpName DataName =
5586 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5592 if (ST.hasGFX90AInsts()) {
5594 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
5595 ErrInfo = "Invalid register class: "
5596 "vdata and vdst should be both VGPR or AGPR";
5599 if (Data && Data2 &&
5601 ErrInfo = "Invalid register class: "
5602 "both data operands should be VGPR or AGPR";
5606 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5608 (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
5609 ErrInfo = "Invalid register class: "
5610 "agpr loads and stores not supported on this GPU";
5616 if (ST.needsAlignedVGPRs()) {
5617 const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
5622 if (Reg.isPhysical())
5623 return !(RI.getHWRegIndex(Reg) & 1);
5625 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5626 !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
5629 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5630 Opcode == AMDGPU::DS_GWS_BARRIER) {
5632 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5633 ErrInfo = "Subtarget requires even aligned vector registers "
5634 "for DS_GWS instructions";
5640 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5641 ErrInfo = "Subtarget requires even aligned vector registers "
5642 "for vaddr operand of image instructions";
5648 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5650 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5651 ErrInfo = "Invalid register class: "
5652 "v_accvgpr_write with an SGPR is not supported on this GPU";
5657 if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5660 ErrInfo = "pseudo expects only physical SGPRs";
5667 if (!ST.hasScaleOffset()) {
5668 ErrInfo = "Subtarget does not support offset scaling";
5672 ErrInfo = "Instruction does not support offset scaling";
5681 for (unsigned I = 0; I < 3; ++I) {
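// getVALUOp (switch below): maps a scalar ALU opcode to the VALU opcode used
// when the instruction has to be moved off the SALU; opcodes with no direct
// VALU equivalent return AMDGPU::INSTRUCTION_LIST_END and are handled by
// dedicated lowering in moveToVALU.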
5694 switch (MI.getOpcode()) {
5695 default:
return AMDGPU::INSTRUCTION_LIST_END;
5696 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5697 case AMDGPU::COPY:
return AMDGPU::COPY;
5698 case AMDGPU::PHI:
return AMDGPU::PHI;
5699 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5700 case AMDGPU::WQM:
return AMDGPU::WQM;
5701 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5702 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5703 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5704 case AMDGPU::S_MOV_B32: {
5706 return MI.getOperand(1).isReg() ||
5707 RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
5708 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5710 case AMDGPU::S_ADD_I32:
5711 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5712 case AMDGPU::S_ADDC_U32:
5713 return AMDGPU::V_ADDC_U32_e32;
5714 case AMDGPU::S_SUB_I32:
5715 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5718 case AMDGPU::S_ADD_U32:
5719 return AMDGPU::V_ADD_CO_U32_e32;
5720 case AMDGPU::S_SUB_U32:
5721 return AMDGPU::V_SUB_CO_U32_e32;
5722 case AMDGPU::S_ADD_U64_PSEUDO:
5723 return AMDGPU::V_ADD_U64_PSEUDO;
5724 case AMDGPU::S_SUB_U64_PSEUDO:
5725 return AMDGPU::V_SUB_U64_PSEUDO;
5726 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5727 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5728 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5729 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5730 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5731 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5732 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5733 case AMDGPU::S_XNOR_B32:
5734 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5735 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5736 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5737 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5738 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5739 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5740 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5741 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5742 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5743 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5744 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5745 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5746 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5747 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5748 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5749 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5750 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5751 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5752 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5753 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5754 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5755 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5756 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5757 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5758 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5759 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5760 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5761 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5762 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5763 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5764 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5765 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5766 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5767 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5768 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5769 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5770 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5771 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5772 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5773 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5774 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5775 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5776 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5777 case AMDGPU::S_CVT_F32_F16:
5778 case AMDGPU::S_CVT_HI_F32_F16:
5779 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5780 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5781 case AMDGPU::S_CVT_F16_F32:
5782 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5783 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5784 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5785 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5786 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5787 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5788 case AMDGPU::S_CEIL_F16:
5789 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
5790 : AMDGPU::V_CEIL_F16_fake16_e64;
5791 case AMDGPU::S_FLOOR_F16:
5792 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
5793 : AMDGPU::V_FLOOR_F16_fake16_e64;
5794 case AMDGPU::S_TRUNC_F16:
5795 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5796 : AMDGPU::V_TRUNC_F16_fake16_e64;
5797 case AMDGPU::S_RNDNE_F16:
5798 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
5799 : AMDGPU::V_RNDNE_F16_fake16_e64;
5800 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5801 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5802 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5803 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5804 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5805 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5806 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5807 case AMDGPU::S_ADD_F16:
5808 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
5809 : AMDGPU::V_ADD_F16_fake16_e64;
5810 case AMDGPU::S_SUB_F16:
5811 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
5812 : AMDGPU::V_SUB_F16_fake16_e64;
5813 case AMDGPU::S_MIN_F16:
5814 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
5815 : AMDGPU::V_MIN_F16_fake16_e64;
5816 case AMDGPU::S_MAX_F16:
5817 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
5818 : AMDGPU::V_MAX_F16_fake16_e64;
5819 case AMDGPU::S_MINIMUM_F16:
5820 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5821 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5822 case AMDGPU::S_MAXIMUM_F16:
5823 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5824 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5825 case AMDGPU::S_MUL_F16:
5826 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
5827 : AMDGPU::V_MUL_F16_fake16_e64;
5828 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5829 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5830 case AMDGPU::S_FMAC_F16:
5831 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
5832 : AMDGPU::V_FMAC_F16_fake16_e64;
5833 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5834 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5835 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5836 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5837 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5838 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5839 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5840 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5841 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5842 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5843 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5844 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5845 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5846 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5847 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5848 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5849 case AMDGPU::S_CMP_LT_F16:
5850 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
5851 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5852 case AMDGPU::S_CMP_EQ_F16:
5853 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
5854 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5855 case AMDGPU::S_CMP_LE_F16:
5856 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
5857 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5858 case AMDGPU::S_CMP_GT_F16:
5859 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
5860 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5861 case AMDGPU::S_CMP_LG_F16:
5862 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
5863 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5864 case AMDGPU::S_CMP_GE_F16:
5865 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
5866 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5867 case AMDGPU::S_CMP_O_F16:
5868 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
5869 : AMDGPU::V_CMP_O_F16_fake16_e64;
5870 case AMDGPU::S_CMP_U_F16:
5871 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
5872 : AMDGPU::V_CMP_U_F16_fake16_e64;
5873 case AMDGPU::S_CMP_NGE_F16:
5874 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
5875 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5876 case AMDGPU::S_CMP_NLG_F16:
5877 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
5878 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5879 case AMDGPU::S_CMP_NGT_F16:
5880 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
5881 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5882 case AMDGPU::S_CMP_NLE_F16:
5883 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
5884 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5885 case AMDGPU::S_CMP_NEQ_F16:
5886 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
5887 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5888 case AMDGPU::S_CMP_NLT_F16:
5889 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
5890 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5891 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5892 case AMDGPU::V_S_EXP_F16_e64:
5893 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
5894 : AMDGPU::V_EXP_F16_fake16_e64;
5895 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5896 case AMDGPU::V_S_LOG_F16_e64:
5897 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
5898 : AMDGPU::V_LOG_F16_fake16_e64;
5899 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5900 case AMDGPU::V_S_RCP_F16_e64:
5901 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
5902 : AMDGPU::V_RCP_F16_fake16_e64;
5903 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5904 case AMDGPU::V_S_RSQ_F16_e64:
5905 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
5906 : AMDGPU::V_RSQ_F16_fake16_e64;
5907 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5908 case AMDGPU::V_S_SQRT_F16_e64:
5909 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
5910 : AMDGPU::V_SQRT_F16_fake16_e64;
5913 "Unexpected scalar opcode without corresponding vector one!");
5926 bool IsWave32 = ST.isWave32();
5931 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5932 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5941 const unsigned OrSaveExec =
5942 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5955 unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5957 auto ExecRestoreMI =
5966 "Not a whole wave func");
5969 if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
5970 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
5979 bool IsAllocatable) {
5980 if ((IsAllocatable || !ST.hasGFX90AInsts()) &&
5985 case AMDGPU::AV_32RegClassID:
5986 RCID = AMDGPU::VGPR_32RegClassID;
5988 case AMDGPU::AV_64RegClassID:
5989 RCID = AMDGPU::VReg_64RegClassID;
5991 case AMDGPU::AV_96RegClassID:
5992 RCID = AMDGPU::VReg_96RegClassID;
5994 case AMDGPU::AV_128RegClassID:
5995 RCID = AMDGPU::VReg_128RegClassID;
5997 case AMDGPU::AV_160RegClassID:
5998 RCID = AMDGPU::VReg_160RegClassID;
6000 case AMDGPU::AV_512RegClassID:
6001 RCID = AMDGPU::VReg_512RegClassID;
6017 auto RegClass = TID.operands()[OpNum].RegClass;
6018 if (TID.getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
6020 return RI.getRegClass(RegClass);
6027 unsigned OpNo) const {
6029 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
6030 Desc.operands()[OpNo].RegClass == -1) {
6033 if (Reg.isVirtual()) {
6035 MI.getParent()->getParent()->getRegInfo();
6036 return MRI.getRegClass(Reg);
6038 return RI.getPhysRegBaseClass(Reg);
6041 unsigned RCID = Desc.operands()[OpNo].RegClass;
6050 unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass;
6052 unsigned Size = RI.getRegSizeInBits(*RC);
6053 unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6054 : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6055 : AMDGPU::V_MOV_B32_e32;
6057 Opcode = AMDGPU::COPY;
6058 else if (RI.isSGPRClass(RC))
6059 Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6073 return RI.getSubReg(SuperReg.getReg(), SubIdx);
6079 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);
6090 if (SubIdx == AMDGPU::sub0)
6092 if (SubIdx == AMDGPU::sub1)
6104void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
6120 if (Reg.isPhysical())
6131 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
6142 unsigned Opc = MI.getOpcode();
6148 constexpr const AMDGPU::OpName OpNames[] = {
6149 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6152 int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
6153 if (static_cast<unsigned>(SrcIdx) == OpIdx &&
6163 bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
6164 if (IsAGPR && !ST.hasMAIInsts())
6166 if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
6170 const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
6171 const int DataIdx = AMDGPU::getNamedOperandIdx(
6172 Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6173 if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
6174 MI.getOperand(DataIdx).isReg() &&
6175 RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
6177 if ((int)OpIdx == DataIdx) {
6178 if (VDstIdx != -1 &&
6179 RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6182 const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
6183 if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
6184 RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6189 if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6190 (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
6210 constexpr const unsigned NumOps = 3;
6211 constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
6212 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6213 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6214 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6219 int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
6222 MO = &MI.getOperand(SrcIdx);
6229 AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
6233 unsigned Mods = MI.getOperand(ModsIdx).getImm();
6237 return !OpSel && !OpSelHi;
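// Operand-legality checks below: an operand may be placed at a given index
// only if the instruction still respects the subtarget's constant-bus limit
// and carries at most one literal (VOP3 literals only where supported).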
6247 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
6256 int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
6257 int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
6261 if (!LiteralLimit--)
6271 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6279 if (--ConstantBusLimit <= 0)
6291 if (!LiteralLimit--)
6293 if (--ConstantBusLimit <= 0)
6299 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6303 if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
6305 !Op.isIdenticalTo(*MO))
6315 } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6329 bool Is64BitOp = Is64BitFPOp ||
6336 (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
6345 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6364 unsigned Opc = MI.getOpcode();
6367 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
6370 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
6376 if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&
6383 if (Opc == AMDGPU::V_WRITELANE_B32) {
6386 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6392 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6409 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
6410 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
6411 if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
6423 if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
6425 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6437 if (HasImplicitSGPR || !MI.isCommutable()) {
6454 if (CommutedOpc == -1) {
6459 MI.setDesc(get(CommutedOpc));
6463 bool Src0Kill = Src0.isKill();
6467 else if (Src1.isReg()) {
6482 unsigned Opc = MI.getOpcode();
6485 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
6486 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
6487 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
6490 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6491 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6492 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6493 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6494 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6495 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6496 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6500 if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
6501 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6506 if (VOP3Idx[2] != -1) {
6508 if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
6509 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6518 int ConstantBusLimit = ST.getConstantBusLimit(Opc);
6519 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6521 Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
6523 SGPRsUsed.insert(SGPRReg);
6527 for (int Idx : VOP3Idx) {
6536 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6548 if (RI.hasAGPRs(RI.getRegClassForReg(MRI, MO.getReg())) &&
6554 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))
6561 if (ConstantBusLimit > 0) {
6573 if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6574 !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
6581 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::scale_src0);
6582 if (ScaleSrc0Idx != -1) {
6584 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::scale_src1);
6595 for (unsigned I = 0; I < 3; ++I) {
6608 SRC = RI.getCommonSubClass(SRC, DstRC);
6611 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6613 if (RI.hasAGPRs(VRC)) {
6614 VRC = RI.getEquivalentVGPRClass(VRC);
6615 Register NewSrcReg = MRI.createVirtualRegister(VRC);
6617 get(TargetOpcode::COPY), NewSrcReg)
6624 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6630 for (unsigned i = 0; i < SubRegs; ++i) {
6631 Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6633 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6634 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6640 get(AMDGPU::REG_SEQUENCE), DstReg);
6641 for (unsigned i = 0; i < SubRegs; ++i) {
6643 MIB.addImm(RI.getSubRegFromChannel(i));
6656 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
6658 SBase->setReg(SGPR);
6661 if (SOff && !RI.isSGPRReg(MRI, SOff->getReg())) {
6669 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
6670 if (OldSAddrIdx < 0)
6686 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6687 if (NewVAddrIdx < 0)
6690 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
6694 if (OldVAddrIdx >= 0) {
6696 VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
6708 if (OldVAddrIdx == NewVAddrIdx) {
6711 MRI.removeRegOperandFromUseList(&NewVAddr);
6712 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6716 MRI.removeRegOperandFromUseList(&NewVAddr);
6717 MRI.addRegOperandToUseList(&NewVAddr);
6719 assert(OldSAddrIdx == NewVAddrIdx);
6721 if (OldVAddrIdx >= 0) {
6722 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6723 AMDGPU::OpName::vdst_in);
6727 if (NewVDstIn != -1) {
6728 int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
6734 if (NewVDstIn != -1) {
6735 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6756 if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
6776 unsigned OpSubReg = Op.getSubReg();
6779 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
6785 Register DstReg = MRI.createVirtualRegister(DstRC);
6795 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6798 bool ImpDef = Def->isImplicitDef();
6799 while (!ImpDef && Def && Def->isCopy()) {
6800 if (Def->getOperand(1).getReg().isPhysical())
6802 Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6803 ImpDef = Def && Def->isImplicitDef();
6805 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6823 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6824 unsigned SaveExecOpc =
6825 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6826 unsigned XorTermOpc =
6827 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6829 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6830 const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
6836 unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
6837 unsigned NumSubRegs = RegSize / 32;
6838 Register VScalarOp = ScalarOp->getReg();
6840 if (NumSubRegs == 1) {
6841 Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6843 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6846 Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
6848 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6854 CondReg = NewCondReg;
6856 Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
6864 ScalarOp->setReg(CurReg);
6865 ScalarOp->setIsKill();
6869 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6870 "Unhandled register size");
6872 for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
6874 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6876 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6879 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6880 .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
6883 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6884 .addReg(VScalarOp, VScalarOpUndef,
6885 TRI->getSubRegFromChannel(Idx + 1));
6891 Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6892 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6898 Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
6899 auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6902 if (NumSubRegs <= 2)
6903 Cmp.addReg(VScalarOp);
6905 Cmp.addReg(VScalarOp, VScalarOpUndef,
6906 TRI->getSubRegFromChannel(Idx, 2));
6910 CondReg = NewCondReg;
6912 Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
6920 const auto *SScalarOpRC =
6921 TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
6922 Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
6926 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6927 unsigned Channel = 0;
6928 for (Register Piece : ReadlanePieces) {
6929 Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
6933 ScalarOp->setReg(SScalarOp);
6934 ScalarOp->setIsKill();
6938 Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
6939 MRI.setSimpleHint(SaveExec, CondReg);
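// Waterfall loop (emitted above): a divergent "scalar" operand held in VGPRs
// is legalized by looping until every active lane has been serviced.
// Roughly (a sketch, not the exact emitted MIR):
//   loop:
//     cur  = v_readfirstlane_b32 vgpr_op       ; pick one lane's value
//     cond = v_cmp_eq(vgpr_op, cur)            ; lanes sharing that value
//     saveexec = s_and_saveexec(cond)          ; run them with cur as SGPR
//     ... rewritten instruction executes here ...
//     exec = exec ^ saveexec; branch back while any lanes remain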
6970 if (!Begin.isValid())
6972 if (!End.isValid()) {
6977 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6978 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6979 const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
6987 MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
6988 std::numeric_limits<unsigned>::max()) !=
6991 SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6997 Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
7006 for (auto I = Begin; I != AfterMI; I++) {
7007 for (auto &MO : I->all_uses())
7008 MRI.clearKillFlags(MO.getReg());
7033 MBB.addSuccessor(LoopBB);
7043 for (auto &Succ : RemainderBB->successors()) {
7066static std::tuple<unsigned, unsigned>
7074 TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7075 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7078 Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7079 Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7080 Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7081 Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7082 uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
7099 .addImm(AMDGPU::sub0_sub1)
7105 return std::tuple(RsrcPtr, NewSRsrc);
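// Helper above (resource-splitting): a 128-bit MUBUF resource held in VGPRs
// is split into a 64-bit pointer (sub0_sub1) plus a new SGPR resource
// descriptor built from a zero base and the default resource data format.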
7142 if (MI.getOpcode() == AMDGPU::PHI) {
7144 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
7145 if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
7148 MRI.getRegClass(MI.getOperand(i).getReg());
7149 if (RI.hasVectorRegisters(OpRC)) {
7163 VRC = &AMDGPU::VReg_1RegClass;
7166 ? RI.getEquivalentAGPRClass(SRC)
7167 : RI.getEquivalentVGPRClass(SRC);
7170 ? RI.getEquivalentAGPRClass(VRC)
7171 : RI.getEquivalentVGPRClass(VRC);
7179 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
7181 if (!Op.isReg() || !Op.getReg().isVirtual())
7197 if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7200 if (RI.hasVGPRs(DstRC)) {
7204 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
7206 if (!Op.isReg() || !Op.getReg().isVirtual())
7224 if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7229 if (DstRC != Src0RC) {
7238 if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7240 if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7246 if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7247 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7248 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7249 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7250 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7251 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7252 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7254 if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7267 ? AMDGPU::OpName::rsrc
7268 : AMDGPU::OpName::srsrc;
7270 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
7273 AMDGPU::OpName SampOpName =
7274 isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7276 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
7283 if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7285 if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {
7289 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7290 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7295 while (Start->getOpcode() != FrameSetupOpcode)
7298 while (End->getOpcode() != FrameDestroyOpcode)
7302 while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7303 MI.definesRegister(End->getOperand(1).getReg(), nullptr))
7311 if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7313 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7315 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
7325 if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7326 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7327 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7328 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7330 if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7337 bool isSoffsetLegal = true;
7339 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
7340 if (SoffsetIdx != -1) {
7343 !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
7344 isSoffsetLegal = false;
7348 bool isRsrcLegal = true;
7350 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
7351 if (RsrcIdx != -1) {
7354 isRsrcLegal = false;
7358 if (isRsrcLegal && isSoffsetLegal)
7382 Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7383 Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7384 Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7386 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7387 Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
7388 Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
7390 unsigned RsrcPtr, NewSRsrc;
7397 .addReg(RsrcPtr, 0, AMDGPU::sub0)
7404 .addReg(RsrcPtr, 0, AMDGPU::sub1)
7418 } else if (!VAddr && ST.hasAddr64()) {
7422 "FIXME: Need to emit flat atomics here");
7424 unsigned RsrcPtr, NewSRsrc;
7427 Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7450 MIB.addImm(CPol->getImm());
7455 MIB.addImm(TFE->getImm());
7475 MI.removeFromParent();
7480 .addReg(RsrcPtr, 0, AMDGPU::sub0)
7482 .addReg(RsrcPtr, 0, AMDGPU::sub1)
7486 if (!isSoffsetLegal) {
7498 if (!isSoffsetLegal) {
7510 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
7511 if (RsrcIdx != -1) {
7512 DeferredList.insert(MI);
7517 return DeferredList.contains(MI);
7527 if (!ST.useRealTrue16Insts())
7530 unsigned Opcode = MI.getOpcode();
7534 OpIdx >= get(Opcode).getNumOperands() ||
7535 get(Opcode).operands()[OpIdx].RegClass == -1)
7539 if (!Op.isReg() || !Op.getReg().isVirtual())
7543 if (!RI.isVGPRClass(CurrRC))
7546 unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
7548 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7549 Op.setSubReg(AMDGPU::lo16);
7550 } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7552 Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7553 Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7560 Op.setReg(NewDstReg);
7572 while (!Worklist.empty()) {
7586 "Deferred MachineInstr are not supposed to re-populate worklist");
7604 case AMDGPU::S_ADD_I32:
7605 case AMDGPU::S_SUB_I32: {
7609 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7617 case AMDGPU::S_MUL_U64:
7618 if (ST.hasVectorMulU64()) {
7619 NewOpcode = AMDGPU::V_MUL_U64_e64;
7623 splitScalarSMulU64(Worklist, Inst, MDT);
7627 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7628 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7631 splitScalarSMulPseudo(Worklist, Inst, MDT);
7635 case AMDGPU::S_AND_B64:
7636 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7640 case AMDGPU::S_OR_B64:
7641 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7645 case AMDGPU::S_XOR_B64:
7646 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7650 case AMDGPU::S_NAND_B64:
7651 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7655 case AMDGPU::S_NOR_B64:
7656 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7660 case AMDGPU::S_XNOR_B64:
7661 if (ST.hasDLInsts())
7662 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7664 splitScalar64BitXnor(Worklist, Inst, MDT);
7668 case AMDGPU::S_ANDN2_B64:
7669 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7673 case AMDGPU::S_ORN2_B64:
7674 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7678 case AMDGPU::S_BREV_B64:
7679 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7683 case AMDGPU::S_NOT_B64:
7684 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7688 case AMDGPU::S_BCNT1_I32_B64:
7689 splitScalar64BitBCNT(Worklist, Inst);
7693 case AMDGPU::S_BFE_I64:
7694 splitScalar64BitBFE(Worklist, Inst);
7698 case AMDGPU::S_FLBIT_I32_B64:
7699 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7702 case AMDGPU::S_FF1_I32_B64:
7703 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7707 case AMDGPU::S_LSHL_B32:
7708 if (ST.hasOnlyRevVALUShifts()) {
7709 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7713 case AMDGPU::S_ASHR_I32:
7714 if (ST.hasOnlyRevVALUShifts()) {
7715 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7719 case AMDGPU::S_LSHR_B32:
7720 if (ST.hasOnlyRevVALUShifts()) {
7721 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7725 case AMDGPU::S_LSHL_B64:
7726 if (ST.hasOnlyRevVALUShifts()) {
7728 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7729 : AMDGPU::V_LSHLREV_B64_e64;
7733 case AMDGPU::S_ASHR_I64:
7734 if (ST.hasOnlyRevVALUShifts()) {
7735 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7739 case AMDGPU::S_LSHR_B64:
7740 if (ST.hasOnlyRevVALUShifts()) {
7741 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7746 case AMDGPU::S_ABS_I32:
7747 lowerScalarAbs(Worklist, Inst);
7751 case AMDGPU::S_CBRANCH_SCC0:
7752 case AMDGPU::S_CBRANCH_SCC1: {
7755 bool IsSCC = CondReg == AMDGPU::SCC;
7757 Register EXEC = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
7758 unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7761 .
addReg(IsSCC ? VCC : CondReg);
7765 case AMDGPU::S_BFE_U64:
7766 case AMDGPU::S_BFM_B64:
7769 case AMDGPU::S_PACK_LL_B32_B16:
7770 case AMDGPU::S_PACK_LH_B32_B16:
7771 case AMDGPU::S_PACK_HL_B32_B16:
7772 case AMDGPU::S_PACK_HH_B32_B16:
7773 movePackToVALU(Worklist, MRI, Inst);
7777 case AMDGPU::S_XNOR_B32:
7778 lowerScalarXnor(Worklist, Inst);
7782 case AMDGPU::S_NAND_B32:
7783 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7787 case AMDGPU::S_NOR_B32:
7788 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7792 case AMDGPU::S_ANDN2_B32:
7793 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7797 case AMDGPU::S_ORN2_B32:
7798 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7806 case AMDGPU::S_ADD_CO_PSEUDO:
7807 case AMDGPU::S_SUB_CO_PSEUDO: {
7808 unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7809 ? AMDGPU::V_ADDC_U32_e64
7810 : AMDGPU::V_SUBB_U32_e64;
7811 const auto *CarryRC = RI.getWaveMaskRegClass();
7814 if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
7815 Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
7822 Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
7833 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
7837 case AMDGPU::S_UADDO_PSEUDO:
7838 case AMDGPU::S_USUBO_PSEUDO: {
7845 unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7846 ? AMDGPU::V_ADD_CO_U32_e64
7847 : AMDGPU::V_SUB_CO_U32_e64;
7849 RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
7850 Register DestReg = MRI.createVirtualRegister(NewRC);
7858 MRI.replaceRegWith(Dest0.getReg(), DestReg);
7865 case AMDGPU::S_CSELECT_B32:
7866 case AMDGPU::S_CSELECT_B64:
7867 lowerSelect(Worklist, Inst, MDT);
7870 case AMDGPU::S_CMP_EQ_I32:
7871 case AMDGPU::S_CMP_LG_I32:
7872 case AMDGPU::S_CMP_GT_I32:
7873 case AMDGPU::S_CMP_GE_I32:
7874 case AMDGPU::S_CMP_LT_I32:
7875 case AMDGPU::S_CMP_LE_I32:
7876 case AMDGPU::S_CMP_EQ_U32:
7877 case AMDGPU::S_CMP_LG_U32:
7878 case AMDGPU::S_CMP_GT_U32:
7879 case AMDGPU::S_CMP_GE_U32:
7880 case AMDGPU::S_CMP_LT_U32:
7881 case AMDGPU::S_CMP_LE_U32:
7882 case AMDGPU::S_CMP_EQ_U64:
7883 case AMDGPU::S_CMP_LG_U64:
7884 case AMDGPU::S_CMP_LT_F32:
7885 case AMDGPU::S_CMP_EQ_F32:
7886 case AMDGPU::S_CMP_LE_F32:
7887 case AMDGPU::S_CMP_GT_F32:
7888 case AMDGPU::S_CMP_LG_F32:
7889 case AMDGPU::S_CMP_GE_F32:
7890 case AMDGPU::S_CMP_O_F32:
7891 case AMDGPU::S_CMP_U_F32:
7892 case AMDGPU::S_CMP_NGE_F32:
7893 case AMDGPU::S_CMP_NLG_F32:
7894 case AMDGPU::S_CMP_NGT_F32:
7895 case AMDGPU::S_CMP_NLE_F32:
7896 case AMDGPU::S_CMP_NEQ_F32:
7897 case AMDGPU::S_CMP_NLT_F32: {
7898 Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7902 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7916 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7920 case AMDGPU::S_CMP_LT_F16:
7921 case AMDGPU::S_CMP_EQ_F16:
7922 case AMDGPU::S_CMP_LE_F16:
7923 case AMDGPU::S_CMP_GT_F16:
7924 case AMDGPU::S_CMP_LG_F16:
7925 case AMDGPU::S_CMP_GE_F16:
7926 case AMDGPU::S_CMP_O_F16:
7927 case AMDGPU::S_CMP_U_F16:
7928 case AMDGPU::S_CMP_NGE_F16:
7929 case AMDGPU::S_CMP_NLG_F16:
7930 case AMDGPU::S_CMP_NGT_F16:
7931 case AMDGPU::S_CMP_NLE_F16:
7932 case AMDGPU::S_CMP_NEQ_F16:
7933 case AMDGPU::S_CMP_NLT_F16: {
7934 Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7956 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7960 case AMDGPU::S_CVT_HI_F32_F16: {
7962 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7963 Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7964 if (ST.useRealTrue16Insts()) {
7969 .addReg(TmpReg, 0, AMDGPU::hi16)
7985 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
7989 case AMDGPU::S_MINIMUM_F32:
7990 case AMDGPU::S_MAXIMUM_F32: {
7992 Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8003 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8007 case AMDGPU::S_MINIMUM_F16:
8008 case AMDGPU::S_MAXIMUM_F16: {
8010 Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
8011 ? &AMDGPU::VGPR_16RegClass
8012 : &AMDGPU::VGPR_32RegClass);
8024 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8028 case AMDGPU::V_S_EXP_F16_e64:
8029 case AMDGPU::V_S_LOG_F16_e64:
8030 case AMDGPU::V_S_RCP_F16_e64:
8031 case AMDGPU::V_S_RSQ_F16_e64:
8032 case AMDGPU::V_S_SQRT_F16_e64: {
8034 Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
8035 ? &AMDGPU::VGPR_16RegClass
8036 : &AMDGPU::VGPR_32RegClass);
8048 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8054 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8062 if (NewOpcode == Opcode) {
8071 if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
8073 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
8077 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8079 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8097 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8099 MRI.replaceRegWith(DstReg, NewDstReg);
8100 MRI.clearKillFlags(NewDstReg);
8114 if (ST.useRealTrue16Insts() && Inst.isCopy() &&
8118 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8119 Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8120 Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8122 get(AMDGPU::IMPLICIT_DEF), Undef);
8124 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8130 MRI.replaceRegWith(DstReg, NewDstReg);
8131 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8133 } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8136 Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8137 MRI.replaceRegWith(DstReg, NewDstReg);
8138 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8143 Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8144 MRI.replaceRegWith(DstReg, NewDstReg);
8146 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8156 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8157 AMDGPU::OpName::src0_modifiers) >= 0)
8161 NewInstr->addOperand(Src);
8164 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8167 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8169 NewInstr.addImm(Size);
8170 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8174 } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8179 "Scalar BFE is only implemented for constant width and offset");
8187 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8188 AMDGPU::OpName::src1_modifiers) >= 0)
8190 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8192 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8193 AMDGPU::OpName::src2_modifiers) >= 0)
8195 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8197 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8199 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8201 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8207 NewInstr->addOperand(Op);
8214 if (Op.getReg() == AMDGPU::SCC) {
8216 if (Op.isDef() && !Op.isDead())
8217 addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
8219 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8224 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8225 Register DstReg = NewInstr->getOperand(0).getReg();
8230 NewDstReg = MRI.createVirtualRegister(NewDstRC);
8231 MRI.replaceRegWith(DstReg, NewDstReg);
8240 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8244std::pair<bool, MachineBasicBlock *>
8256 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8259 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
8261 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
8262 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8270 MRI.replaceRegWith(OldDstReg, ResultReg);
8273 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8274 return std::pair(true, NewBB);
8277 return std::pair(false, nullptr);
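// The helper above rewrites S_ADD_I32/S_SUB_I32 to the carry-less VALU forms
// when that is possible; it returns {false, nullptr} when no change is made,
// so the caller falls back to the generic lowering path.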
8294 bool IsSCC = (CondReg == AMDGPU::SCC);
8302 MRI.replaceRegWith(Dest.getReg(), CondReg);
8308 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8309 NewCondReg = MRI.createVirtualRegister(TC);
8313 bool CopyFound = false;
8314 for (MachineInstr &CandI :
8317 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
8319 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8321 .addReg(CandI.getOperand(1).getReg());
8333 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8341 RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
8342 MachineInstr *NewInst;
8343 if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
8344 NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8357 MRI.replaceRegWith(Dest.getReg(), NewDestReg);
8359 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8371 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8372 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8374 unsigned SubOp = ST.hasAddNoCarry() ?
8375 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8385 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8386 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8400 if (ST.hasDLInsts()) {
8401 Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8409 MRI.replaceRegWith(Dest.getReg(), NewDest);
8410 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8416 bool Src0IsSGPR = Src0.isReg() &&
8417 RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
8418 bool Src1IsSGPR = Src1.isReg() &&
8419 RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
8421 Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8422 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8432 } else if (Src1IsSGPR) {
8446 MRI.replaceRegWith(Dest.getReg(), NewDest);
8450 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8456 unsigned Opcode) const {
8466 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8467 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8479 MRI.replaceRegWith(Dest.getReg(), NewDest);
8480 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8485 unsigned Opcode) const {
8495 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8496 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8508 MRI.replaceRegWith(Dest.getReg(), NewDest);
8509 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8524 const MCInstrDesc &InstDesc = get(Opcode);
8525 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8527 &AMDGPU::SGPR_32RegClass;
8529 const TargetRegisterClass *Src0SubRC =
8530 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8533 AMDGPU::sub0, Src0SubRC);
8535 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8536 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8537 const TargetRegisterClass *NewDestSubRC =
8538 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8540 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8541 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8544 AMDGPU::sub1, Src0SubRC);
8546 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8547 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8552 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8559 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8561 Worklist.insert(&LoHalf);
8562 Worklist.insert(&HiHalf);
8568 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
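// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// The 64-bit unary split above applies the 32-bit op independently to the
// sub0 and sub1 halves and recombines them with a REG_SEQUENCE. For a
// bitwise op such as NOT (an assumption used for the example), that is the
// identity checked below.
#include <cstdint>
constexpr uint64_t not64ViaHalves(uint64_t X) {
  uint32_t Lo = ~static_cast<uint32_t>(X);        // op on sub0
  uint32_t Hi = ~static_cast<uint32_t>(X >> 32);  // op on sub1
  return (static_cast<uint64_t>(Hi) << 32) | Lo;  // recombine (REG_SEQUENCE)
}
static_assert(not64ViaHalves(0x0123456789abcdefULL) == ~0x0123456789abcdefULL,
              "64-bit NOT from two 32-bit halves");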
8579 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8580 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8581 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8589 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8590 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8591 const TargetRegisterClass *Src0SubRC =
8592 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8593 if (RI.isSGPRClass(Src0SubRC))
8594 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8595 const TargetRegisterClass *Src1SubRC =
8596 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8597 if (RI.isSGPRClass(Src1SubRC))
8598 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8602 MachineOperand Op0L =
8604 MachineOperand Op1L =
8606 MachineOperand Op0H =
8608 MachineOperand Op1H =
8626 Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8627 MachineInstr *Op1L_Op0H =
8632 Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8633 MachineInstr *Op1H_Op0L =
8638 Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8639 MachineInstr *Carry =
8644 MachineInstr *LoHalf =
8649 Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8654 MachineInstr *HiHalf =
8665 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8677 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
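// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// The 64-bit multiply expansion above (Op1L_Op0H, Op1H_Op0L, Carry, AddReg)
// follows the schoolbook decomposition of a 64x64->64 product into 32-bit
// pieces: low half is lo0*lo1, high half is mulhi(lo0, lo1) plus the two
// cross terms. The exact VALU opcodes used are not shown in the fragments;
// the snippet only checks the arithmetic identity.
#include <cstdint>
constexpr uint64_t mul64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
  uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);
  uint64_t LoFull = static_cast<uint64_t>(ALo) * BLo;          // lo0 * lo1, 64-bit
  uint32_t Lo = static_cast<uint32_t>(LoFull);                 // low 32 bits
  uint32_t Carry = static_cast<uint32_t>(LoFull >> 32);        // mulhi(lo0, lo1)
  uint32_t Hi = static_cast<uint32_t>(Carry +
                                      static_cast<uint64_t>(ALo) * BHi +
                                      static_cast<uint64_t>(AHi) * BLo);
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}
static_assert(mul64ViaHalves(0x123456789ULL, 0xabcdef123ULL) ==
              0x123456789ULL * 0xabcdef123ULL, "schoolbook 64-bit multiply");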
8688 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8689 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8690 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8698 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8699 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8700 const TargetRegisterClass *Src0SubRC =
8701 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8702 if (RI.isSGPRClass(Src0SubRC))
8703 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8704 const TargetRegisterClass *Src1SubRC =
8705 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8706 if (RI.isSGPRClass(Src1SubRC))
8707 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8711 MachineOperand Op0L =
8713 MachineOperand Op1L =
8717 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8718 ? AMDGPU::V_MUL_HI_U32_e64
8719 : AMDGPU::V_MUL_HI_I32_e64;
8720 MachineInstr *HiHalf =
8723 MachineInstr *LoHalf =
8734 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8742 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8758 const MCInstrDesc &InstDesc = get(Opcode);
8759 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8761 &AMDGPU::SGPR_32RegClass;
8763 const TargetRegisterClass *Src0SubRC =
8764 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8765 const TargetRegisterClass *Src1RC = Src1.isReg() ?
8767 &AMDGPU::SGPR_32RegClass;
8769 const TargetRegisterClass *Src1SubRC =
8770 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8773 AMDGPU::sub0, Src0SubRC);
8775 AMDGPU::sub0, Src1SubRC);
8777 AMDGPU::sub1, Src0SubRC);
8779 AMDGPU::sub1, Src1SubRC);
8781 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8782 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8783 const TargetRegisterClass *NewDestSubRC =
8784 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8786 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8787 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
8791 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8792 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
8796 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8803 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8805 Worklist.insert(&LoHalf);
8806 Worklist.insert(&HiHalf);
8809 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8825 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8827 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8829 MachineOperand* Op0;
8830 MachineOperand* Op1;
8843 Register NewDest = MRI.createVirtualRegister(DestRC);
8849 MRI.replaceRegWith(Dest.getReg(), NewDest);
8865 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
8866 const TargetRegisterClass *SrcRC = Src.isReg() ?
8867 MRI.getRegClass(Src.getReg()) :
8868 &AMDGPU::SGPR_32RegClass;
8870 Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8871 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8873 const TargetRegisterClass *SrcSubRC =
8874 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8877 AMDGPU::sub0, SrcSubRC);
8879 AMDGPU::sub1, SrcSubRC);
8885 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8889 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
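// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// The bit-count lowering above uses V_BCNT_U32_B32, which adds an accumulator
// operand to the population count of its first operand. Chaining the two
// halves through that accumulator gives the 64-bit count:
//   popcount64(x) == bcnt(hi32(x), bcnt(lo32(x), 0)).
#include <cstdint>
constexpr uint32_t bcnt32(uint32_t V, uint32_t Acc) {
  for (int I = 0; I < 32; ++I)
    Acc += (V >> I) & 1u;          // popcount(V) + Acc
  return Acc;
}
constexpr uint32_t popcount64ViaBcnt(uint64_t X) {
  return bcnt32(static_cast<uint32_t>(X >> 32),
                bcnt32(static_cast<uint32_t>(X), 0));
}
static_assert(popcount64ViaBcnt(0xF0F0F0F0F0F0F0F0ULL) == 32, "");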
8908 Offset == 0 && "Not implemented");
8911 Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8912 Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8913 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8930 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8931 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8936 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8937 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8941 .addReg(Src.getReg(), 0, AMDGPU::sub0);
8944 .addReg(Src.getReg(), 0, AMDGPU::sub0)
8949 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8950 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
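// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// The second path above builds a 64-bit result whose low half is a copy of
// sub0 and whose high half is derived from the same sub0 value; this matches
// a sign-extension pattern where the high half is the sign bit replicated
// (an arithmetic shift right by 31). The exact shift opcode is an assumption,
// only the identity is checked here.
#include <cstdint>
constexpr uint64_t sext32To64ViaAshr(uint32_t Lo) {
  // Models "ashr lo, 31": every bit of the high half becomes the sign bit.
  uint32_t Hi = (Lo & 0x80000000u) ? 0xffffffffu : 0u;
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}
static_assert(sext32To64ViaAshr(0xFFFFFFF5u) == 0xFFFFFFFFFFFFFFF5ULL, "");
static_assert(sext32To64ViaAshr(11u) == 11ULL, "");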
8969 const MCInstrDesc &InstDesc = get(Opcode);
8971 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8972 unsigned OpcodeAdd =
8973 ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8975 const TargetRegisterClass *SrcRC =
8976 Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8977 const TargetRegisterClass *SrcSubRC =
8978 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8980 MachineOperand SrcRegSub0 =
8982 MachineOperand SrcRegSub1 =
8985 Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8986 Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8987 Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8988 Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8995 .addReg(IsCtlz ? MidReg1 : MidReg2)
9001 .addReg(IsCtlz ? MidReg2 : MidReg1);
9003 MRI.replaceRegWith(Dest.getReg(), MidReg4);
9005 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
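// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// The 64-bit count-bits split above relies on V_FFBH_U32 / V_FFBL_B32
// returning 0xffffffff (-1) for a zero input, so the two 32-bit results can
// be combined with an add of 32 and an unsigned min. For ctlz:
//   ctlz64(x) == umin(ffbh(hi32), ffbh(lo32) + 32)   for x != 0.
#include <cstdint>
constexpr uint32_t ffbhU32(uint32_t V) {   // count leading zeros, -1 on zero input
  if (V == 0)
    return 0xffffffffu;
  uint32_t N = 0;
  while (!(V & 0x80000000u)) { V <<= 1; ++N; }
  return N;
}
constexpr uint32_t ctlz64ViaHalves(uint64_t X) {
  uint32_t Hi = ffbhU32(static_cast<uint32_t>(X >> 32));
  uint32_t Lo = ffbhU32(static_cast<uint32_t>(X)) + 32u; // wraps harmlessly when lo == 0
  return Hi < Lo ? Hi : Lo;                              // unsigned min
}
static_assert(ctlz64ViaHalves(0x0000000100000000ULL) == 31, "");
static_assert(ctlz64ViaHalves(0x0000000000000001ULL) == 63, "");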
9008void SIInstrInfo::addUsersToMoveToVALUWorklist(
9012 MachineInstr &UseMI = *MO.getParent();
9016 switch (UseMI.getOpcode()) {
9019 case AMDGPU::SOFT_WQM:
9020 case AMDGPU::STRICT_WWM:
9021 case AMDGPU::STRICT_WQM:
9022 case AMDGPU::REG_SEQUENCE:
9024 case AMDGPU::INSERT_SUBREG:
9027 OpNo = MO.getOperandNo();
9042 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9049 case AMDGPU::S_PACK_LL_B32_B16: {
9050 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9051 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9068 case AMDGPU::S_PACK_LH_B32_B16: {
9069 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9078 case AMDGPU::S_PACK_HL_B32_B16: {
9079 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9089 case AMDGPU::S_PACK_HH_B32_B16: {
9090 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9091 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9108 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9109 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
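// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// The S_PACK_*_B32_B16 cases above implement 16-bit packing with VALU
// mask/shift/or sequences. The four variants correspond to these identities
// (checked standalone below; LL = low/low, HH = high/high, etc.):
//   LL -> lo16(a) | (lo16(b) << 16)      LH -> lo16(a) | (hi16(b) << 16)
//   HL -> hi16(a) | (lo16(b) << 16)      HH -> hi16(a) | (hi16(b) << 16)
#include <cstdint>
constexpr uint32_t lo16(uint32_t V) { return V & 0xffffu; }
constexpr uint32_t hi16(uint32_t V) { return V >> 16; }
constexpr uint32_t packLL(uint32_t A, uint32_t B) { return lo16(A) | (lo16(B) << 16); }
constexpr uint32_t packHH(uint32_t A, uint32_t B) { return hi16(A) | (hi16(B) << 16); }
static_assert(packLL(0x11112222u, 0x33334444u) == 0x44442222u, "");
static_assert(packHH(0x11112222u, 0x33334444u) == 0x33331111u, "");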
9118 assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9119 !Op.isDead() && Op.getParent() == &SCCDefInst);
9120 SmallVector<MachineInstr *, 4> CopyToDelete;
9123 for (MachineInstr &MI :
9127 int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9130 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9131 Register DestReg = MI.getOperand(0).getReg();
9133 MRI.replaceRegWith(DestReg, NewCond);
9138 MI.getOperand(SCCIdx).setReg(NewCond);
9144 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9147 for (auto &Copy : CopyToDelete)
9148 Copy->eraseFromParent();
9156void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9162 for (MachineInstr &MI :
9165 if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9167 if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9176 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9184 case AMDGPU::REG_SEQUENCE:
9185 case AMDGPU::INSERT_SUBREG:
9187 case AMDGPU::SOFT_WQM:
9188 case AMDGPU::STRICT_WWM:
9189 case AMDGPU::STRICT_WQM: {
9191 if (RI.isAGPRClass(SrcRC)) {
9192 if (RI.isAGPRClass(NewDstRC))
9197 case AMDGPU::REG_SEQUENCE:
9198 case AMDGPU::INSERT_SUBREG:
9199 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9202 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9208 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9211 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9225 int OpIndices[3]) const {
9226 const MCInstrDesc &Desc = MI.getDesc();
9242 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9244 for (unsigned i = 0; i < 3; ++i) {
9245 int Idx = OpIndices[i];
9249 const MachineOperand &MO = MI.getOperand(Idx);
9255 const TargetRegisterClass *OpRC =
9256 RI.getRegClass(Desc.operands()[Idx].RegClass);
9257 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9263 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9264 if (RI.isSGPRClass(RegRC))
9282 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9283 SGPRReg = UsedSGPRs[0];
9286 if (!SGPRReg && UsedSGPRs[1]) {
9287 if (UsedSGPRs[1] == UsedSGPRs[2])
9288 SGPRReg = UsedSGPRs[1];
9295 AMDGPU::OpName OperandName) const {
9296 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9299 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9303 return &MI.getOperand(Idx);
9317 if (ST.isAmdHsaOS()) {
9320 RsrcDataFormat |= (1ULL << 56);
9325 RsrcDataFormat |= (2ULL << 59);
9328 return RsrcDataFormat;
9338 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9343 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9350 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9356 unsigned Opc = MI.getOpcode();
9362 return get(Opc).mayLoad() &&
9367 int &FrameIndex) const {
9369 if (!Addr || !Addr->isFI())
9380 int &FrameIndex) const {
9388 int &FrameIndex) const {
9402 int &FrameIndex) const {
9419 while (++I != E && I->isInsideBundle()) {
9420 assert(!I->isBundle() && "No nested bundle!");
9428 unsigned Opc = MI.getOpcode();
9430 unsigned DescSize = Desc.getSize();
9435 unsigned Size = DescSize;
9439 if (MI.isBranch() && ST.hasOffset3fBug())
9450 bool HasLiteral = false;
9451 unsigned LiteralSize = 4;
9452 for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9457 if (ST.has64BitLiterals()) {
9458 switch (OpInfo.OperandType) {
9474 return HasLiteral ? DescSize + LiteralSize : DescSize;
9479 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9483 int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9484 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9488 case TargetOpcode::BUNDLE:
9490 case TargetOpcode::INLINEASM:
9491 case TargetOpcode::INLINEASM_BR: {
9493 const char *AsmStr = MI.getOperand(0).getSymbolName();
9497 if (MI.isMetaInstruction())
9501 const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9504 unsigned LoInstOpcode = D16Info->LoOp;
9506 DescSize = Desc.getSize();
9517 if (MI.memoperands_empty())
9529 static const std::pair<int, const char *> TargetIndices[] = {
9567std::pair<unsigned, unsigned>
9574 static const std::pair<unsigned, const char *> TargetFlags[] = {
9592 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9607 return AMDGPU::WWM_COPY;
9609 return AMDGPU::COPY;
9621 bool IsNullOrVectorRegister = true;
9624 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9629 return IsNullOrVectorRegister &&
9631 (Opcode == AMDGPU::IMPLICIT_DEF &&
9633 (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
9634 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9642 if (ST.hasAddNoCarry())
9646 Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9647 MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9658 if (ST.hasAddNoCarry())
9665 *RI.getBoolRC(), I, false,
9678 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9679 case AMDGPU::SI_KILL_I1_TERMINATOR:
9688 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9689 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9690 case AMDGPU::SI_KILL_I1_PSEUDO:
9691 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9703 const unsigned OffsetBits =
9705 return (1 << OffsetBits) - 1;
9712 if (MI.isInlineAsm())
9715 for (auto &Op : MI.implicit_operands()) {
9716 if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9717 Op.setReg(AMDGPU::VCC_LO);
9726 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
9730 const auto RCID = MI.getDesc().operands()[Idx].RegClass;
9731 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
9748 if (Imm <= MaxImm + 64) {
9750 Overflow = Imm - MaxImm;
9777 if (ST.hasRestrictedSOffset())
9820 if (!ST.hasFlatInstOffsets())
9828 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9840std::pair<int64_t, int64_t>
9843 int64_t RemainderOffset = COffsetVal;
9844 int64_t ImmField = 0;
9849 if (AllowNegative) {
9851 int64_t D = 1LL << NumBits;
9852 RemainderOffset = (COffsetVal / D) * D;
9853 ImmField = COffsetVal - RemainderOffset;
9855 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9857 (ImmField % 4) != 0) {
9859 RemainderOffset += ImmField % 4;
9860 ImmField -= ImmField % 4;
9862 } else if (COffsetVal >= 0) {
9864 RemainderOffset = COffsetVal - ImmField;
9868 assert(RemainderOffset + ImmField == COffsetVal);
9869 return {ImmField, RemainderOffset};
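// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// splitFlatOffset divides a constant offset into the part that fits the FLAT
// immediate field and a remainder that is folded into the base address, so
// that ImmField + RemainderOffset == COffsetVal. NumBits below stands for the
// magnitude bits of the field, and the unsigned-field masking step is an
// assumption; the real code also handles the unaligned-scratch workaround.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t> splitOffsetSketch(int64_t COffsetVal,
                                                     unsigned NumBits,
                                                     bool AllowNegative) {
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    int64_t D = 1LL << NumBits;              // signed field covers (-D, D)
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
  } else if (COffsetVal >= 0) {
    int64_t MaxImm = (1LL << NumBits) - 1;   // unsigned field
    ImmField = COffsetVal & MaxImm;
    RemainderOffset = COffsetVal - ImmField;
  }
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}

int main() {
  auto [Imm, Rem] = splitOffsetSketch(5000, 12, /*AllowNegative=*/true);
  assert(Imm + Rem == 5000);
  return 0;
}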
9873 if (ST.hasNegativeScratchOffsetBug() &&
9881 switch (ST.getGeneration()) {
9907 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9908 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9909 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9910 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9911 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9912 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9913 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9914 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9921#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9922 case OPCODE##_dpp: \
9923 case OPCODE##_e32: \
9924 case OPCODE##_e64: \
9925 case OPCODE##_e64_dpp: \
9940 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9941 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
9942 case AMDGPU::V_FMA_F16_gfx9_e64:
9943 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
9944 case AMDGPU::V_INTERP_P2_F16:
9945 case AMDGPU::V_MAD_F16_e64:
9946 case AMDGPU::V_MAD_U16_e64:
9947 case AMDGPU::V_MAD_I16_e64:
9969 switch (ST.getGeneration()) {
9982 if (isMAI(Opcode)) {
9990 if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
9997 if (ST.hasGFX90AInsts()) {
9999 if (ST.hasGFX940Insts())
10030 for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
10031 if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10032 auto &RegOp = MI.getOperand(1 + 2 * I);
10044 switch (MI.getOpcode()) {
10046 case AMDGPU::REG_SEQUENCE:
10050 case AMDGPU::INSERT_SUBREG:
10051 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10068 if (!P.Reg.isVirtual())
10072 auto *DefInst = MRI.getVRegDef(RSR.Reg);
10073 while (auto *MI = DefInst) {
10075 switch (MI->getOpcode()) {
10077 case AMDGPU::V_MOV_B32_e32: {
10078 auto &Op1 = MI->getOperand(1);
10083 DefInst = MRI.getVRegDef(RSR.Reg);
10091 DefInst = MRI.getVRegDef(RSR.Reg);
10104 assert(MRI.isSSA() && "Must be run on SSA");
10106 auto *TRI = MRI.getTargetRegisterInfo();
10107 auto *DefBB = DefMI.getParent();
10111 if (UseMI.getParent() != DefBB)
10114 const int MaxInstScan = 20;
10118 auto E = UseMI.getIterator();
10119 for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10120 if (I->isDebugInstr())
10123 if (++NumInst > MaxInstScan)
10126 if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10136 assert(MRI.isSSA() && "Must be run on SSA");
10138 auto *TRI = MRI.getTargetRegisterInfo();
10139 auto *DefBB = DefMI.getParent();
10141 const int MaxUseScan = 10;
10144 for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10145 auto &UseInst = *Use.getParent();
10148 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10151 if (++NumUse > MaxUseScan)
10158 const int MaxInstScan = 20;
10162 for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10165 if (I->isDebugInstr())
10168 if (++NumInst > MaxInstScan)
10181 if (Reg == VReg && --NumUse == 0)
10183 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10192 auto Cur = MBB.begin();
10193 if (Cur != MBB.end())
10195 if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10198 } while (Cur != MBB.end() && Cur != LastPHIIt);
10207 if (InsPt != MBB.end() &&
10208 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10209 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10210 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10211 InsPt->definesRegister(Src, nullptr)) {
10214 get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
10215 : AMDGPU::S_MOV_B64_term),
10217 .addReg(Src, 0, SrcSubReg)
10242 if (isFullCopyInstr(MI)) {
10243 Register DstReg = MI.getOperand(0).getReg();
10244 Register SrcReg = MI.getOperand(1).getReg();
10251 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10255 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10266 unsigned *PredCost) const {
10267 if (MI.isBundle()) {
10270 unsigned Lat = 0, Count = 0;
10271 for (++I; I != E && I->isBundledWithPred(); ++I) {
10273 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10275 return Lat + Count - 1;
10278 return SchedModel.computeInstrLatency(&MI);
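// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// For a bundle, the latency above is approximated as the maximum member
// latency plus one cycle per additional bundled instruction (Lat + Count - 1).
// The helper below only mirrors that accumulation over a list of latencies.
#include <algorithm>
#include <cassert>
#include <vector>

static unsigned bundleLatencySketch(const std::vector<unsigned> &MemberLatencies) {
  unsigned Lat = 0, Count = 0;
  for (unsigned L : MemberLatencies) {
    ++Count;
    Lat = std::max(Lat, L);
  }
  return Count ? Lat + Count - 1 : 0;
}

int main() {
  assert(bundleLatencySketch({4, 1, 2}) == 6); // max(4, 1, 2) + 3 - 1
  return 0;
}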
10284 unsigned opcode = MI.getOpcode();
10289 : MI.getOperand(1).getReg();
10290 LLT DstTy = MRI.getType(Dst);
10291 LLT SrcTy = MRI.getType(Src);
10293 unsigned SrcAS = SrcTy.getAddressSpace();
10296 ST.hasGloballyAddressableScratch()
10304 if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
10305 return HandleAddrSpaceCast(MI);
10308 auto IID = GI->getIntrinsicID();
10315 case Intrinsic::amdgcn_addrspacecast_nonnull:
10316 return HandleAddrSpaceCast(MI);
10317 case Intrinsic::amdgcn_if:
10318 case Intrinsic::amdgcn_else:
10332 if (opcode == AMDGPU::G_LOAD) {
10333 if (MI.memoperands_empty())
10337 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10338 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10346 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
10347 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10348 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10361 unsigned opcode = MI.getOpcode();
10362 if (opcode == AMDGPU::V_READLANE_B32 ||
10363 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10364 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10367 if (isCopyInstr(MI)) {
10371 RI.getPhysRegBaseClass(srcOp.getReg());
10379 if (MI.isPreISelOpcode())
10394 if (MI.memoperands_empty())
10398 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10399 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10414 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10416 if (!SrcOp.isReg())
10420 if (!Reg || !SrcOp.readsReg())
10426 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10453 F, "ds_ordered_count unsupported for this calling conv"));
10467 Register &SrcReg2, int64_t &CmpMask,
10468 int64_t &CmpValue) const {
10469 if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10472 switch (MI.getOpcode()) {
10475 case AMDGPU::S_CMP_EQ_U32:
10476 case AMDGPU::S_CMP_EQ_I32:
10477 case AMDGPU::S_CMP_LG_U32:
10478 case AMDGPU::S_CMP_LG_I32:
10479 case AMDGPU::S_CMP_LT_U32:
10480 case AMDGPU::S_CMP_LT_I32:
10481 case AMDGPU::S_CMP_GT_U32:
10482 case AMDGPU::S_CMP_GT_I32:
10483 case AMDGPU::S_CMP_LE_U32:
10484 case AMDGPU::S_CMP_LE_I32:
10485 case AMDGPU::S_CMP_GE_U32:
10486 case AMDGPU::S_CMP_GE_I32:
10487 case AMDGPU::S_CMP_EQ_U64:
10488 case AMDGPU::S_CMP_LG_U64:
10489 SrcReg = MI.getOperand(0).getReg();
10490 if (MI.getOperand(1).isReg()) {
10491 if (MI.getOperand(1).getSubReg())
10493 SrcReg2 = MI.getOperand(1).getReg();
10495 } else if (MI.getOperand(1).isImm()) {
10497 CmpValue = MI.getOperand(1).getImm();
10503 case AMDGPU::S_CMPK_EQ_U32:
10504 case AMDGPU::S_CMPK_EQ_I32:
10505 case AMDGPU::S_CMPK_LG_U32:
10506 case AMDGPU::S_CMPK_LG_I32:
10507 case AMDGPU::S_CMPK_LT_U32:
10508 case AMDGPU::S_CMPK_LT_I32:
10509 case AMDGPU::S_CMPK_GT_U32:
10510 case AMDGPU::S_CMPK_GT_I32:
10511 case AMDGPU::S_CMPK_LE_U32:
10512 case AMDGPU::S_CMPK_LE_I32:
10513 case AMDGPU::S_CMPK_GE_U32:
10514 case AMDGPU::S_CMPK_GE_I32:
10515 SrcReg = MI.getOperand(0).getReg();
10517 CmpValue = MI.getOperand(1).getImm();
10526 Register SrcReg2, int64_t CmpMask,
10535 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10536 this](int64_t ExpectedValue, unsigned SrcSize,
10537 bool IsReversible, bool IsSigned) -> bool {
10562 if (!Def || Def->getParent() != CmpInstr.getParent())
10565 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10566 Def->getOpcode() != AMDGPU::S_AND_B64
10570 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10581 SrcOp = &Def->getOperand(2);
10582 else if (isMask(&Def->getOperand(2)))
10583 SrcOp = &Def->getOperand(1);
10591 if (IsSigned && BitNo == SrcSize - 1)
10594 ExpectedValue <<= BitNo;
10596 bool IsReversedCC = false;
10597 if (CmpValue != ExpectedValue) {
10600 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10605 Register DefReg = Def->getOperand(0).getReg();
10606 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10609 for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10611 if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10612 I->killsRegister(AMDGPU::SCC, &RI))
10617 Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
10621 if (!MRI->use_nodbg_empty(DefReg)) {
10629 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10630 : AMDGPU::S_BITCMP1_B32
10631 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10632 : AMDGPU::S_BITCMP1_B64;
10637 Def->eraseFromParent();
10645 case AMDGPU::S_CMP_EQ_U32:
10646 case AMDGPU::S_CMP_EQ_I32:
10647 case AMDGPU::S_CMPK_EQ_U32:
10648 case AMDGPU::S_CMPK_EQ_I32:
10649 return optimizeCmpAnd(1, 32, true, false);
10650 case AMDGPU::S_CMP_GE_U32:
10651 case AMDGPU::S_CMPK_GE_U32:
10652 return optimizeCmpAnd(1, 32, false, false);
10653 case AMDGPU::S_CMP_GE_I32:
10654 case AMDGPU::S_CMPK_GE_I32:
10655 return optimizeCmpAnd(1, 32, false, true);
10656 case AMDGPU::S_CMP_EQ_U64:
10657 return optimizeCmpAnd(1, 64, true, false);
10658 case AMDGPU::S_CMP_LG_U32:
10659 case AMDGPU::S_CMP_LG_I32:
10660 case AMDGPU::S_CMPK_LG_U32:
10661 case AMDGPU::S_CMPK_LG_I32:
10662 return optimizeCmpAnd(0, 32, true, false);
10663 case AMDGPU::S_CMP_GT_U32:
10664 case AMDGPU::S_CMPK_GT_U32:
10665 return optimizeCmpAnd(0, 32, false, false);
10666 case AMDGPU::S_CMP_GT_I32:
10667 case AMDGPU::S_CMPK_GT_I32:
10668 return optimizeCmpAnd(0, 32, false, true);
10669 case AMDGPU::S_CMP_LG_U64:
10670 return optimizeCmpAnd(0, 64, true, false);
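// --- Illustrative sketch, not part of SIInstrInfo.cpp ---
// The optimizeCmpAnd cases above fold a compare of (x & (1 << n)) against 0
// or against (1 << n) into S_BITCMP0_* / S_BITCMP1_*: comparing the masked
// value is just a test of bit n. The snippet checks that predicate identity.
#include <cstdint>
constexpr bool cmpAndEq(uint32_t X, unsigned N, uint32_t Expected) {
  return (X & (1u << N)) == Expected;            // s_and + s_cmp_eq
}
constexpr bool bitcmp1(uint32_t X, unsigned N) { // bit test, as S_BITCMP1 does
  return ((X >> N) & 1u) != 0;
}
static_assert(cmpAndEq(0b1010u, 3, 1u << 3) == bitcmp1(0b1010u, 3), "");
static_assert(cmpAndEq(0b1010u, 2, 0u) == !bitcmp1(0b1010u, 2), "");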
10677 AMDGPU::OpName OpName) const {
10678 if (!ST.needsAlignedVGPRs())
10681 int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10693 bool IsAGPR = RI.isAGPR(MRI, DataReg);
10695 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10698 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10699 : &AMDGPU::VReg_64_Align2RegClass);
10701 .addReg(DataReg, 0, Op.getSubReg())
10706 Op.setSubReg(AMDGPU::sub0);
10728 unsigned Opcode = MI.getOpcode();
10734 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10735 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
10738 if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst when lowering 16-bit SALU instructions to VALU.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
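A typical lookup pattern (illustrative sketch: TII is assumed to be a const SIInstrInfo pointer, MI a MachineInstr, and processOffset is a hypothetical consumer; the 'offset' operand name is just an example):
if (const MachineOperand *Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset))
  if (Off->isImm())
    processOffset(Off->getImm()); // only read the immediate form here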
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were operand OpIdx of MI.
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU-specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the inline constants intended for floating-point values.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
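For intuition (my reading of the AMDGPU inline-constant rules, with the helper assumed to live in AMDGPUBaseInfo.h; treat the exact boundary values as an assumption): small integers are inline constants, arbitrary literals are not.
bool A = AMDGPU::isInlinableLiteral64(42, /*HasInv2Pi=*/true);         // expected: true
bool B = AMDGPU::isInlinableLiteral64(0x12345678, /*HasInv2Pi=*/true); // expected: false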
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
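For example (constexpr helper, assuming llvm/Support/MathExtras.h is included):
static_assert(llvm::maxUIntN(8)  == 255,   "2^8 - 1");
static_assert(llvm::maxUIntN(16) == 65535, "2^16 - 1");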
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
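For example, with the default Skew of 0 (assuming llvm/Support/MathExtras.h is included):
static_assert(llvm::alignDown(37, 8) == 32);  // largest multiple of 8 that is <= 37
static_assert(llvm::alignDown(32, 8) == 32);  // already-aligned values are unchanged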
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
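For example (assuming llvm/Support/MathExtras.h is included):
static_assert(llvm::divideCeil(10, 3) == 4);  // rounds up
static_assert(llvm::divideCeil(9, 3)  == 3);  // exact division is unchanged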
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
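For example (assuming llvm/Support/MathExtras.h is included):
static_assert(llvm::SignExtend64<8>(0xFF)    == -1);      // all 8 low bits set
static_assert(llvm::SignExtend64<16>(0x8000) == -32768);  // only the 16-bit sign bit set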
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
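For example (assuming llvm/Support/MathExtras.h and <cstdint> are included):
static_assert(llvm::maskTrailingOnes<uint32_t>(4)  == 0xFu);
static_assert(llvm::maskTrailingOnes<uint32_t>(32) == 0xFFFFFFFFu);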
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store a worklist of machine instructions.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.