33#include "llvm/IR/IntrinsicsAMDGPU.h"
40#define DEBUG_TYPE "si-instr-info"
42#define GET_INSTRINFO_CTOR_DTOR
43#include "AMDGPUGenInstrInfo.inc"
46#define GET_D16ImageDimIntrinsics_IMPL
47#define GET_ImageDimIntrinsicTable_IMPL
48#define GET_RsrcIntrinsics_IMPL
49#include "AMDGPUGenSearchableTables.inc"
57 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
60 "amdgpu-fix-16-bit-physreg-copies",
61 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
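// The loop above walks backwards from the end of the operand list and stops at
// the first operand that is not MVT::Glue, so N ends up counting only the
// node's non-glue operands.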
  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
  if (Op0Idx == -1 && Op1Idx == -1)
  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
         return MMO->isLoad() && MMO->isInvariant();
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {
      if (FromCycle == nullptr)
      while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())
  int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
  int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
  if (Offset0Idx == -1 || Offset1Idx == -1)
  Offset0Idx -= get(Opc0).NumDefs;
  Offset1Idx -= get(Opc1).NumDefs;
  if (!Load0Offset || !Load1Offset)
  int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
  int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
  if (OffIdx0 == -1 || OffIdx1 == -1)
  OffIdx0 -= get(Opc0).NumDefs;
  OffIdx1 -= get(Opc1).NumDefs;
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
  OffsetIsScalable = false;
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      Offset = EltSize * Offset0;
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (BaseOp && !BaseOp->isFI())
    if (SOffset->isReg())
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)
  return Base1 == Base2;
    int64_t Offset1, bool OffsetIsScalable1,
    int64_t Offset2, bool OffsetIsScalable2,
    unsigned ClusterSize,
    unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
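// Illustrative example (values assumed, not taken from a real schedule): with
// NumBytes == 24 and ClusterSize == 2, LoadSize is 12 bytes, each load rounds
// up to 3 dwords, and the cluster is accepted only if 6 <= MaxMemoryClusterDWords.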
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
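// Heuristic: only ask the scheduler to keep these loads near each other when
// there are at most 16 of them and the distance between their offsets is below 64.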
                              const char *Msg = "illegal VGPR to SGPR copy") {
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");
          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");
         "Destination register of the copy should be an AGPR.");
    for (auto Def = MI, E = MBB.begin(); Def != E; ) {
      if (!Def->modifiesRegister(SrcReg, &RI))
      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)
      bool SafeToPropagate = true;
      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;
      if (!SafeToPropagate)
      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);
    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;
    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
  assert(FirstMI && LastMI);
  LastMI->addRegisterKilled(SrcReg, &RI);
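// The alignment checks above allow two adjacent, even-aligned SGPRs to be
// copied with a single S_MOV_B64 instead of two S_MOV_B32s; the final copy in
// the sequence then kills the source super-register.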
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());
    if (DestReg == SrcReg) {
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");
    if (ST.useRealTrue16Insts()) {
      if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
          (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");
  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {
    if (ST.hasPkMovB32()) {
  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;
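// Opcode selection summary for the per-subregister loop that follows: AGPR to
// AGPR copies use V_ACCVGPR_MOV_B32 on gfx90a+, copies into AGPRs use
// V_ACCVGPR_WRITE_B32, copies out of AGPRs use V_ACCVGPR_READ_B32, and aligned
// 64-bit VGPR copies can use V_MOV_B64 or V_PK_MOV_B32 when the subtarget has
// them; INSTRUCTION_LIST_END marks the cases that must fall back to the
// indirect AGPR copy helper with a register scavenger.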
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();
  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;
  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                            *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
  return &AMDGPU::VGPR_32RegClass;
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");
  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
                                            int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO: {
    return MI.getOperand(0).getReg() == Reg;
  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;
  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;
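// For the *_NOT_B32 cases above, the materialized value is the bitwise
// complement of the 32-bit immediate source, sign-extended to 64 bits; the
// earlier BREV/BFREV cases handle bit-reversed immediates analogously.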
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
                                                       bool IsIndirectSrc) const {
1397 if (IsIndirectSrc) {
1399 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1401 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1403 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1405 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1407 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1409 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1411 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1413 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1415 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1417 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1419 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1420 if (VecSize <= 1024)
1421 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1427 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1429 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1431 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1433 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1435 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1437 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1439 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1441 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1443 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1445 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1447 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1448 if (VecSize <= 1024)
1449 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1456 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1458 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1460 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1462 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1464 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1466 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1468 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1470 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1472 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1474 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1476 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1477 if (VecSize <= 1024)
1478 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1485 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1487 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1489 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1491 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1493 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1495 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1497 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1499 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1501 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1503 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1505 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1506 if (VecSize <= 1024)
1507 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1514 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1516 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1518 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1520 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1521 if (VecSize <= 1024)
1522 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                                 bool IsSGPR) const {
  assert(EltSize == 32 && "invalid reg indexing elt size");
1548 return AMDGPU::SI_SPILL_S32_SAVE;
1550 return AMDGPU::SI_SPILL_S64_SAVE;
1552 return AMDGPU::SI_SPILL_S96_SAVE;
1554 return AMDGPU::SI_SPILL_S128_SAVE;
1556 return AMDGPU::SI_SPILL_S160_SAVE;
1558 return AMDGPU::SI_SPILL_S192_SAVE;
1560 return AMDGPU::SI_SPILL_S224_SAVE;
1562 return AMDGPU::SI_SPILL_S256_SAVE;
1564 return AMDGPU::SI_SPILL_S288_SAVE;
1566 return AMDGPU::SI_SPILL_S320_SAVE;
1568 return AMDGPU::SI_SPILL_S352_SAVE;
1570 return AMDGPU::SI_SPILL_S384_SAVE;
1572 return AMDGPU::SI_SPILL_S512_SAVE;
1574 return AMDGPU::SI_SPILL_S1024_SAVE;
1583 return AMDGPU::SI_SPILL_V16_SAVE;
1585 return AMDGPU::SI_SPILL_V32_SAVE;
1587 return AMDGPU::SI_SPILL_V64_SAVE;
1589 return AMDGPU::SI_SPILL_V96_SAVE;
1591 return AMDGPU::SI_SPILL_V128_SAVE;
1593 return AMDGPU::SI_SPILL_V160_SAVE;
1595 return AMDGPU::SI_SPILL_V192_SAVE;
1597 return AMDGPU::SI_SPILL_V224_SAVE;
1599 return AMDGPU::SI_SPILL_V256_SAVE;
1601 return AMDGPU::SI_SPILL_V288_SAVE;
1603 return AMDGPU::SI_SPILL_V320_SAVE;
1605 return AMDGPU::SI_SPILL_V352_SAVE;
1607 return AMDGPU::SI_SPILL_V384_SAVE;
1609 return AMDGPU::SI_SPILL_V512_SAVE;
1611 return AMDGPU::SI_SPILL_V1024_SAVE;
1620 return AMDGPU::SI_SPILL_AV32_SAVE;
1622 return AMDGPU::SI_SPILL_AV64_SAVE;
1624 return AMDGPU::SI_SPILL_AV96_SAVE;
1626 return AMDGPU::SI_SPILL_AV128_SAVE;
1628 return AMDGPU::SI_SPILL_AV160_SAVE;
1630 return AMDGPU::SI_SPILL_AV192_SAVE;
1632 return AMDGPU::SI_SPILL_AV224_SAVE;
1634 return AMDGPU::SI_SPILL_AV256_SAVE;
1636 return AMDGPU::SI_SPILL_AV288_SAVE;
1638 return AMDGPU::SI_SPILL_AV320_SAVE;
1640 return AMDGPU::SI_SPILL_AV352_SAVE;
1642 return AMDGPU::SI_SPILL_AV384_SAVE;
1644 return AMDGPU::SI_SPILL_AV512_SAVE;
1646 return AMDGPU::SI_SPILL_AV1024_SAVE;
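// The SI_SPILL_*_SAVE pseudos above are selected purely by spill size; a
// hypothetical caller would map a register class to one of these via its spill
// size, e.g. a 64-bit class picks the *64_SAVE form and a 1024-bit class picks
// the *1024_SAVE form.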
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
  return AMDGPU::SI_SPILL_WWM_V32_SAVE;
  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
  if (ST.hasMAIInsts())
      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);
  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");
    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
    if (RI.spillSGPRToVGPR())
1740 return AMDGPU::SI_SPILL_S32_RESTORE;
1742 return AMDGPU::SI_SPILL_S64_RESTORE;
1744 return AMDGPU::SI_SPILL_S96_RESTORE;
1746 return AMDGPU::SI_SPILL_S128_RESTORE;
1748 return AMDGPU::SI_SPILL_S160_RESTORE;
1750 return AMDGPU::SI_SPILL_S192_RESTORE;
1752 return AMDGPU::SI_SPILL_S224_RESTORE;
1754 return AMDGPU::SI_SPILL_S256_RESTORE;
1756 return AMDGPU::SI_SPILL_S288_RESTORE;
1758 return AMDGPU::SI_SPILL_S320_RESTORE;
1760 return AMDGPU::SI_SPILL_S352_RESTORE;
1762 return AMDGPU::SI_SPILL_S384_RESTORE;
1764 return AMDGPU::SI_SPILL_S512_RESTORE;
1766 return AMDGPU::SI_SPILL_S1024_RESTORE;
1775 return AMDGPU::SI_SPILL_V16_RESTORE;
1777 return AMDGPU::SI_SPILL_V32_RESTORE;
1779 return AMDGPU::SI_SPILL_V64_RESTORE;
1781 return AMDGPU::SI_SPILL_V96_RESTORE;
1783 return AMDGPU::SI_SPILL_V128_RESTORE;
1785 return AMDGPU::SI_SPILL_V160_RESTORE;
1787 return AMDGPU::SI_SPILL_V192_RESTORE;
1789 return AMDGPU::SI_SPILL_V224_RESTORE;
1791 return AMDGPU::SI_SPILL_V256_RESTORE;
1793 return AMDGPU::SI_SPILL_V288_RESTORE;
1795 return AMDGPU::SI_SPILL_V320_RESTORE;
1797 return AMDGPU::SI_SPILL_V352_RESTORE;
1799 return AMDGPU::SI_SPILL_V384_RESTORE;
1801 return AMDGPU::SI_SPILL_V512_RESTORE;
1803 return AMDGPU::SI_SPILL_V1024_RESTORE;
1812 return AMDGPU::SI_SPILL_AV32_RESTORE;
1814 return AMDGPU::SI_SPILL_AV64_RESTORE;
1816 return AMDGPU::SI_SPILL_AV96_RESTORE;
1818 return AMDGPU::SI_SPILL_AV128_RESTORE;
1820 return AMDGPU::SI_SPILL_AV160_RESTORE;
1822 return AMDGPU::SI_SPILL_AV192_RESTORE;
1824 return AMDGPU::SI_SPILL_AV224_RESTORE;
1826 return AMDGPU::SI_SPILL_AV256_RESTORE;
1828 return AMDGPU::SI_SPILL_AV288_RESTORE;
1830 return AMDGPU::SI_SPILL_AV320_RESTORE;
1832 return AMDGPU::SI_SPILL_AV352_RESTORE;
1834 return AMDGPU::SI_SPILL_AV384_RESTORE;
1836 return AMDGPU::SI_SPILL_AV512_RESTORE;
1838 return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
  if (ST.hasMAIInsts())
  assert(!RI.isAGPRClass(RC));
  unsigned SpillSize = TRI->getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));
  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");
    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
    if (RI.spillSGPRToVGPR())
                              unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);
  auto *MF = MBB.getParent();
  assert(Info->isEntryFunction());
  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;
  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);
    MBB.addSuccessor(TrapBB);
    Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
        MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
        MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
        .addUse(DoorbellRegMasked)
        .addImm(ECQueueWaveAbort);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
        .addUse(SetWaveAbortBit);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
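// Trap-block summary: the sequence above extracts the queue doorbell ID, masks
// it with DoorbellIDMask (0x3ff), ORs in ECQueueWaveAbort (0x400), and moves
// the result through ttmp2 / m0 so that the trap handler can flag the queue's
// waves as aborted.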
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
    MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
                                               &AMDGPU::SReg_32_XM0RegClass);
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();
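// The 64-bit AV move-immediate pseudo is expanded below by splitting the
// immediate into two 32-bit halves that are written to the sub0 and sub1
// sub-registers of the destination.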
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
      if (ST.hasPkMovB32() &&
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));
      MI.setDesc(get(AMDGPU::S_MOV_B64));
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
2253 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2254 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2255 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2256 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2257 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2258 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2259 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2260 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2261 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2262 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2263 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2264 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2265 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2266 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2267 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2268 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2269 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2270 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2271 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2272 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2273 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2274 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2275 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2276 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2277 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2278 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2279 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2280 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2281 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;
    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());
            .add(MI.getOperand(2))
    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
2312 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2313 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2314 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2315 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2316 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2317 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2318 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2319 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2320 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2321 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2322 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2323 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();
    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
            .add(MI.getOperand(2))
    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
2356 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2357 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2358 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2359 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2360 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2361 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2362 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2363 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2364 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2365 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2366 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2367 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();
    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
    Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));
    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();
  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));
  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)
    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());
    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");
    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);
    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
    OffsetMO->setImm(FinalOffset);
    MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);
  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));
      MovDPP.addImm(MO.getImm());
    Split[Part] = MovDPP;
  if (Dst.isVirtual())
  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
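// expandMovDPP64 either keeps a single V_MOV_B64_dpp (when the subtarget can do
// 64-bit DPP moves) or splits the move into two 32-bit DPP moves, one per
// sub0/sub1 half, and returns the resulting pair of instructions.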
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)
  return std::nullopt;
                                        AMDGPU::OpName Src0OpName,
                                        AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");
  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();
  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);
  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();
  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())
  int64_t NonRegVal = NonRegOp1.getImm();
  NonRegOp2.setImm(NonRegVal);
                                          unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");
  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)
  if (Src0Idx > Src1Idx)
  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");
                          Src1, AMDGPU::OpName::src1_modifiers);
                       AMDGPU::OpName::src1_sel);
                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {
                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())
  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
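// Commutation notes: findCommutedOpIndices only ever reports the src0/src1 pair
// as swappable, and the helpers above swap the matching source-modifier (and
// SDWA sel) operands together with the operands themselves.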
                                     int64_t BrOffset) const {
  return MI.getOperand(0).getMBB();
  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");
  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);
  assert(RS && "RegScavenger required for long branching");
  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)
  ApplyHazardWorkarounds();
      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();
  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();
    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();
  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)
  Cond.push_back(I->getOperand(1));
  if (I == MBB.end()) {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();
                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();
    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
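// Size accounting above: each emitted branch is reported as 4 bytes, doubled to
// 8 when the subtarget has the offset-3f hardware bug workaround, so the
// conditional-plus-unconditional pair reports 16 or 8 bytes.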
  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;
    if (MRI.getRegClass(FalseReg) != RC)
    if (NumInsts % 2 == 0)
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);
  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {
  if (DstSize == 64 && Pred == SCC_TRUE) {
  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;
  if (Pred == SCC_TRUE) {
    SelOp = AMDGPU::S_CSELECT_B32;
    EltRC = &AMDGPU::SGPR_32RegClass;
    SelOp = AMDGPU::S_CSELECT_B64;
    EltRC = &AMDGPU::SGPR_64RegClass;
    SubIndices = Sub0_15_64;
      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);
    unsigned SubIdx = SubIndices[Idx];
    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);
                                             unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;
3478 case AMDGPU::V_MAC_F16_e32:
3479 case AMDGPU::V_MAC_F16_e64:
3480 case AMDGPU::V_MAD_F16_e64:
3481 return AMDGPU::V_MADAK_F16;
3482 case AMDGPU::V_MAC_F32_e32:
3483 case AMDGPU::V_MAC_F32_e64:
3484 case AMDGPU::V_MAD_F32_e64:
3485 return AMDGPU::V_MADAK_F32;
3486 case AMDGPU::V_FMAC_F32_e32:
3487 case AMDGPU::V_FMAC_F32_e64:
3488 case AMDGPU::V_FMA_F32_e64:
3489 return AMDGPU::V_FMAAK_F32;
3490 case AMDGPU::V_FMAC_F16_e32:
3491 case AMDGPU::V_FMAC_F16_e64:
3492 case AMDGPU::V_FMAC_F16_t16_e64:
3493 case AMDGPU::V_FMAC_F16_fake16_e64:
3494 case AMDGPU::V_FMA_F16_e64:
3495 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3496 ? AMDGPU::V_FMAAK_F16_t16
3497 : AMDGPU::V_FMAAK_F16_fake16
3498 : AMDGPU::V_FMAAK_F16;
3499 case AMDGPU::V_FMAC_F64_e32:
3500 case AMDGPU::V_FMAC_F64_e64:
3501 case AMDGPU::V_FMA_F64_e64:
3502 return AMDGPU::V_FMAAK_F64;
3510 case AMDGPU::V_MAC_F16_e32:
3511 case AMDGPU::V_MAC_F16_e64:
3512 case AMDGPU::V_MAD_F16_e64:
3513 return AMDGPU::V_MADMK_F16;
3514 case AMDGPU::V_MAC_F32_e32:
3515 case AMDGPU::V_MAC_F32_e64:
3516 case AMDGPU::V_MAD_F32_e64:
3517 return AMDGPU::V_MADMK_F32;
3518 case AMDGPU::V_FMAC_F32_e32:
3519 case AMDGPU::V_FMAC_F32_e64:
3520 case AMDGPU::V_FMA_F32_e64:
3521 return AMDGPU::V_FMAMK_F32;
3522 case AMDGPU::V_FMAC_F16_e32:
3523 case AMDGPU::V_FMAC_F16_e64:
3524 case AMDGPU::V_FMAC_F16_t16_e64:
3525 case AMDGPU::V_FMAC_F16_fake16_e64:
3526 case AMDGPU::V_FMA_F16_e64:
3527 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3528 ? AMDGPU::V_FMAMK_F16_t16
3529 : AMDGPU::V_FMAMK_F16_fake16
3530 : AMDGPU::V_FMAMK_F16;
3531 case AMDGPU::V_FMAC_F64_e32:
3532 case AMDGPU::V_FMAC_F64_e64:
3533 case AMDGPU::V_FMA_F64_e64:
3534 return AMDGPU::V_FMAMK_F64;
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);
  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");
    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));
      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
      if (ImmDefSize == 32 &&
    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;
    if (RI.hasVGPRs(DstRC))
      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)
    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {
      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);
    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);
  if (HasMultipleUses)
  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {
    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);
        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())
    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)
    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
      if (Def && Def->isMoveImmediate() &&
      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)
      unsigned SrcSubReg = RegSrc->getSubReg();
      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
    if (ST.getConstantBusLimit(Opc) < 2) {
      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&
      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))
      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)
      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      const std::optional<int64_t> SubRegImm =
      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;
  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();
         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
  Imm = Def->getOperand(1).getImm();
  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())
3981 unsigned NumOps =
MI.getNumOperands();
3984 if (
Op.isReg() &&
Op.isKill())
3992 case AMDGPU::V_MAC_F16_e32:
3993 case AMDGPU::V_MAC_F16_e64:
3994 return AMDGPU::V_MAD_F16_e64;
3995 case AMDGPU::V_MAC_F32_e32:
3996 case AMDGPU::V_MAC_F32_e64:
3997 return AMDGPU::V_MAD_F32_e64;
3998 case AMDGPU::V_MAC_LEGACY_F32_e32:
3999 case AMDGPU::V_MAC_LEGACY_F32_e64:
4000 return AMDGPU::V_MAD_LEGACY_F32_e64;
4001 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4002 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4003 return AMDGPU::V_FMA_LEGACY_F32_e64;
4004 case AMDGPU::V_FMAC_F16_e32:
4005 case AMDGPU::V_FMAC_F16_e64:
4006 case AMDGPU::V_FMAC_F16_t16_e64:
4007 case AMDGPU::V_FMAC_F16_fake16_e64:
4008 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4009 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4010 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4011 : AMDGPU::V_FMA_F16_gfx9_e64;
4012 case AMDGPU::V_FMAC_F32_e32:
4013 case AMDGPU::V_FMAC_F32_e64:
4014 return AMDGPU::V_FMA_F32_e64;
4015 case AMDGPU::V_FMAC_F64_e32:
4016 case AMDGPU::V_FMAC_F64_e64:
4017 return AMDGPU::V_FMA_F64_e64;
  unsigned Opc = MI.getOpcode();
  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));
      if (Def.isEarlyClobber() && Def.isReg() &&
        auto UpdateDefIndex = [&](LiveRange &LR) {
          auto *S = LR.find(OldIndex);
          if (S != LR.end() && S->start == OldIndex) {
            assert(S->valno && S->valno->def == OldIndex);
            S->start = NewIndex;
            S->valno->def = NewIndex;
        for (auto &SR : LI.subranges())
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
       !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
    const auto killDef = [&]() -> void {
      if (MRI.hasOneNonDBGUse(DefReg)) {
        DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
        DefMI->getOperand(0).setIsDead(true);
        for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
        Register DummyReg = MRI.cloneVirtualRegister(DefReg);
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);
          MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
  if (Src0Literal && !ST.hasVOP3Literal())
    MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:
  if (MI.isTerminator() || MI.isPosition())
  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)
  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))
  if (MI.memoperands_empty())
    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);
  unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
  if (MI.isCall() || MI.isInlineAsm())
  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
  if (MI.isMetaInstruction())
  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
    return MI.readsRegister(AMDGPU::EXEC, &RI);
  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {
                                    ST.hasInv2PiInlineImm());
                                    ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                      ST.hasInv2PiInlineImm());
  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&
  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&
  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
  return ST.hasVOP3Literal();
                                      int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))
  return RI.opCanUseInlineConstant(OpInfo.OperandType);
         "unexpected imm-like operand kind");
  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
                                   AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default: return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_CNDMASK_B32_e64:
    if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));
  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;
  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
  return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);
  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:
4833 switch (
MI.getOpcode()) {
4834 case AMDGPU::V_READLANE_B32:
4835 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
4836 case AMDGPU::V_WRITELANE_B32:
4837 case AMDGPU::SI_SPILL_S32_TO_VGPR:
4844 if (
MI.isPreISelOpcode() ||
4845 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
4860 if (
SubReg.getReg().isPhysical())
4863 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
4874 if (RI.isVectorRegister(
MRI, SrcReg) && RI.isSGPRReg(
MRI, DstReg)) {
4875 ErrInfo =
"illegal copy from vector register to SGPR";
4893 if (!
MRI.isSSA() &&
MI.isCopy())
4894 return verifyCopy(
MI,
MRI, ErrInfo);
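  // verifyInstruction is the MachineVerifier hook for the AMDGPU backend.
  // Each failed check below fills ErrInfo with a human-readable message and
  // returns false, which the verifier reports together with the offending
  // MachineInstr.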
  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int RegClass = Desc.operands()[i].RegClass;
    switch (OpInfo.OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Expected inline constant for operand.";
      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

    if (OpInfo.isGenericType())

    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
      if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
                RI.getSubRegisterClass(RC, MO.getSubReg())) {
          RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());

      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
        ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())
        ErrInfo = "Operand has incorrect register class.";
    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";

    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
        int64_t Imm = MO->getImm();
          ErrInfo = "Invalid SDWA selection";

    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
      if (!ST.hasSDWAScalar()) {
        if (!MO.isReg() ||
            !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
            "Only reg allowed as operands in SDWA instructions on GFX9+";

    if (!ST.hasSDWAOmod()) {
      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
      } else if (!ST.hasSDWAOutModsVOPC()) {
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";
          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";
        ErrInfo = "Dst register should use same physical register as preserved";
  if (isImage(Opcode) && !MI.mayStore()) {
      if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
          AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
        uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
        if (RegCount > DstSize) {
          ErrInfo = "Image instruction returns too many registers for dst "

  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

    int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
        } else if (!MO.isFI()) {
              ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
                return !RI.regsOverlap(SGPRUsed, SGPR);

    if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

    if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
      ErrInfo = "VOP3 instruction uses literal";
  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)

    if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
      ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

      ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

    if (Desc.isBranch()) {
        ErrInfo = "invalid branch target for SOPK instruction";
        ErrInfo = "invalid immediate for SOPK instruction";
        ErrInfo = "invalid immediate for SOPK instruction";

  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

  if (MI.mayStore() &&
    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";

  if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

  if (isDS(MI) && !ST.hasGDS()) {
    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr0);
    AMDGPU::OpName RSrcOpName =
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);

        ErrInfo = "dim is out of range";

      if (ST.hasR128A16()) {
        IsA16 = R128A16->getImm() != 0;
      } else if (ST.hasA16()) {
        IsA16 = A16->getImm() != 0;

      bool IsNSA = RsrcIdx - VAddr0Idx > 1;

      unsigned AddrWords =

      unsigned VAddrWords;
        VAddrWords = RsrcIdx - VAddr0Idx;
        if (ST.hasPartialNSAEncoding() &&
          unsigned LastVAddrIdx = RsrcIdx - 1;
          VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

      if (VAddrWords != AddrWords) {
                          << " but got " << VAddrWords << "\n");
        ErrInfo = "bad vaddr size";

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST && !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
      if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
    AMDGPU::OpName DataName =
        isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;

    if (ST.hasGFX90AInsts()) {
      if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
          (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";

      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
          (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";

  if (ST.needsAlignedVGPRs()) {
    const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
      if (Reg.isPhysical())
        return !(RI.getHWRegIndex(Reg) & 1);

      return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
             !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);

    if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
        Opcode == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";

      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";

  if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";

    if (!ST.hasScaleOffset()) {
      ErrInfo = "Subtarget does not support offset scaling";
      ErrInfo = "Instruction does not support offset scaling";

  for (unsigned I = 0; I < 3; ++I) {
5679 switch (
MI.getOpcode()) {
5680 default:
return AMDGPU::INSTRUCTION_LIST_END;
5681 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5682 case AMDGPU::COPY:
return AMDGPU::COPY;
5683 case AMDGPU::PHI:
return AMDGPU::PHI;
5684 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5685 case AMDGPU::WQM:
return AMDGPU::WQM;
5686 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5687 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5688 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5689 case AMDGPU::S_MOV_B32: {
5691 return MI.getOperand(1).isReg() ||
5692 RI.isAGPR(
MRI,
MI.getOperand(0).getReg()) ?
5693 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5695 case AMDGPU::S_ADD_I32:
5696 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5697 case AMDGPU::S_ADDC_U32:
5698 return AMDGPU::V_ADDC_U32_e32;
5699 case AMDGPU::S_SUB_I32:
5700 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5703 case AMDGPU::S_ADD_U32:
5704 return AMDGPU::V_ADD_CO_U32_e32;
5705 case AMDGPU::S_SUB_U32:
5706 return AMDGPU::V_SUB_CO_U32_e32;
5707 case AMDGPU::S_ADD_U64_PSEUDO:
5708 return AMDGPU::V_ADD_U64_PSEUDO;
5709 case AMDGPU::S_SUB_U64_PSEUDO:
5710 return AMDGPU::V_SUB_U64_PSEUDO;
5711 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5712 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5713 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5714 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5715 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5716 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5717 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5718 case AMDGPU::S_XNOR_B32:
5719 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5720 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5721 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5722 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5723 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5724 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5725 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5726 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5727 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5728 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5729 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5730 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5731 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5732 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5733 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5734 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5735 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5736 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5737 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5738 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5739 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5740 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5741 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5742 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5743 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5744 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5745 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5746 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5747 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5748 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5749 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5750 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5751 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5752 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5753 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5754 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5755 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5756 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5757 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5758 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5759 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5760 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5761 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5762 case AMDGPU::S_CVT_F32_F16:
5763 case AMDGPU::S_CVT_HI_F32_F16:
5764 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5765 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5766 case AMDGPU::S_CVT_F16_F32:
5767 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5768 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5769 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5770 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5771 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5772 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5773 case AMDGPU::S_CEIL_F16:
5774 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
5775 : AMDGPU::V_CEIL_F16_fake16_e64;
5776 case AMDGPU::S_FLOOR_F16:
5777 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
5778 : AMDGPU::V_FLOOR_F16_fake16_e64;
5779 case AMDGPU::S_TRUNC_F16:
5780 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5781 : AMDGPU::V_TRUNC_F16_fake16_e64;
5782 case AMDGPU::S_RNDNE_F16:
5783 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
5784 : AMDGPU::V_RNDNE_F16_fake16_e64;
5785 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5786 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5787 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5788 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5789 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5790 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5791 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5792 case AMDGPU::S_ADD_F16:
5793 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
5794 : AMDGPU::V_ADD_F16_fake16_e64;
5795 case AMDGPU::S_SUB_F16:
5796 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
5797 : AMDGPU::V_SUB_F16_fake16_e64;
5798 case AMDGPU::S_MIN_F16:
5799 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
5800 : AMDGPU::V_MIN_F16_fake16_e64;
5801 case AMDGPU::S_MAX_F16:
5802 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
5803 : AMDGPU::V_MAX_F16_fake16_e64;
5804 case AMDGPU::S_MINIMUM_F16:
5805 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5806 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5807 case AMDGPU::S_MAXIMUM_F16:
5808 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5809 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5810 case AMDGPU::S_MUL_F16:
5811 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
5812 : AMDGPU::V_MUL_F16_fake16_e64;
5813 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5814 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5815 case AMDGPU::S_FMAC_F16:
5816 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
5817 : AMDGPU::V_FMAC_F16_fake16_e64;
5818 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5819 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5820 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5821 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5822 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5823 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5824 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5825 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5826 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5827 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5828 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5829 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5830 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5831 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5832 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5833 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5834 case AMDGPU::S_CMP_LT_F16:
5835 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
5836 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5837 case AMDGPU::S_CMP_EQ_F16:
5838 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
5839 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5840 case AMDGPU::S_CMP_LE_F16:
5841 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
5842 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5843 case AMDGPU::S_CMP_GT_F16:
5844 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
5845 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5846 case AMDGPU::S_CMP_LG_F16:
5847 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
5848 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5849 case AMDGPU::S_CMP_GE_F16:
5850 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
5851 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5852 case AMDGPU::S_CMP_O_F16:
5853 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
5854 : AMDGPU::V_CMP_O_F16_fake16_e64;
5855 case AMDGPU::S_CMP_U_F16:
5856 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
5857 : AMDGPU::V_CMP_U_F16_fake16_e64;
5858 case AMDGPU::S_CMP_NGE_F16:
5859 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
5860 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5861 case AMDGPU::S_CMP_NLG_F16:
5862 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
5863 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5864 case AMDGPU::S_CMP_NGT_F16:
5865 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
5866 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5867 case AMDGPU::S_CMP_NLE_F16:
5868 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
5869 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5870 case AMDGPU::S_CMP_NEQ_F16:
5871 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
5872 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5873 case AMDGPU::S_CMP_NLT_F16:
5874 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
5875 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5876 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5877 case AMDGPU::V_S_EXP_F16_e64:
5878 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
5879 : AMDGPU::V_EXP_F16_fake16_e64;
5880 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5881 case AMDGPU::V_S_LOG_F16_e64:
5882 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
5883 : AMDGPU::V_LOG_F16_fake16_e64;
5884 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5885 case AMDGPU::V_S_RCP_F16_e64:
5886 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
5887 : AMDGPU::V_RCP_F16_fake16_e64;
5888 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5889 case AMDGPU::V_S_RSQ_F16_e64:
5890 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
5891 : AMDGPU::V_RSQ_F16_fake16_e64;
5892 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5893 case AMDGPU::V_S_SQRT_F16_e64:
5894 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
5895 : AMDGPU::V_SQRT_F16_fake16_e64;
5898 "Unexpected scalar opcode without corresponding vector one!");
5947 "Not a whole wave func");
5950 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
5951 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
5962 case AMDGPU::AV_32RegClassID:
5963 RCID = AMDGPU::VGPR_32RegClassID;
5965 case AMDGPU::AV_64RegClassID:
5966 RCID = AMDGPU::VReg_64RegClassID;
5968 case AMDGPU::AV_96RegClassID:
5969 RCID = AMDGPU::VReg_96RegClassID;
5971 case AMDGPU::AV_128RegClassID:
5972 RCID = AMDGPU::VReg_128RegClassID;
5974 case AMDGPU::AV_160RegClassID:
5975 RCID = AMDGPU::VReg_160RegClassID;
5977 case AMDGPU::AV_512RegClassID:
5978 RCID = AMDGPU::VReg_512RegClassID;
5993 auto RegClass = TID.
operands()[OpNum].RegClass;
5996 return RI.getRegClass(RegClass);
6002 unsigned OpNo)
const {
6004 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6005 Desc.operands()[OpNo].RegClass == -1) {
6008 if (Reg.isVirtual()) {
6010 MI.getParent()->getParent()->getRegInfo();
6011 return MRI.getRegClass(Reg);
6013 return RI.getPhysRegBaseClass(Reg);
6016 unsigned RCID =
Desc.operands()[OpNo].RegClass;
6025 unsigned RCID =
get(
MI.getOpcode()).operands()[
OpIdx].RegClass;
6027 unsigned Size = RI.getRegSizeInBits(*RC);
6028 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6029 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6030 : AMDGPU::V_MOV_B32_e32;
6032 Opcode = AMDGPU::COPY;
6033 else if (RI.isSGPRClass(RC))
6034 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
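  // This picks the move opcode used to materialize an operand into a register
  // of the class the instruction expects: V_MOV_B64_PSEUDO, V_MOV_B16_t16_e64
  // or V_MOV_B32_e32 by operand size for vector classes, S_MOV_B64/S_MOV_B32
  // for scalar classes, and a plain COPY when the operand is already a
  // register.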
    return RI.getSubReg(SuperReg.getReg(), SubIdx);

  unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);

  if (SubIdx == AMDGPU::sub0)
  if (SubIdx == AMDGPU::sub1)

void SIInstrInfo::swapOperands(MachineInstr &Inst) const {

  if (Reg.isPhysical())

    return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr;

  return RI.getCommonSubClass(DRC, RC) != nullptr;
6115 unsigned Opc =
MI.getOpcode();
6121 constexpr const AMDGPU::OpName OpNames[] = {
6122 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6125 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6126 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6136 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6137 if (IsAGPR && !ST.hasMAIInsts())
6139 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6143 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6144 const int DataIdx = AMDGPU::getNamedOperandIdx(
6145 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6146 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6147 MI.getOperand(DataIdx).isReg() &&
6148 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6150 if ((
int)
OpIdx == DataIdx) {
6151 if (VDstIdx != -1 &&
6152 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6155 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6156 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6157 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6162 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6163 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6183 constexpr const unsigned NumOps = 3;
6184 constexpr const AMDGPU::OpName OpNames[
NumOps * 2] = {
6185 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6186 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6187 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6192 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6195 MO = &
MI.getOperand(SrcIdx);
6202 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6206 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6210 return !OpSel && !OpSelHi;
6220 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) :
nullptr;
6229 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6230 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6234 if (!LiteralLimit--)
6244 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6252 if (--ConstantBusLimit <= 0)
6264 if (!LiteralLimit--)
6266 if (--ConstantBusLimit <= 0)
6272 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6276 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6278 !
Op.isIdenticalTo(*MO))
6288 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6302 bool Is64BitOp = Is64BitFPOp ||
6309 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6318 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6336 bool IsGFX950Only = ST.hasGFX950Insts();
6337 bool IsGFX940Only = ST.hasGFX940Insts();
6339 if (!IsGFX950Only && !IsGFX940Only)
6357 unsigned Opcode =
MI.getOpcode();
6359 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6360 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6361 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6362 case AMDGPU::V_MQSAD_U32_U8_e64:
6363 case AMDGPU::V_PK_ADD_F16:
6364 case AMDGPU::V_PK_ADD_F32:
6365 case AMDGPU::V_PK_ADD_I16:
6366 case AMDGPU::V_PK_ADD_U16:
6367 case AMDGPU::V_PK_ASHRREV_I16:
6368 case AMDGPU::V_PK_FMA_F16:
6369 case AMDGPU::V_PK_FMA_F32:
6370 case AMDGPU::V_PK_FMAC_F16_e32:
6371 case AMDGPU::V_PK_FMAC_F16_e64:
6372 case AMDGPU::V_PK_LSHLREV_B16:
6373 case AMDGPU::V_PK_LSHRREV_B16:
6374 case AMDGPU::V_PK_MAD_I16:
6375 case AMDGPU::V_PK_MAD_U16:
6376 case AMDGPU::V_PK_MAX_F16:
6377 case AMDGPU::V_PK_MAX_I16:
6378 case AMDGPU::V_PK_MAX_U16:
6379 case AMDGPU::V_PK_MIN_F16:
6380 case AMDGPU::V_PK_MIN_I16:
6381 case AMDGPU::V_PK_MIN_U16:
6382 case AMDGPU::V_PK_MOV_B32:
6383 case AMDGPU::V_PK_MUL_F16:
6384 case AMDGPU::V_PK_MUL_F32:
6385 case AMDGPU::V_PK_MUL_LO_U16:
6386 case AMDGPU::V_PK_SUB_I16:
6387 case AMDGPU::V_PK_SUB_U16:
6388 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6397 unsigned Opc =
MI.getOpcode();
6400 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6403 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6409 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6416 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6419 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6425 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6435 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6436 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6437 if (!RI.isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6449 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6451 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6463 if (HasImplicitSGPR || !
MI.isCommutable()) {
6480 if (CommutedOpc == -1) {
6485 MI.setDesc(
get(CommutedOpc));
6489 bool Src0Kill = Src0.
isKill();
6493 else if (Src1.
isReg()) {
6508 unsigned Opc =
MI.getOpcode();
6511 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6512 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6513 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6516 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6517 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6518 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6519 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6520 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6521 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6522 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6526 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6527 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6532 if (VOP3Idx[2] != -1) {
6534 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6535 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6544 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6545 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6547 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6549 SGPRsUsed.
insert(SGPRReg);
6553 for (
int Idx : VOP3Idx) {
6562 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6574 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6581 if (ConstantBusLimit > 0) {
6593 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6594 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
  for (unsigned I = 0; I < 3; ++I) {

  SRC = RI.getCommonSubClass(SRC, DstRC);

  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;

  if (RI.hasAGPRs(VRC)) {
    VRC = RI.getEquivalentVGPRClass(VRC);
    Register NewSrcReg = MRI.createVirtualRegister(VRC);
            get(TargetOpcode::COPY), NewSrcReg)

            get(AMDGPU::V_READFIRSTLANE_B32), DstReg)

  for (unsigned i = 0; i < SubRegs; ++i) {
    Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
            get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
        .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));

          get(AMDGPU::REG_SEQUENCE), DstReg);
  for (unsigned i = 0; i < SubRegs; ++i) {
    MIB.addImm(RI.getSubRegFromChannel(i));
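  // Reads a uniform value out of a VGPR into SGPRs: each 32-bit channel of
  // the source is copied with V_READFIRSTLANE_B32 into a fresh SGPR and the
  // pieces are stitched back together with a REG_SEQUENCE, so the result is
  // the value held by the first active lane.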
6661 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6663 SBase->setReg(SGPR);
6666 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6674 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6675 if (OldSAddrIdx < 0)
6691 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6692 if (NewVAddrIdx < 0)
6695 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6699 if (OldVAddrIdx >= 0) {
6701 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6713 if (OldVAddrIdx == NewVAddrIdx) {
6716 MRI.removeRegOperandFromUseList(&NewVAddr);
6717 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6721 MRI.removeRegOperandFromUseList(&NewVAddr);
6722 MRI.addRegOperandToUseList(&NewVAddr);
6724 assert(OldSAddrIdx == NewVAddrIdx);
6726 if (OldVAddrIdx >= 0) {
6727 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6728 AMDGPU::OpName::vdst_in);
6732 if (NewVDstIn != -1) {
6733 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6739 if (NewVDstIn != -1) {
6740 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6761 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6781 unsigned OpSubReg =
Op.getSubReg();
6784 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6790 Register DstReg =
MRI.createVirtualRegister(DstRC);
6800 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6803 bool ImpDef = Def->isImplicitDef();
6804 while (!ImpDef && Def && Def->isCopy()) {
6805 if (Def->getOperand(1).getReg().isPhysical())
6807 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6808 ImpDef = Def && Def->isImplicitDef();
6810 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6829 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6835 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6836 unsigned NumSubRegs =
RegSize / 32;
6837 Register VScalarOp = ScalarOp->getReg();
6839 if (NumSubRegs == 1) {
6840 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6842 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6845 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6847 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6853 CondReg = NewCondReg;
6855 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6863 ScalarOp->setReg(CurReg);
6864 ScalarOp->setIsKill();
    assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
           "Unhandled register size");

    for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
          MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
          .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
          .addReg(VScalarOp, VScalarOpUndef,
                  TRI->getSubRegFromChannel(Idx + 1));

      Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)

      Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
      auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
      if (NumSubRegs <= 2)
        Cmp.addReg(VScalarOp);
        Cmp.addReg(VScalarOp, VScalarOpUndef,
                   TRI->getSubRegFromChannel(Idx, 2));

        CondReg = NewCondReg;
        Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

    const auto *SScalarOpRC =
        TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
    Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
    unsigned Channel = 0;
    for (Register Piece : ReadlanePieces) {
      Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));

    ScalarOp->setReg(SScalarOp);
    ScalarOp->setIsKill();

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  MRI.setSimpleHint(SaveExec, CondReg);
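  // This is the body of the "waterfall" loop used when an operand that must
  // be uniform (e.g. a resource descriptor or soffset) is actually divergent:
  // the operand is read for the first active lane with V_READFIRSTLANE_B32,
  // compared against the VGPR value to build a mask of lanes holding the same
  // value, the instruction runs with EXEC restricted to that mask, and the
  // loop repeats until every lane has been serviced.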
6969 if (!Begin.isValid())
6971 if (!End.isValid()) {
6977 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6985 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
6986 std::numeric_limits<unsigned>::max()) !=
6989 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6995 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7004 for (
auto I = Begin;
I != AfterMI;
I++) {
7005 for (
auto &MO :
I->all_uses())
7006 MRI.clearKillFlags(MO.getReg());
7031 MBB.addSuccessor(LoopBB);
7041 for (
auto &Succ : RemainderBB->
successors()) {
7065static std::tuple<unsigned, unsigned>
7073 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7074 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7077 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7078 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7079 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7080 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7081 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7098 .
addImm(AMDGPU::sub0_sub1)
7104 return std::tuple(RsrcPtr, NewSRsrc);
7141 if (
MI.getOpcode() == AMDGPU::PHI) {
7143 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7144 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7147 MRI.getRegClass(
MI.getOperand(i).getReg());
7148 if (RI.hasVectorRegisters(OpRC)) {
7162 VRC = &AMDGPU::VReg_1RegClass;
7165 ? RI.getEquivalentAGPRClass(SRC)
7166 : RI.getEquivalentVGPRClass(SRC);
7169 ? RI.getEquivalentAGPRClass(VRC)
7170 : RI.getEquivalentVGPRClass(VRC);
7178 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7180 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7196 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7199 if (RI.hasVGPRs(DstRC)) {
7203 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7205 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7223 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7228 if (DstRC != Src0RC) {
7237 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7239 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7245 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7246 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7247 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7248 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7249 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7250 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7251 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7253 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7266 ? AMDGPU::OpName::rsrc
7267 : AMDGPU::OpName::srsrc;
7269 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7272 AMDGPU::OpName SampOpName =
7273 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7275 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7282 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7284 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7288 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7289 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7294 while (Start->getOpcode() != FrameSetupOpcode)
7297 while (End->getOpcode() != FrameDestroyOpcode)
7301 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7302 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7310 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7312 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7314 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7324 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7325 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7326 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7327 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7329 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7336 bool isSoffsetLegal =
true;
7338 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7339 if (SoffsetIdx != -1) {
7342 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7343 isSoffsetLegal =
false;
7347 bool isRsrcLegal =
true;
7349 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7350 if (RsrcIdx != -1) {
7353 isRsrcLegal =
false;
7357 if (isRsrcLegal && isSoffsetLegal)
7381 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7382 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7383 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7385 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7386 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7387 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7389 unsigned RsrcPtr, NewSRsrc;
7396 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7403 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7417 }
else if (!VAddr && ST.hasAddr64()) {
7421 "FIXME: Need to emit flat atomics here");
7423 unsigned RsrcPtr, NewSRsrc;
7426 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7449 MIB.
addImm(CPol->getImm());
7454 MIB.
addImm(TFE->getImm());
7474 MI.removeFromParent();
7479 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7481 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7485 if (!isSoffsetLegal) {
7497 if (!isSoffsetLegal) {
7509 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7510 if (RsrcIdx != -1) {
7511 DeferredList.insert(
MI);
7516 return DeferredList.contains(
MI);
7526 if (!ST.useRealTrue16Insts())
7529 unsigned Opcode =
MI.getOpcode();
7533 OpIdx >=
get(Opcode).getNumOperands() ||
7534 get(Opcode).operands()[
OpIdx].RegClass == -1)
7538 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7542 if (!RI.isVGPRClass(CurrRC))
7545 unsigned RCID =
get(Opcode).operands()[
OpIdx].RegClass;
7547 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7548 Op.setSubReg(AMDGPU::lo16);
7549 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7551 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7552 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7559 Op.setReg(NewDstReg);
7571 while (!Worklist.
empty()) {
7585 "Deferred MachineInstr are not supposed to re-populate worklist");
7603 case AMDGPU::S_ADD_I32:
7604 case AMDGPU::S_SUB_I32: {
7608 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7616 case AMDGPU::S_MUL_U64:
7617 if (ST.hasVectorMulU64()) {
7618 NewOpcode = AMDGPU::V_MUL_U64_e64;
7622 splitScalarSMulU64(Worklist, Inst, MDT);
7626 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7627 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7630 splitScalarSMulPseudo(Worklist, Inst, MDT);
7634 case AMDGPU::S_AND_B64:
7635 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7639 case AMDGPU::S_OR_B64:
7640 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7644 case AMDGPU::S_XOR_B64:
7645 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7649 case AMDGPU::S_NAND_B64:
7650 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7654 case AMDGPU::S_NOR_B64:
7655 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7659 case AMDGPU::S_XNOR_B64:
7660 if (ST.hasDLInsts())
7661 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7663 splitScalar64BitXnor(Worklist, Inst, MDT);
7667 case AMDGPU::S_ANDN2_B64:
7668 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7672 case AMDGPU::S_ORN2_B64:
7673 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7677 case AMDGPU::S_BREV_B64:
7678 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7682 case AMDGPU::S_NOT_B64:
7683 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7687 case AMDGPU::S_BCNT1_I32_B64:
7688 splitScalar64BitBCNT(Worklist, Inst);
7692 case AMDGPU::S_BFE_I64:
7693 splitScalar64BitBFE(Worklist, Inst);
7697 case AMDGPU::S_FLBIT_I32_B64:
7698 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7701 case AMDGPU::S_FF1_I32_B64:
7702 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7706 case AMDGPU::S_LSHL_B32:
7707 if (ST.hasOnlyRevVALUShifts()) {
7708 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7712 case AMDGPU::S_ASHR_I32:
7713 if (ST.hasOnlyRevVALUShifts()) {
7714 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7718 case AMDGPU::S_LSHR_B32:
7719 if (ST.hasOnlyRevVALUShifts()) {
7720 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7724 case AMDGPU::S_LSHL_B64:
7725 if (ST.hasOnlyRevVALUShifts()) {
7727 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7728 : AMDGPU::V_LSHLREV_B64_e64;
7732 case AMDGPU::S_ASHR_I64:
7733 if (ST.hasOnlyRevVALUShifts()) {
7734 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7738 case AMDGPU::S_LSHR_B64:
7739 if (ST.hasOnlyRevVALUShifts()) {
7740 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7745 case AMDGPU::S_ABS_I32:
7746 lowerScalarAbs(Worklist, Inst);
7750 case AMDGPU::S_CBRANCH_SCC0:
7751 case AMDGPU::S_CBRANCH_SCC1: {
7754 bool IsSCC = CondReg == AMDGPU::SCC;
7762 case AMDGPU::S_BFE_U64:
7763 case AMDGPU::S_BFM_B64:
7766 case AMDGPU::S_PACK_LL_B32_B16:
7767 case AMDGPU::S_PACK_LH_B32_B16:
7768 case AMDGPU::S_PACK_HL_B32_B16:
7769 case AMDGPU::S_PACK_HH_B32_B16:
7770 movePackToVALU(Worklist,
MRI, Inst);
7774 case AMDGPU::S_XNOR_B32:
7775 lowerScalarXnor(Worklist, Inst);
7779 case AMDGPU::S_NAND_B32:
7780 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7784 case AMDGPU::S_NOR_B32:
7785 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7789 case AMDGPU::S_ANDN2_B32:
7790 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7794 case AMDGPU::S_ORN2_B32:
7795 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7803 case AMDGPU::S_ADD_CO_PSEUDO:
7804 case AMDGPU::S_SUB_CO_PSEUDO: {
7805 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7806 ? AMDGPU::V_ADDC_U32_e64
7807 : AMDGPU::V_SUBB_U32_e64;
7808 const auto *CarryRC = RI.getWaveMaskRegClass();
7811 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7812 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7819 Register DestReg =
MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
7830 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7834 case AMDGPU::S_UADDO_PSEUDO:
7835 case AMDGPU::S_USUBO_PSEUDO: {
7842 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7843 ? AMDGPU::V_ADD_CO_U32_e64
7844 : AMDGPU::V_SUB_CO_U32_e64;
7846 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest0.
getReg()));
7847 Register DestReg =
MRI.createVirtualRegister(NewRC);
7855 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7862 case AMDGPU::S_CSELECT_B32:
7863 case AMDGPU::S_CSELECT_B64:
7864 lowerSelect(Worklist, Inst, MDT);
7867 case AMDGPU::S_CMP_EQ_I32:
7868 case AMDGPU::S_CMP_LG_I32:
7869 case AMDGPU::S_CMP_GT_I32:
7870 case AMDGPU::S_CMP_GE_I32:
7871 case AMDGPU::S_CMP_LT_I32:
7872 case AMDGPU::S_CMP_LE_I32:
7873 case AMDGPU::S_CMP_EQ_U32:
7874 case AMDGPU::S_CMP_LG_U32:
7875 case AMDGPU::S_CMP_GT_U32:
7876 case AMDGPU::S_CMP_GE_U32:
7877 case AMDGPU::S_CMP_LT_U32:
7878 case AMDGPU::S_CMP_LE_U32:
7879 case AMDGPU::S_CMP_EQ_U64:
7880 case AMDGPU::S_CMP_LG_U64:
7881 case AMDGPU::S_CMP_LT_F32:
7882 case AMDGPU::S_CMP_EQ_F32:
7883 case AMDGPU::S_CMP_LE_F32:
7884 case AMDGPU::S_CMP_GT_F32:
7885 case AMDGPU::S_CMP_LG_F32:
7886 case AMDGPU::S_CMP_GE_F32:
7887 case AMDGPU::S_CMP_O_F32:
7888 case AMDGPU::S_CMP_U_F32:
7889 case AMDGPU::S_CMP_NGE_F32:
7890 case AMDGPU::S_CMP_NLG_F32:
7891 case AMDGPU::S_CMP_NGT_F32:
7892 case AMDGPU::S_CMP_NLE_F32:
7893 case AMDGPU::S_CMP_NEQ_F32:
7894 case AMDGPU::S_CMP_NLT_F32: {
7895 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7899 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7913 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7917 case AMDGPU::S_CMP_LT_F16:
7918 case AMDGPU::S_CMP_EQ_F16:
7919 case AMDGPU::S_CMP_LE_F16:
7920 case AMDGPU::S_CMP_GT_F16:
7921 case AMDGPU::S_CMP_LG_F16:
7922 case AMDGPU::S_CMP_GE_F16:
7923 case AMDGPU::S_CMP_O_F16:
7924 case AMDGPU::S_CMP_U_F16:
7925 case AMDGPU::S_CMP_NGE_F16:
7926 case AMDGPU::S_CMP_NLG_F16:
7927 case AMDGPU::S_CMP_NGT_F16:
7928 case AMDGPU::S_CMP_NLE_F16:
7929 case AMDGPU::S_CMP_NEQ_F16:
7930 case AMDGPU::S_CMP_NLT_F16: {
7931 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7953 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7957 case AMDGPU::S_CVT_HI_F32_F16: {
7959 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7960 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7961 if (ST.useRealTrue16Insts()) {
7966 .
addReg(TmpReg, 0, AMDGPU::hi16)
7982 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7986 case AMDGPU::S_MINIMUM_F32:
7987 case AMDGPU::S_MAXIMUM_F32: {
7989 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8000 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8004 case AMDGPU::S_MINIMUM_F16:
8005 case AMDGPU::S_MAXIMUM_F16: {
8007 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8008 ? &AMDGPU::VGPR_16RegClass
8009 : &AMDGPU::VGPR_32RegClass);
8021 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8025 case AMDGPU::V_S_EXP_F16_e64:
8026 case AMDGPU::V_S_LOG_F16_e64:
8027 case AMDGPU::V_S_RCP_F16_e64:
8028 case AMDGPU::V_S_RSQ_F16_e64:
8029 case AMDGPU::V_S_SQRT_F16_e64: {
8031 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8032 ? &AMDGPU::VGPR_16RegClass
8033 : &AMDGPU::VGPR_32RegClass);
8045 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8051 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8059 if (NewOpcode == Opcode) {
8068 if (
MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
8070 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
8074 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8076 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8094 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8096 MRI.replaceRegWith(DstReg, NewDstReg);
8097 MRI.clearKillFlags(NewDstReg);
8111 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8115 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8116 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8117 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8119 get(AMDGPU::IMPLICIT_DEF), Undef);
8121 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8127 MRI.replaceRegWith(DstReg, NewDstReg);
8128 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8130 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8133 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8134 MRI.replaceRegWith(DstReg, NewDstReg);
8135 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8140 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8141 MRI.replaceRegWith(DstReg, NewDstReg);
8143 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8153 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8154 AMDGPU::OpName::src0_modifiers) >= 0)
8158 NewInstr->addOperand(Src);
8161 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8164 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8166 NewInstr.addImm(
Size);
8167 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8171 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8176 "Scalar BFE is only implemented for constant width and offset");
8184 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8185 AMDGPU::OpName::src1_modifiers) >= 0)
8187 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8189 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8190 AMDGPU::OpName::src2_modifiers) >= 0)
8192 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8194 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8196 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8198 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8204 NewInstr->addOperand(
Op);
8211 if (
Op.getReg() == AMDGPU::SCC) {
8213 if (
Op.isDef() && !
Op.isDead())
8214 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8216 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8221 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8222 Register DstReg = NewInstr->getOperand(0).getReg();
8227 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8228 MRI.replaceRegWith(DstReg, NewDstReg);
8237 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
std::pair<bool, MachineBasicBlock *>

  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);

  unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? AMDGPU::V_ADD_U32_e64
                                             : AMDGPU::V_SUB_U32_e64;

  MRI.replaceRegWith(OldDstReg, ResultReg);

  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  return std::pair(true, NewBB);

  return std::pair(false, nullptr);
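// Handles S_ADD_I32 / S_SUB_I32 when the target has add-no-carry VALU forms:
// the scalar op is rewritten to V_ADD_U32_e64 / V_SUB_U32_e64 writing a fresh
// VGPR, and users of the old SGPR result are queued so they can be moved to
// the VALU as well; otherwise it returns {false, nullptr} and the instruction
// takes the generic carry-producing path.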
8291 bool IsSCC = (CondReg == AMDGPU::SCC);
8299 MRI.replaceRegWith(Dest.
getReg(), CondReg);
8305 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8306 NewCondReg =
MRI.createVirtualRegister(TC);
8310 bool CopyFound =
false;
8311 for (MachineInstr &CandI :
8314 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8316 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8318 .
addReg(CandI.getOperand(1).getReg());
8330 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8338 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg())));
8339 MachineInstr *NewInst;
8340 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8341 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8354 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
8356 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
8368 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8369 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8371 unsigned SubOp = ST.hasAddNoCarry() ?
8372 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8382 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8383 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8397 if (ST.hasDLInsts()) {
8398 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8406 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8407 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8413 bool Src0IsSGPR = Src0.
isReg() &&
8414 RI.isSGPRClass(
MRI.getRegClass(Src0.
getReg()));
8415 bool Src1IsSGPR = Src1.
isReg() &&
8416 RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()));
8418 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8419 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8429 }
else if (Src1IsSGPR) {
8443 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8447 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8453 unsigned Opcode) const {
8463 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8464 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8476 MRI.replaceRegWith(Dest.getReg(), NewDest);
8477 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8482 unsigned Opcode) const {
8492 Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8493 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8505 MRI.replaceRegWith(Dest.getReg(), NewDest);
8506 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8521 const MCInstrDesc &InstDesc = get(Opcode);
8522 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8524 &AMDGPU::SGPR_32RegClass;
8526 const TargetRegisterClass *Src0SubRC =
8527 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8530 AMDGPU::sub0, Src0SubRC);
8532 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8533 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8534 const TargetRegisterClass *NewDestSubRC =
8535 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8537 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8538 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8541 AMDGPU::sub1, Src0SubRC);
8543 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8544 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8549 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8556 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8558 Worklist.insert(&LoHalf);
8559 Worklist.insert(&HiHalf);
8565 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
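// Editor's note: hedged standalone sketch, not part of SIInstrInfo.cpp. It
// illustrates the arithmetic behind the 64-bit unary split above: the scalar
// 64-bit operation is rewritten as the same 32-bit operation applied to the
// sub0 (low) and sub1 (high) halves and then recombined, which is what the
// LoHalf/HiHalf/REG_SEQUENCE sequence models. Names here are illustrative.
#include <cassert>
#include <cstdint>

static uint64_t splitUnaryNot64(uint64_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src);        // sub0 half
  uint32_t Hi = static_cast<uint32_t>(Src >> 32);  // sub1 half
  uint32_t LoHalf = ~Lo;                           // 32-bit op on the low half
  uint32_t HiHalf = ~Hi;                           // 32-bit op on the high half
  // REG_SEQUENCE-style recombination into the full 64-bit destination.
  return (static_cast<uint64_t>(HiHalf) << 32) | LoHalf;
}

int main() {
  assert(splitUnaryNot64(0x0123456789abcdefULL) == ~0x0123456789abcdefULL);
  return 0;
}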
8576 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8577 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8578 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8586 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8587 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8588 const TargetRegisterClass *Src0SubRC =
8589 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8590 if (RI.isSGPRClass(Src0SubRC))
8591 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8592 const TargetRegisterClass *Src1SubRC =
8593 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8594 if (RI.isSGPRClass(Src1SubRC))
8595 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8599 MachineOperand Op0L =
8601 MachineOperand Op1L =
8603 MachineOperand Op0H =
8605 MachineOperand Op1H =
8623 Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8624 MachineInstr *Op1L_Op0H =
8629 Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8630 MachineInstr *Op1H_Op0L =
8635 Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8636 MachineInstr *Carry =
8641 MachineInstr *LoHalf =
8646 Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8651 MachineInstr *HiHalf =
8662 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8674 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
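// Editor's note: hedged sketch, separate from the listing. It shows the
// schoolbook decomposition the 64-bit scalar multiply expansion above builds
// out of 32-bit multiplies and adds: the low 32 bits are lo0*lo1, and the high
// 32 bits accumulate the two cross products plus the carry (high half) of
// lo0*lo1. Helper names are illustrative only.
#include <cassert>
#include <cstdint>

static uint64_t mul64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t A_Lo = static_cast<uint32_t>(A), A_Hi = static_cast<uint32_t>(A >> 32);
  uint32_t B_Lo = static_cast<uint32_t>(B), B_Hi = static_cast<uint32_t>(B >> 32);

  uint64_t LoProd = static_cast<uint64_t>(A_Lo) * B_Lo;
  uint32_t LoHalf = static_cast<uint32_t>(LoProd);        // low-half multiply
  uint32_t Carry  = static_cast<uint32_t>(LoProd >> 32);  // high-half multiply

  uint32_t CrossA = static_cast<uint32_t>(A_Lo * static_cast<uint64_t>(B_Hi)); // low 32 bits
  uint32_t CrossB = static_cast<uint32_t>(A_Hi * static_cast<uint64_t>(B_Lo)); // low 32 bits
  uint32_t HiHalf = CrossA + CrossB + Carry;               // 32-bit add chain

  return (static_cast<uint64_t>(HiHalf) << 32) | LoHalf;
}

int main() {
  assert(mul64ViaHalves(0x123456789abcdef0ULL, 0x0fedcba987654321ULL) ==
         0x123456789abcdef0ULL * 0x0fedcba987654321ULL);
  return 0;
}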
8685 Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8686 Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8687 Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8695 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8696 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8697 const TargetRegisterClass *Src0SubRC =
8698 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8699 if (RI.isSGPRClass(Src0SubRC))
8700 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8701 const TargetRegisterClass *Src1SubRC =
8702 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8703 if (RI.isSGPRClass(Src1SubRC))
8704 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8708 MachineOperand Op0L =
8710 MachineOperand Op1L =
8714 unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8715 ? AMDGPU::V_MUL_HI_U32_e64
8716 : AMDGPU::V_MUL_HI_I32_e64;
8717 MachineInstr *HiHalf =
8720 MachineInstr *LoHalf =
8731 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8739 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8755 const MCInstrDesc &InstDesc = get(Opcode);
8756 const TargetRegisterClass *Src0RC = Src0.isReg() ?
8758 &AMDGPU::SGPR_32RegClass;
8760 const TargetRegisterClass *Src0SubRC =
8761 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8762 const TargetRegisterClass *Src1RC = Src1.isReg() ?
8764 &AMDGPU::SGPR_32RegClass;
8766 const TargetRegisterClass *Src1SubRC =
8767 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8770 AMDGPU::sub0, Src0SubRC);
8772 AMDGPU::sub0, Src1SubRC);
8774 AMDGPU::sub1, Src0SubRC);
8776 AMDGPU::sub1, Src1SubRC);
8778 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8779 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8780 const TargetRegisterClass *NewDestSubRC =
8781 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8783 Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8784 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
8788 Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8789 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
8793 Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8800 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8802 Worklist.insert(&LoHalf);
8803 Worklist.insert(&HiHalf);
8806 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8822 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8824 Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8826 MachineOperand* Op0;
8827 MachineOperand* Op1;
8840 Register NewDest = MRI.createVirtualRegister(DestRC);
8846 MRI.replaceRegWith(Dest.getReg(), NewDest);
8862 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
8863 const TargetRegisterClass *SrcRC = Src.isReg() ?
8864 MRI.getRegClass(Src.getReg()) :
8865 &AMDGPU::SGPR_32RegClass;
8867 Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8868 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8870 const TargetRegisterClass *SrcSubRC =
8871 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8874 AMDGPU::sub0, SrcSubRC);
8876 AMDGPU::sub1, SrcSubRC);
8882 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8886 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
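// Editor's note: hedged sketch of the 64-bit population-count expansion above.
// V_BCNT_U32_B32 counts the bits of one 32-bit source and adds an accumulator
// operand, so a 64-bit count becomes two chained 32-bit counts over the sub0
// and sub1 halves. Names are illustrative.
#include <cassert>
#include <cstdint>

// Model of V_BCNT_U32_B32: popcount of Src plus the accumulator.
static uint32_t bcntU32B32(uint32_t Src, uint32_t Accum) {
  uint32_t Count = 0;
  for (uint32_t Bits = Src; Bits; Bits &= Bits - 1)
    ++Count;
  return Count + Accum;
}

static uint32_t popcount64ViaHalves(uint64_t Src) {
  uint32_t Mid = bcntU32B32(static_cast<uint32_t>(Src), 0);    // sub0 half
  return bcntU32B32(static_cast<uint32_t>(Src >> 32), Mid);    // sub1 half
}

int main() {
  assert(popcount64ViaHalves(0xF0F0F0F0F0F0F0F0ULL) == 32);
  assert(popcount64ViaHalves(0x1ULL) == 1);
  return 0;
}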
8905 Offset == 0 && "Not implemented");
8908 Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8909 Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8910 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8927 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8928 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8933 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8934 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8938 .addReg(Src.getReg(), 0, AMDGPU::sub0);
8941 .addReg(Src.getReg(), 0, AMDGPU::sub0)
8946 MRI.replaceRegWith(Dest.getReg(), ResultReg);
8947 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8966 const MCInstrDesc &InstDesc = get(Opcode);
8968 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8969 unsigned OpcodeAdd =
8970 ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8972 const TargetRegisterClass *SrcRC =
8973 Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8974 const TargetRegisterClass *SrcSubRC =
8975 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8977 MachineOperand SrcRegSub0 =
8979 MachineOperand SrcRegSub1 =
8982 Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8983 Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8984 Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8985 Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8992 .addReg(IsCtlz ? MidReg1 : MidReg2)
8998 .addReg(IsCtlz ? MidReg2 : MidReg1);
9000 MRI.replaceRegWith(Dest.getReg(), MidReg4);
9002 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
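// Editor's note: hedged sketch of the 64-bit leading/trailing-zero count
// expansion above (the V_FFBH / V_FFBL split). For count-leading-zeros of
// hi:lo, the low half's count is offset by 32 with a saturating add and the
// result is the unsigned minimum of the two candidates; count-trailing-zeros
// mirrors this with the halves swapped. The helpers below model the 32-bit
// find-first-bit returning 0xFFFFFFFF when no bit is set; names are
// illustrative.
#include <cassert>
#include <cstdint>

static uint32_t ffbh32(uint32_t V) {                 // find first bit (high)
  if (V == 0) return 0xFFFFFFFFu;
  uint32_t N = 0;
  while (!(V & 0x80000000u)) { V <<= 1; ++N; }
  return N;
}

static uint32_t uaddsat32(uint32_t A, uint32_t B) {  // 32-bit add with clamp
  uint64_t S = static_cast<uint64_t>(A) + B;
  return S > 0xFFFFFFFFu ? 0xFFFFFFFFu : static_cast<uint32_t>(S);
}

static uint32_t ctlz64ViaHalves(uint64_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src), Hi = static_cast<uint32_t>(Src >> 32);
  uint32_t FromLo = uaddsat32(ffbh32(Lo), 32);       // low half contributes +32
  uint32_t FromHi = ffbh32(Hi);
  return FromLo < FromHi ? FromLo : FromHi;          // unsigned minimum
}

int main() {
  assert(ctlz64ViaHalves(0x0000000100000000ULL) == 31);
  assert(ctlz64ViaHalves(0x0000000000000001ULL) == 63);
  return 0;
}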
9005void SIInstrInfo::addUsersToMoveToVALUWorklist(
9009 MachineInstr &UseMI = *MO.getParent();
9013 switch (UseMI.getOpcode()) {
9016 case AMDGPU::SOFT_WQM:
9017 case AMDGPU::STRICT_WWM:
9018 case AMDGPU::STRICT_WQM:
9019 case AMDGPU::REG_SEQUENCE:
9021 case AMDGPU::INSERT_SUBREG:
9024 OpNo = MO.getOperandNo();
9029 MRI.constrainRegClass(DstReg, OpRC);
9031 if (!RI.hasVectorRegisters(OpRC))
9042 Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9049 case AMDGPU::S_PACK_LL_B32_B16: {
9050 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9051 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9068 case AMDGPU::S_PACK_LH_B32_B16: {
9069 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9078 case AMDGPU::S_PACK_HL_B32_B16: {
9079 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9089 case AMDGPU::S_PACK_HH_B32_B16: {
9090 Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9091 Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9108 MRI.replaceRegWith(Dest.getReg(), ResultReg);
9109 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
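// Editor's note: hedged sketch of the S_PACK_* lowering above. Each variant
// selects the low (L) or high (H) 16 bits of its two sources and packs them
// into the low and high halves of a 32-bit result; the VALU expansion builds
// this out of AND/shift/bitfield operations. Helper names are illustrative,
// not the pass's API.
#include <cassert>
#include <cstdint>

static uint32_t packLL(uint32_t S0, uint32_t S1) { return (S0 & 0xFFFFu) | (S1 << 16); }
static uint32_t packLH(uint32_t S0, uint32_t S1) { return (S0 & 0xFFFFu) | (S1 & 0xFFFF0000u); }
static uint32_t packHL(uint32_t S0, uint32_t S1) { return (S0 >> 16) | (S1 << 16); }
static uint32_t packHH(uint32_t S0, uint32_t S1) { return (S0 >> 16) | (S1 & 0xFFFF0000u); }

int main() {
  assert(packLL(0xAAAA1111u, 0xBBBB2222u) == 0x22221111u);
  assert(packLH(0xAAAA1111u, 0xBBBB2222u) == 0xBBBB1111u);
  assert(packHL(0xAAAA1111u, 0xBBBB2222u) == 0x2222AAAAu);
  assert(packHH(0xAAAA1111u, 0xBBBB2222u) == 0xBBBBAAAAu);
  return 0;
}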
9118 assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9119 !Op.isDead() && Op.getParent() == &SCCDefInst);
9120 SmallVector<MachineInstr *, 4> CopyToDelete;
9123 for (MachineInstr &MI :
9127 int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9130 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9131 Register DestReg = MI.getOperand(0).getReg();
9133 MRI.replaceRegWith(DestReg, NewCond);
9138 MI.getOperand(SCCIdx).setReg(NewCond);
9144 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9147 for (auto &Copy : CopyToDelete)
9148 Copy->eraseFromParent();
9156void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9162 for (MachineInstr &MI :
9165 if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9167 if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9176 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9184 case AMDGPU::REG_SEQUENCE:
9185 case AMDGPU::INSERT_SUBREG:
9187 case AMDGPU::SOFT_WQM:
9188 case AMDGPU::STRICT_WWM:
9189 case AMDGPU::STRICT_WQM: {
9191 if (RI.isAGPRClass(SrcRC)) {
9192 if (RI.isAGPRClass(NewDstRC))
9197 case AMDGPU::REG_SEQUENCE:
9198 case AMDGPU::INSERT_SUBREG:
9199 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9202 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9208 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9211 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9225 int OpIndices[3]) const {
9226 const MCInstrDesc &Desc = MI.getDesc();
9242 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9244 for (unsigned i = 0; i < 3; ++i) {
9245 int Idx = OpIndices[i];
9249 const MachineOperand &MO = MI.getOperand(Idx);
9255 const TargetRegisterClass *OpRC =
9256 RI.getRegClass(Desc.operands()[Idx].RegClass);
9257 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9263 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9264 if (RI.isSGPRClass(RegRC))
9282 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9283 SGPRReg = UsedSGPRs[0];
9286 if (!SGPRReg && UsedSGPRs[1]) {
9287 if (UsedSGPRs[1] == UsedSGPRs[2])
9288 SGPRReg = UsedSGPRs[1];
9295 AMDGPU::OpName OperandName) const {
9296 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9299 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9303 return &MI.getOperand(Idx);
9317 if (ST.isAmdHsaOS()) {
9320 RsrcDataFormat |= (1ULL << 56);
9325 RsrcDataFormat |= (2ULL << 59);
9328 return RsrcDataFormat;
9338 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9343 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9350 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9356 unsigned Opc = MI.getOpcode();
9362 return get(Opc).mayLoad() &&
9367 int &FrameIndex) const {
9369 if (!Addr || !Addr->isFI())
9380 int &FrameIndex) const {
9388 int &FrameIndex) const {
9402 int &FrameIndex) const {
9419 while (++I != E && I->isInsideBundle()) {
9420 assert(!I->isBundle() && "No nested bundle!");
9428 unsigned Opc = MI.getOpcode();
9430 unsigned DescSize = Desc.getSize();
9435 unsigned Size = DescSize;
9439 if (MI.isBranch() && ST.hasOffset3fBug())
9450 bool HasLiteral = false;
9451 unsigned LiteralSize = 4;
9452 for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9457 if (ST.has64BitLiterals()) {
9458 switch (OpInfo.OperandType) {
9474 return HasLiteral ? DescSize + LiteralSize : DescSize;
9479 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9483 int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9484 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9488 case TargetOpcode::BUNDLE:
9490 case TargetOpcode::INLINEASM:
9491 case TargetOpcode::INLINEASM_BR: {
9493 const char *AsmStr = MI.getOperand(0).getSymbolName();
9497 if (MI.isMetaInstruction())
9501 const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9504 unsigned LoInstOpcode = D16Info->LoOp;
9506 DescSize = Desc.getSize();
9517 if (MI.memoperands_empty())
9529 static const std::pair<int, const char *> TargetIndices[] = {
9567std::pair<unsigned, unsigned>
9574 static const std::pair<unsigned, const char *> TargetFlags[] = {
9592 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9607 return AMDGPU::WWM_COPY;
9609 return AMDGPU::COPY;
9621 bool IsNullOrVectorRegister = true;
9624 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9629 return IsNullOrVectorRegister &&
9631 (Opcode == AMDGPU::IMPLICIT_DEF &&
9633 (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
9634 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9642 if (ST.hasAddNoCarry())
9646 Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9647 MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9658 if (ST.hasAddNoCarry())
9665 *RI.getBoolRC(), I, false,
9678 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9679 case AMDGPU::SI_KILL_I1_TERMINATOR:
9688 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9689 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9690 case AMDGPU::SI_KILL_I1_PSEUDO:
9691 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9703 const unsigned OffsetBits =
9705 return (1 << OffsetBits) - 1;
9712 if (MI.isInlineAsm())
9715 for (auto &Op : MI.implicit_operands()) {
9716 if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9717 Op.setReg(AMDGPU::VCC_LO);
9726 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
9730 const auto RCID = MI.getDesc().operands()[Idx].RegClass;
9731 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
9748 if (Imm <= MaxImm + 64) {
9750 Overflow = Imm - MaxImm;
9777 if (ST.hasRestrictedSOffset())
9820 if (!ST.hasFlatInstOffsets())
9828 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9840std::pair<int64_t, int64_t>
9843 int64_t RemainderOffset = COffsetVal;
9844 int64_t ImmField = 0;
9849 if (AllowNegative) {
9851 int64_t D = 1LL << NumBits;
9852 RemainderOffset = (COffsetVal / D) * D;
9853 ImmField = COffsetVal - RemainderOffset;
9855 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9857 (ImmField % 4) != 0) {
9859 RemainderOffset += ImmField % 4;
9860 ImmField -= ImmField % 4;
9862 } else if (COffsetVal >= 0) {
9864 RemainderOffset = COffsetVal - ImmField;
9868 assert(RemainderOffset + ImmField == COffsetVal);
9869 return {ImmField, RemainderOffset};
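// Editor's note: hedged standalone sketch of the offset split returned above.
// The goal is ImmField + RemainderOffset == COffsetVal, with ImmField small
// enough to fit the instruction's signed (AllowNegative) or unsigned offset
// field and the remainder folded into the address register instead. The
// signed case divides by a power of two so truncation is toward zero, as in
// the listing; parameter handling here is simplified and illustrative.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t>
splitOffsetSketch(int64_t COffsetVal, unsigned NumBits, bool AllowNegative) {
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    int64_t D = 1LL << NumBits;               // magnitude of the signed field
    RemainderOffset = (COffsetVal / D) * D;   // truncates toward zero
    ImmField = COffsetVal - RemainderOffset;
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & ((1LL << NumBits) - 1); // keep the low field bits
    RemainderOffset = COffsetVal - ImmField;
  }
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}

int main() {
  auto [Imm, Rem] = splitOffsetSketch(-5000, 12, /*AllowNegative=*/true);
  assert(Imm + Rem == -5000 && Imm > -(1LL << 12) && Imm < (1LL << 12));
  auto [Imm2, Rem2] = splitOffsetSketch(70000, 12, /*AllowNegative=*/false);
  assert(Imm2 + Rem2 == 70000 && Imm2 >= 0 && Imm2 < (1LL << 12));
  return 0;
}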
9873 if (ST.hasNegativeScratchOffsetBug() &&
9881 switch (ST.getGeneration()) {
9907 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9908 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9909 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9910 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9911 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9912 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9913 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9914 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9921#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9922 case OPCODE##_dpp: \
9923 case OPCODE##_e32: \
9924 case OPCODE##_e64: \
9925 case OPCODE##_e64_dpp: \
9940 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9941 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
9942 case AMDGPU::V_FMA_F16_gfx9_e64:
9943 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
9944 case AMDGPU::V_INTERP_P2_F16:
9945 case AMDGPU::V_MAD_F16_e64:
9946 case AMDGPU::V_MAD_U16_e64:
9947 case AMDGPU::V_MAD_I16_e64:
9969 switch (ST.getGeneration()) {
9982 if (isMAI(Opcode)) {
9990 if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
9997 if (ST.hasGFX90AInsts()) {
9999 if (ST.hasGFX940Insts())
10030 for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
10031 if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10032 auto &RegOp = MI.getOperand(1 + 2 * I);
10044 switch (MI.getOpcode()) {
10046 case AMDGPU::REG_SEQUENCE:
10050 case AMDGPU::INSERT_SUBREG:
10051 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10068 if (!P.Reg.isVirtual())
10072 auto *DefInst = MRI.getVRegDef(RSR.Reg);
10073 while (auto *MI = DefInst) {
10075 switch (MI->getOpcode()) {
10077 case AMDGPU::V_MOV_B32_e32: {
10078 auto &Op1 = MI->getOperand(1);
10083 DefInst = MRI.getVRegDef(RSR.Reg);
10091 DefInst = MRI.getVRegDef(RSR.Reg);
10104 assert(MRI.isSSA() && "Must be run on SSA");
10106 auto *TRI = MRI.getTargetRegisterInfo();
10107 auto *DefBB = DefMI.getParent();
10111 if (UseMI.getParent() != DefBB)
10114 const int MaxInstScan = 20;
10118 auto E = UseMI.getIterator();
10119 for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10120 if (I->isDebugInstr())
10123 if (++NumInst > MaxInstScan)
10126 if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10136 assert(MRI.isSSA() && "Must be run on SSA");
10138 auto *TRI = MRI.getTargetRegisterInfo();
10139 auto *DefBB = DefMI.getParent();
10141 const int MaxUseScan = 10;
10144 for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10145 auto &UseInst = *Use.getParent();
10148 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10151 if (++NumUse > MaxUseScan)
10158 const int MaxInstScan = 20;
10162 for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10165 if (I->isDebugInstr())
10168 if (++NumInst > MaxInstScan)
10181 if (Reg == VReg && --NumUse == 0)
10183 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10192 auto Cur = MBB.begin();
10193 if (Cur != MBB.end())
10195 if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10198 } while (Cur != MBB.end() && Cur != LastPHIIt);
10207 if (InsPt != MBB.end() &&
10208 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10209 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10210 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10211 InsPt->definesRegister(Src, nullptr)) {
10215 .addReg(Src, 0, SrcSubReg)
10240 if (isFullCopyInstr(MI)) {
10241 Register DstReg = MI.getOperand(0).getReg();
10242 Register SrcReg = MI.getOperand(1).getReg();
10249 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10253 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10264 unsigned *PredCost) const {
10265 if (MI.isBundle()) {
10268 unsigned Lat = 0, Count = 0;
10269 for (++I; I != E && I->isBundledWithPred(); ++I) {
10271 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10273 return Lat + Count - 1;
10276 return SchedModel.computeInstrLatency(&MI);
10282 unsigned Opcode = MI.getOpcode();
10287 : MI.getOperand(1).getReg();
10288 LLT DstTy = MRI.getType(Dst);
10289 LLT SrcTy = MRI.getType(Src);
10291 unsigned SrcAS = SrcTy.getAddressSpace();
10294 ST.hasGloballyAddressableScratch()
10302 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10303 return HandleAddrSpaceCast(MI);
10306 auto IID = GI->getIntrinsicID();
10313 case Intrinsic::amdgcn_addrspacecast_nonnull:
10314 return HandleAddrSpaceCast(MI);
10315 case Intrinsic::amdgcn_if:
10316 case Intrinsic::amdgcn_else:
10330 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10331 Opcode == AMDGPU::G_SEXTLOAD) {
10332 if (MI.memoperands_empty())
10336 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10337 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10345 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10346 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10347 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10360 unsigned opcode = MI.getOpcode();
10361 if (opcode == AMDGPU::V_READLANE_B32 ||
10362 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10363 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10366 if (isCopyInstr(MI)) {
10370 RI.getPhysRegBaseClass(srcOp.getReg());
10378 if (MI.isPreISelOpcode())
10393 if (MI.memoperands_empty())
10397 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10398 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10413 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10415 if (!SrcOp.isReg())
10419 if (!Reg || !SrcOp.readsReg())
10425 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10452 F, "ds_ordered_count unsupported for this calling conv"));
10466 Register &SrcReg2, int64_t &CmpMask,
10467 int64_t &CmpValue) const {
10468 if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10471 switch (MI.getOpcode()) {
10474 case AMDGPU::S_CMP_EQ_U32:
10475 case AMDGPU::S_CMP_EQ_I32:
10476 case AMDGPU::S_CMP_LG_U32:
10477 case AMDGPU::S_CMP_LG_I32:
10478 case AMDGPU::S_CMP_LT_U32:
10479 case AMDGPU::S_CMP_LT_I32:
10480 case AMDGPU::S_CMP_GT_U32:
10481 case AMDGPU::S_CMP_GT_I32:
10482 case AMDGPU::S_CMP_LE_U32:
10483 case AMDGPU::S_CMP_LE_I32:
10484 case AMDGPU::S_CMP_GE_U32:
10485 case AMDGPU::S_CMP_GE_I32:
10486 case AMDGPU::S_CMP_EQ_U64:
10487 case AMDGPU::S_CMP_LG_U64:
10488 SrcReg = MI.getOperand(0).getReg();
10489 if (MI.getOperand(1).isReg()) {
10490 if (MI.getOperand(1).getSubReg())
10492 SrcReg2 = MI.getOperand(1).getReg();
10494 } else if (MI.getOperand(1).isImm()) {
10496 CmpValue = MI.getOperand(1).getImm();
10502 case AMDGPU::S_CMPK_EQ_U32:
10503 case AMDGPU::S_CMPK_EQ_I32:
10504 case AMDGPU::S_CMPK_LG_U32:
10505 case AMDGPU::S_CMPK_LG_I32:
10506 case AMDGPU::S_CMPK_LT_U32:
10507 case AMDGPU::S_CMPK_LT_I32:
10508 case AMDGPU::S_CMPK_GT_U32:
10509 case AMDGPU::S_CMPK_GT_I32:
10510 case AMDGPU::S_CMPK_LE_U32:
10511 case AMDGPU::S_CMPK_LE_I32:
10512 case AMDGPU::S_CMPK_GE_U32:
10513 case AMDGPU::S_CMPK_GE_I32:
10514 SrcReg = MI.getOperand(0).getReg();
10516 CmpValue = MI.getOperand(1).getImm();
10525 Register SrcReg2, int64_t CmpMask,
10534 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10535 this](int64_t ExpectedValue, unsigned SrcSize,
10536 bool IsReversible, bool IsSigned) -> bool {
10561 if (!Def || Def->getParent() != CmpInstr.getParent())
10564 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10565 Def->getOpcode() != AMDGPU::S_AND_B64)
10569 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10580 SrcOp = &Def->getOperand(2);
10581 else if (isMask(&Def->getOperand(2)))
10582 SrcOp = &Def->getOperand(1);
10590 if (IsSigned && BitNo == SrcSize - 1)
10593 ExpectedValue <<= BitNo;
10595 bool IsReversedCC = false;
10596 if (CmpValue != ExpectedValue) {
10599 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10604 Register DefReg = Def->getOperand(0).getReg();
10605 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10608 for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10610 if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10611 I->killsRegister(AMDGPU::SCC, &RI))
10616 Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
10620 if (!MRI->use_nodbg_empty(DefReg)) {
10628 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10629 : AMDGPU::S_BITCMP1_B32
10630 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10631 : AMDGPU::S_BITCMP1_B64;
10636 Def->eraseFromParent();
10644 case AMDGPU::S_CMP_EQ_U32:
10645 case AMDGPU::S_CMP_EQ_I32:
10646 case AMDGPU::S_CMPK_EQ_U32:
10647 case AMDGPU::S_CMPK_EQ_I32:
10648 return optimizeCmpAnd(1, 32, true, false);
10649 case AMDGPU::S_CMP_GE_U32:
10650 case AMDGPU::S_CMPK_GE_U32:
10651 return optimizeCmpAnd(1, 32, false, false);
10652 case AMDGPU::S_CMP_GE_I32:
10653 case AMDGPU::S_CMPK_GE_I32:
10654 return optimizeCmpAnd(1, 32, false, true);
10655 case AMDGPU::S_CMP_EQ_U64:
10656 return optimizeCmpAnd(1, 64, true, false);
10657 case AMDGPU::S_CMP_LG_U32:
10658 case AMDGPU::S_CMP_LG_I32:
10659 case AMDGPU::S_CMPK_LG_U32:
10660 case AMDGPU::S_CMPK_LG_I32:
10661 return optimizeCmpAnd(0, 32, true, false);
10662 case AMDGPU::S_CMP_GT_U32:
10663 case AMDGPU::S_CMPK_GT_U32:
10664 return optimizeCmpAnd(0, 32, false, false);
10665 case AMDGPU::S_CMP_GT_I32:
10666 case AMDGPU::S_CMPK_GT_I32:
10667 return optimizeCmpAnd(0, 32, false, true);
10668 case AMDGPU::S_CMP_LG_U64:
10669 return optimizeCmpAnd(0, 64, true, false);
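// Editor's note: hedged sketch of the equivalence that optimizeCmpAnd relies
// on above: comparing (x & (1 << n)) against (1 << n) or against 0 is the same
// as testing bit n directly, which is what the single-bit compare computes
// into SCC, so the AND + compare pair can be folded into one bit test
// (reversed when the compare checks the opposite value). Names are
// illustrative.
#include <cassert>
#include <cstdint>

static bool sccFromCmpAnd(uint32_t X, unsigned BitNo, bool CmpAgainstZero) {
  uint32_t Masked = X & (1u << BitNo);                                // the AND
  return CmpAgainstZero ? (Masked == 0) : (Masked == (1u << BitNo));  // the compare
}

static bool sccFromBitcmp(uint32_t X, unsigned BitNo, bool TestForZero) {
  bool BitSet = (X >> BitNo) & 1u;                                    // single bit test
  return TestForZero ? !BitSet : BitSet;
}

int main() {
  for (uint32_t X : {0u, 1u, 0x80u, 0xFFFFFFFFu, 0x12345678u})
    for (unsigned Bit : {0u, 7u, 31u}) {
      assert(sccFromCmpAnd(X, Bit, false) == sccFromBitcmp(X, Bit, false));
      assert(sccFromCmpAnd(X, Bit, true) == sccFromBitcmp(X, Bit, true));
    }
  return 0;
}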
10676 AMDGPU::OpName OpName) const {
10677 if (!ST.needsAlignedVGPRs())
10680 int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10692 bool IsAGPR = RI.isAGPR(MRI, DataReg);
10694 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10697 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10698 : &AMDGPU::VReg_64_Align2RegClass);
10700 .addReg(DataReg, 0, Op.getSubReg())
10705 Op.setSubReg(AMDGPU::sub0);
10727 unsigned Opcode = MI.getOpcode();
10733 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10734 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
10737 if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
bool isSpill(uint16_t Opcode) const
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
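As a rough usage sketch (not taken from the surrounding code; TII, MI, and the helper useOffset are assumed placeholders), a named-operand lookup typically looks like:
  // Hypothetical fragment: TII is a const SIInstrInfo * and MI a MachineInstr &.
  if (const MachineOperand *Off =
          TII->getNamedOperand(MI, AMDGPU::OpName::offset)) {
    if (Off->isImm())
      useOffset(Off->getImm()); // useOffset stands in for real handling
  }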
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
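As an orientation-only sketch (the header location is an assumption; the relevant fact is that integer inline constants cover -16..64):
  #include "AMDGPUBaseInfo.h" // assumed AMDGPU Utils header
  static void inlinableLiteralExamples() {
    using llvm::AMDGPU::isInlinableLiteral64;
    (void)isInlinableLiteral64(64, /*HasInv2Pi=*/true);  // true: within -16..64
    (void)isInlinableLiteral64(65, /*HasInv2Pi=*/true);  // false: needs a literal
    (void)isInlinableLiteral64(-16, /*HasInv2Pi=*/true); // true: lower bound
  }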
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
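A minimal sketch of the range-based wrapper (container and predicate are illustrative only):
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  static bool allEven(const llvm::SmallVector<int, 4> &Vals) {
    // The whole range is passed directly; no begin()/end() pair is needed.
    return llvm::all_of(Vals, [](int V) { return V % 2 == 0; });
  }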
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
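A small compile-time check of the stated behavior (the values follow from 2^N - 1):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::maxUIntN(8) == 255, "2^8 - 1");
  static_assert(llvm::maxUIntN(16) == 65535, "2^16 - 1");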
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
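For example, with the 16-bit instantiation (a compile-time sketch):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::isInt<16>(32767), "INT16_MAX fits");
  static_assert(!llvm::isInt<16>(32768), "one past INT16_MAX does not");
  static_assert(llvm::isInt<16>(-32768), "INT16_MIN fits");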
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
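A short illustrative loop (the container and output stream are placeholders):
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Support/raw_ostream.h"
  static void printIndexed(const llvm::SmallVector<int, 4> &Vals) {
    // Each element is paired with its zero-based index.
    for (const auto &En : llvm::enumerate(Vals))
      llvm::outs() << En.index() << ": " << En.value() << "\n";
  }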
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
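A few compile-time examples of the rounding, including the skewed form:
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::alignDown(37, 8) == 32, "round down to a multiple of 8");
  static_assert(llvm::alignDown(40, 8) == 40, "already aligned");
  static_assert(llvm::alignDown(38, 8, 5) == 37, "largest value <= 38 that is 5 mod 8");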
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
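For instance (note that 0 is not treated as a power of two):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::isPowerOf2_64(64), "2^6");
  static_assert(!llvm::isPowerOf2_64(96), "not a power of two");
  static_assert(!llvm::isPowerOf2_64(0), "zero is excluded");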
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
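A small runtime sketch (the expected values follow from the binary representations):
  #include "llvm/ADT/bit.h"
  #include <cassert>
  static void countrZeroExamples() {
    assert(llvm::countr_zero(0b101000u) == 3 && "three trailing zero bits");
    assert(llvm::countr_zero(1u) == 0 && "lowest bit already set");
  }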
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
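For example (floor semantics; a runtime sketch):
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  static void log2Examples() {
    assert(llvm::Log2_32(32) == 5 && "exact power of two");
    assert(llvm::Log2_32(33) == 5 && "floor of log2(33)");
  }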
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
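A compile-time sketch covering this and the matching Hi_32 helper above:
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::Hi_32(0x0000000AFFFFFFFFULL) == 0xAu, "upper 32 bits");
  static_assert(llvm::Lo_32(0x0000000AFFFFFFFFULL) == 0xFFFFFFFFu, "lower 32 bits");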
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
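For example (a compile-time sketch):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::divideCeil(10, 4) == 3, "10/4 rounded up");
  static_assert(llvm::divideCeil(8, 4) == 2, "exact division is unchanged");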
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
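For example, extending from 16 bits (a compile-time sketch):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::SignExtend64<16>(0x8000) == -32768, "sign bit set");
  static_assert(llvm::SignExtend64<16>(0x7FFF) == 32767, "sign bit clear");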
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
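For example (a compile-time sketch):
  #include "llvm/Support/MathExtras.h"
  #include <cstdint>
  static_assert(llvm::maskTrailingOnes<uint32_t>(4) == 0xFu, "four low bits set");
  static_assert(llvm::maskTrailingOnes<uint64_t>(0) == 0u, "no bits set");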
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
int popcount(T Value) noexcept
Count the number of set bits in a value.
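For example (a runtime sketch):
  #include "llvm/ADT/bit.h"
  #include <cassert>
  static void popcountExamples() {
    assert(llvm::popcount(0xF0u) == 4 && "four set bits");
    assert(llvm::popcount(0u) == 0 && "no set bits");
  }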
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.