#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
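// TableGen-generated instruction definitions and the searchable tables for
// the image-dim and rsrc intrinsics are pulled in by the includes above.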
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc(
        "Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
           return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                    int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)
    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
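  // Clustering heuristic: estimate the per-load size from the running byte
  // count, round it up to dwords, and only let the cluster grow while the
  // estimated total stays within MaxMemoryClusterDWords.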
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
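// The helpers below diagnose and expand copies the hardware cannot perform
// directly: illegal VGPR-to-SGPR copies, and AGPR copies on GFX908, which
// have no direct AGPR-to-AGPR move and must bounce through a reserved VGPR.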
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

    bool SafeToPropagate = true;

    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;

    if (!SafeToPropagate)

    for (auto I = Def; I != MI; ++I)
      I->clearRegisterKills(DefOp.getReg(), &RI);

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
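// copyPhysReg: choose a move opcode from the destination/source register
// classes (SGPR, VGPR, AGPR, 16-bit subregisters) and, for wide registers,
// copy subregister by subregister in an order chosen so an overlapping source
// is not clobbered before it is read.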
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
          *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");
  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
                                         int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;
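  // The cases above recover the constant written by plain moves, bit-reversed
  // moves (S_BREV/V_BFREV) and bitwise-not moves (S_NOT/V_NOT), and confirm
  // that the definition actually targets the queried register.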
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  }
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
                                              bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                      bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
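// WWM (whole-wave mode) values get a dedicated 32-bit save/restore pseudo;
// vector-superclass registers use the AV form, plain VGPRs the V form.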
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

                      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = TRI->getSpillSize(*RC);
                      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())

                              unsigned Quantity) const {
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
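    // Each emitted S_NOP accounts for at most 8 of the requested no-ops, so
    // larger requests are broken into min(Quantity, 8) sized chunks.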
  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
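  // Abort sequence: the masked doorbell ID is OR'd with ECQueueWaveAbort and
  // written to M0 (M0 appears to be staged through TTMP2 around this code)
  // before the final S_MOV_B32 restores M0's previous value.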
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
    MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
                                               &AMDGPU::SReg_32_XM0RegClass);

  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();
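  // The *_term pseudos above are exec-mask manipulations kept as terminators
  // until control flow is lowered; expansion simply swaps in the real
  // S_MOV/S_XOR/S_OR/S_ANDN2/S_AND(_SAVEEXEC) opcodes, and the lane-spill
  // pseudos become V_WRITELANE/V_READLANE.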
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {

    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {
      BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
      BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
    Op.setOffset(Op.getOffset() + 4);
    BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {
  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
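  // S_LOAD_DWORDX16/X8 narrowing: when the only used subregister is 256 or
  // 128 bits wide, retarget the load to S_LOAD_DWORDX8/X4, shrink the
  // register class, fold the subregister's byte offset into the immediate
  // offset, and trim the memory operands to match the new width.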
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

    MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
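// expandMovDPP64 either rewrites the pseudo to V_MOV_B64_dpp when 64-bit DPP
// is available, or splits it into two 32-bit DPP moves over sub0/sub1,
// splitting an immediate source into its low and high 32-bit halves.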
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                        AMDGPU::OpName Src0OpName,
                                        AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);
                                        unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                       Src1, AMDGPU::OpName::src1_modifiers);
                       AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                     int64_t BrOffset) const {
  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();

unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
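// getBranchOpcode and getBranchPredicate (below) are inverse mappings between
// the BranchPredicate enum and the S_CBRANCH_* opcodes.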
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                    bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

  if (MRI.getRegClass(FalseReg) != RC)

  if (NumInsts % 2 == 0)

  CondCycles = TrueCycles = FalseCycles = NumInsts;
  return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
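  // insertSelect decomposes a wide select into per-element V_CNDMASK_B32 or
  // S_CSELECT_B32/B64 operations and reassembles the results with a
  // REG_SEQUENCE, using 32-bit or 64-bit subindices depending on the
  // predicate.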
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();

    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);

      MI.removeOperand(Idx);

                                       unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
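// The two tables above select the literal-last (FMAAK/MADAK) and
// literal-in-the-middle (FMAMK/MADMK) forms used when folding an immediate
// into a MAC/FMA; true16 targets get the _t16/_fake16 variants.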
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

      if (RI.hasVGPRs(DstRC))

      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);
  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())
    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&
      } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
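// Disjointness check: with identical base operands, two accesses cannot
// overlap when the lower access ends at or before the higher one starts,
// i.e. LowOffset + LowWidth <= HighOffset.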
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
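// MAC/FMAC to MAD/FMA mapping used by three-address conversion; the f16
// variants pick the gfx9 t16/fake16 encodings when true16 instructions are
// in use.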
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    if (Def.isEarlyClobber() && Def.isReg() &&

    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto *S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
       !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {

    const auto killDef = [&]() -> void {
      if (MRI.hasOneNonDBGUse(DefReg)) {
        DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
        DefMI->getOperand(0).setIsDead(true);
        for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)

        Register DummyReg = MRI.cloneVirtualRegister(DefReg);
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

            MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())

    MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {
                                  ST.hasInv2PiInlineImm());
                                  ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                  ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                        int64_t ImmVal) const {
  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))

  return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                  AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

    if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();

    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

    if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)

    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;

  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);

  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
4918 if (
MI.isInlineAsm()) {
4931 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
4932 ErrInfo =
"inlineasm operand has incorrect register class.";
4940 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
4941 ErrInfo =
"missing memory operand from image instruction.";
4946 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
4949 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
4950 "all fp values to integers.";
4954 int RegClass =
Desc.operands()[i].RegClass;
4957 switch (OpInfo.OperandType) {
4959 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
4960 ErrInfo =
"Illegal immediate value for operand.";
4994 ErrInfo =
"Illegal immediate value for operand.";
5001 ErrInfo =
"Expected inline constant for operand.";
5016 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5017 ErrInfo =
"Expected immediate, but got non-immediate";
5026 if (OpInfo.isGenericType())
5041 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5043 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5045 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5046 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5053 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5054 ErrInfo =
"Subtarget requires even aligned vector registers";
5059 if (RegClass != -1) {
5060 if (Reg.isVirtual())
5065 ErrInfo =
"Operand has incorrect register class.";
5073 if (!ST.hasSDWA()) {
5074 ErrInfo =
"SDWA is not supported on this target";
5078 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5079 AMDGPU::OpName::dst_sel}) {
5083 int64_t Imm = MO->
getImm();
5085 ErrInfo =
"Invalid SDWA selection";
5090 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5092 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5097 if (!ST.hasSDWAScalar()) {
5099 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(
MRI, MO.
getReg()))) {
5100 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5107 "Only reg allowed as operands in SDWA instructions on GFX9+";
5113 if (!ST.hasSDWAOmod()) {
5116 if (OMod !=
nullptr &&
5118 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5123 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5124 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5125 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5126 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5129 unsigned Mods = Src0ModsMO->
getImm();
5132 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5138 if (
isVOPC(BasicOpcode)) {
5139 if (!ST.hasSDWASdst() && DstIdx != -1) {
5142 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5143 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5146 }
else if (!ST.hasSDWAOutModsVOPC()) {
5149 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5150 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5156 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5157 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5164 if (DstUnused && DstUnused->isImm() &&
5167 if (!Dst.isReg() || !Dst.isTied()) {
5168 ErrInfo =
"Dst register should have tied register";
5173 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5176 "Dst register should be tied to implicit use of preserved register";
5180 ErrInfo =
"Dst register should use same physical register as preserved";
5187 if (
isImage(Opcode) && !
MI.mayStore()) {
5199 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5207 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5211 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5212 if (RegCount > DstSize) {
5213 ErrInfo =
"Image instruction returns too many registers for dst "
5222 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5223 unsigned ConstantBusCount = 0;
5224 bool UsesLiteral =
false;
5227 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5231 LiteralVal = &
MI.getOperand(ImmIdx);
5240 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5251 }
else if (!MO.
isFI()) {
5258 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5268 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5269 return !RI.regsOverlap(SGPRUsed, SGPR);
5278 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5279 Opcode != AMDGPU::V_WRITELANE_B32) {
5280 ErrInfo =
"VOP* instruction violates constant bus restriction";
5284 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5285 ErrInfo =
"VOP3 instruction uses literal";
5292 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5293 unsigned SGPRCount = 0;
5296 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5304 if (MO.
getReg() != SGPRUsed)
5309 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5310 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5317 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5318 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5325 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5335 ErrInfo =
"ABS not allowed in VOP3B instructions";
5348 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5355 if (
Desc.isBranch()) {
5357 ErrInfo =
"invalid branch target for SOPK instruction";
5364 ErrInfo =
"invalid immediate for SOPK instruction";
5369 ErrInfo =
"invalid immediate for SOPK instruction";
5376 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5377 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5378 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5379 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5380 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5381 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5383 const unsigned StaticNumOps =
5384 Desc.getNumOperands() +
Desc.implicit_uses().size();
5385 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5390 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5391 ErrInfo =
"missing implicit register operands";
5397 if (!Dst->isUse()) {
5398 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5403 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5404 UseOpIdx != StaticNumOps + 1) {
5405 ErrInfo =
"movrel implicit operands should be tied";
5412 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5414 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5415 ErrInfo =
"src0 should be subreg of implicit vector use";
5423 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5424 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5430 if (
MI.mayStore() &&
5435 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5436 ErrInfo =
"scalar stores must use m0 as offset register";
5442 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5444 if (
Offset->getImm() != 0) {
5445 ErrInfo =
"subtarget does not support offsets in flat instructions";
5450 if (
isDS(
MI) && !ST.hasGDS()) {
5452 if (GDSOp && GDSOp->
getImm() != 0) {
5453 ErrInfo =
"GDS is not supported on this subtarget";
5461 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5462 AMDGPU::OpName::vaddr0);
5463 AMDGPU::OpName RSrcOpName =
5464 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5465 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5473 ErrInfo =
"dim is out of range";
5478 if (ST.hasR128A16()) {
5480 IsA16 = R128A16->
getImm() != 0;
5481 }
else if (ST.hasA16()) {
5483 IsA16 = A16->
getImm() != 0;
5486 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5488 unsigned AddrWords =
5491 unsigned VAddrWords;
5493 VAddrWords = RsrcIdx - VAddr0Idx;
5494 if (ST.hasPartialNSAEncoding() &&
5496 unsigned LastVAddrIdx = RsrcIdx - 1;
5497 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5505 if (VAddrWords != AddrWords) {
5507 <<
" but got " << VAddrWords <<
"\n");
5508 ErrInfo =
"bad vaddr size";
5518 unsigned DC = DppCt->
getImm();
5519 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5520 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5521 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5522 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5523 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5524 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5525 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5526 ErrInfo =
"Invalid dpp_ctrl value";
5529 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5531 ErrInfo =
"Invalid dpp_ctrl value: "
5532 "wavefront shifts are not supported on GFX10+";
5535 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5537 ErrInfo =
"Invalid dpp_ctrl value: "
5538 "broadcasts are not supported on GFX10+";
5541 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5543 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5544 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5545 !ST.hasGFX90AInsts()) {
5546 ErrInfo =
"Invalid dpp_ctrl value: "
5547 "row_newbroadcast/row_share is not supported before "
5551 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5552 ErrInfo =
"Invalid dpp_ctrl value: "
5553 "row_share and row_xmask are not supported before GFX10";
5558 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5561 ErrInfo =
"Invalid dpp_ctrl value: "
5562 "DP ALU dpp only support row_newbcast";
5569 AMDGPU::OpName DataName =
5570 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5576 if (ST.hasGFX90AInsts()) {
5577 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5578 (RI.isAGPR(
MRI, Dst->getReg()) != RI.isAGPR(
MRI,
Data->getReg()))) {
5579 ErrInfo =
"Invalid register class: "
5580 "vdata and vdst should be both VGPR or AGPR";
5583 if (
Data && Data2 &&
5585 ErrInfo =
"Invalid register class: "
5586 "both data operands should be VGPR or AGPR";
5590 if ((Dst && RI.isAGPR(
MRI, Dst->getReg())) ||
5592 (Data2 && RI.isAGPR(
MRI, Data2->
getReg()))) {
5593 ErrInfo =
"Invalid register class: "
5594 "agpr loads and stores not supported on this GPU";
5600 if (ST.needsAlignedVGPRs()) {
5601 const auto isAlignedReg = [&
MI, &
MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5606 if (Reg.isPhysical())
5607 return !(RI.getHWRegIndex(Reg) & 1);
5609 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5610 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5613 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5614 Opcode == AMDGPU::DS_GWS_BARRIER) {
5616 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5617 ErrInfo =
"Subtarget requires even aligned vector registers "
5618 "for DS_GWS instructions";
5624 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5625 ErrInfo =
"Subtarget requires even aligned vector registers "
5626 "for vaddr operand of image instructions";
5632 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5634 if (Src->isReg() && RI.isSGPRReg(
MRI, Src->getReg())) {
5635 ErrInfo =
"Invalid register class: "
5636 "v_accvgpr_write with an SGPR is not supported on this GPU";
5641 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5644 ErrInfo =
"pseudo expects only physical SGPRs";
5651 if (!ST.hasScaleOffset()) {
5652 ErrInfo =
"Subtarget does not support offset scaling";
5656 ErrInfo =
"Instruction does not support offset scaling";
5665 for (
unsigned I = 0;
I < 3; ++
I) {
5678 switch (
MI.getOpcode()) {
5679 default:
return AMDGPU::INSTRUCTION_LIST_END;
5680 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5681 case AMDGPU::COPY:
return AMDGPU::COPY;
5682 case AMDGPU::PHI:
return AMDGPU::PHI;
5683 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5684 case AMDGPU::WQM:
return AMDGPU::WQM;
5685 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5686 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5687 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5688 case AMDGPU::S_MOV_B32: {
5690 return MI.getOperand(1).isReg() ||
5691 RI.isAGPR(
MRI,
MI.getOperand(0).getReg()) ?
5692 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5694 case AMDGPU::S_ADD_I32:
5695 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5696 case AMDGPU::S_ADDC_U32:
5697 return AMDGPU::V_ADDC_U32_e32;
5698 case AMDGPU::S_SUB_I32:
5699 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5702 case AMDGPU::S_ADD_U32:
5703 return AMDGPU::V_ADD_CO_U32_e32;
5704 case AMDGPU::S_SUB_U32:
5705 return AMDGPU::V_SUB_CO_U32_e32;
5706 case AMDGPU::S_ADD_U64_PSEUDO:
5707 return AMDGPU::V_ADD_U64_PSEUDO;
5708 case AMDGPU::S_SUB_U64_PSEUDO:
5709 return AMDGPU::V_SUB_U64_PSEUDO;
5710 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5711 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5712 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5713 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5714 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5715 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5716 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5717 case AMDGPU::S_XNOR_B32:
5718 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5719 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5720 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5721 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5722 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5723 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5724 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5725 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5726 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5727 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5728 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5729 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5730 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5731 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5732 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5733 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5734 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5735 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5736 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5737 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5738 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5739 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5740 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5741 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5742 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5743 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5744 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5745 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5746 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5747 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5748 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5749 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5750 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5751 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5752 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5753 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5754 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5755 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5756 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5757 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5758 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5759 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5760 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5761 case AMDGPU::S_CVT_F32_F16:
5762 case AMDGPU::S_CVT_HI_F32_F16:
5763 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5764 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5765 case AMDGPU::S_CVT_F16_F32:
5766 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5767 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5768 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5769 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5770 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5771 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5772 case AMDGPU::S_CEIL_F16:
5773 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
5774 : AMDGPU::V_CEIL_F16_fake16_e64;
5775 case AMDGPU::S_FLOOR_F16:
5776 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
5777 : AMDGPU::V_FLOOR_F16_fake16_e64;
5778 case AMDGPU::S_TRUNC_F16:
5779 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5780 : AMDGPU::V_TRUNC_F16_fake16_e64;
5781 case AMDGPU::S_RNDNE_F16:
5782 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
5783 : AMDGPU::V_RNDNE_F16_fake16_e64;
5784 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5785 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5786 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5787 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5788 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5789 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5790 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5791 case AMDGPU::S_ADD_F16:
5792 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
5793 : AMDGPU::V_ADD_F16_fake16_e64;
5794 case AMDGPU::S_SUB_F16:
5795 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
5796 : AMDGPU::V_SUB_F16_fake16_e64;
5797 case AMDGPU::S_MIN_F16:
5798 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
5799 : AMDGPU::V_MIN_F16_fake16_e64;
5800 case AMDGPU::S_MAX_F16:
5801 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
5802 : AMDGPU::V_MAX_F16_fake16_e64;
5803 case AMDGPU::S_MINIMUM_F16:
5804 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5805 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5806 case AMDGPU::S_MAXIMUM_F16:
5807 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5808 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5809 case AMDGPU::S_MUL_F16:
5810 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
5811 : AMDGPU::V_MUL_F16_fake16_e64;
5812 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5813 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5814 case AMDGPU::S_FMAC_F16:
5815 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
5816 : AMDGPU::V_FMAC_F16_fake16_e64;
5817 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5818 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5819 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5820 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5821 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5822 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5823 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5824 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5825 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5826 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5827 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5828 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5829 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5830 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5831 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5832 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5833 case AMDGPU::S_CMP_LT_F16:
5834 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
5835 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5836 case AMDGPU::S_CMP_EQ_F16:
5837 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
5838 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5839 case AMDGPU::S_CMP_LE_F16:
5840 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
5841 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5842 case AMDGPU::S_CMP_GT_F16:
5843 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
5844 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5845 case AMDGPU::S_CMP_LG_F16:
5846 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
5847 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5848 case AMDGPU::S_CMP_GE_F16:
5849 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
5850 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5851 case AMDGPU::S_CMP_O_F16:
5852 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
5853 : AMDGPU::V_CMP_O_F16_fake16_e64;
5854 case AMDGPU::S_CMP_U_F16:
5855 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
5856 : AMDGPU::V_CMP_U_F16_fake16_e64;
5857 case AMDGPU::S_CMP_NGE_F16:
5858 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
5859 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5860 case AMDGPU::S_CMP_NLG_F16:
5861 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
5862 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5863 case AMDGPU::S_CMP_NGT_F16:
5864 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
5865 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5866 case AMDGPU::S_CMP_NLE_F16:
5867 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
5868 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5869 case AMDGPU::S_CMP_NEQ_F16:
5870 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
5871 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5872 case AMDGPU::S_CMP_NLT_F16:
5873 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
5874 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5875 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5876 case AMDGPU::V_S_EXP_F16_e64:
5877 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
5878 : AMDGPU::V_EXP_F16_fake16_e64;
5879 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5880 case AMDGPU::V_S_LOG_F16_e64:
5881 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
5882 : AMDGPU::V_LOG_F16_fake16_e64;
5883 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5884 case AMDGPU::V_S_RCP_F16_e64:
5885 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
5886 : AMDGPU::V_RCP_F16_fake16_e64;
5887 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5888 case AMDGPU::V_S_RSQ_F16_e64:
5889 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
5890 : AMDGPU::V_RSQ_F16_fake16_e64;
5891 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5892 case AMDGPU::V_S_SQRT_F16_e64:
5893 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
5894 : AMDGPU::V_SQRT_F16_fake16_e64;
5897 "Unexpected scalar opcode without corresponding vector one!");
5946 "Not a whole wave func");
5949 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
5950 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
5961 case AMDGPU::AV_32RegClassID:
5962 RCID = AMDGPU::VGPR_32RegClassID;
5964 case AMDGPU::AV_64RegClassID:
5965 RCID = AMDGPU::VReg_64RegClassID;
5967 case AMDGPU::AV_96RegClassID:
5968 RCID = AMDGPU::VReg_96RegClassID;
5970 case AMDGPU::AV_128RegClassID:
5971 RCID = AMDGPU::VReg_128RegClassID;
5973 case AMDGPU::AV_160RegClassID:
5974 RCID = AMDGPU::VReg_160RegClassID;
5976 case AMDGPU::AV_512RegClassID:
5977 RCID = AMDGPU::VReg_512RegClassID;
5992 auto RegClass = TID.
operands()[OpNum].RegClass;
5995 return RI.getRegClass(RegClass);
6001 unsigned OpNo)
const {
6003 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6004 Desc.operands()[OpNo].RegClass == -1) {
6007 if (Reg.isVirtual()) {
6009 MI.getParent()->getParent()->getRegInfo();
6010 return MRI.getRegClass(Reg);
6012 return RI.getPhysRegBaseClass(Reg);
6015 unsigned RCID =
Desc.operands()[OpNo].RegClass;
6024 unsigned RCID =
get(
MI.getOpcode()).operands()[
OpIdx].RegClass;
6026 unsigned Size = RI.getRegSizeInBits(*RC);
6027 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6028 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6029 : AMDGPU::V_MOV_B32_e32;
6031 Opcode = AMDGPU::COPY;
6032 else if (RI.isSGPRClass(RC))
6033 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6047 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6053 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6064 if (SubIdx == AMDGPU::sub0)
6066 if (SubIdx == AMDGPU::sub1)
6078void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6094 if (Reg.isPhysical())
6104 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6107 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6114 unsigned Opc =
MI.getOpcode();
6120 constexpr const AMDGPU::OpName OpNames[] = {
6121 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6124 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6125 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6135 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6136 if (IsAGPR && !ST.hasMAIInsts())
6138 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6142 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6143 const int DataIdx = AMDGPU::getNamedOperandIdx(
6144 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6145 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6146 MI.getOperand(DataIdx).isReg() &&
6147 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6149 if ((
int)
OpIdx == DataIdx) {
6150 if (VDstIdx != -1 &&
6151 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6154 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6155 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6156 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6161 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6162 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6182 constexpr const unsigned NumOps = 3;
6183 constexpr const AMDGPU::OpName OpNames[
NumOps * 2] = {
6184 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6185 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6186 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6191 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6194 MO = &
MI.getOperand(SrcIdx);
6201 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6205 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6209 return !OpSel && !OpSelHi;
6219 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) :
nullptr;
6228 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6229 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6233 if (!LiteralLimit--)
6243 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6251 if (--ConstantBusLimit <= 0)
6263 if (!LiteralLimit--)
6265 if (--ConstantBusLimit <= 0)
6271 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6275 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6277 !
Op.isIdenticalTo(*MO))
6287 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6301 bool Is64BitOp = Is64BitFPOp ||
6308 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6317 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6336 unsigned Opc =
MI.getOpcode();
6339 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6342 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6348 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6355 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6358 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6364 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6381 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6382 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6383 if (!RI.isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6395 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6397 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6409 if (HasImplicitSGPR || !
MI.isCommutable()) {
6426 if (CommutedOpc == -1) {
6431 MI.setDesc(
get(CommutedOpc));
6435 bool Src0Kill = Src0.
isKill();
6439 else if (Src1.
isReg()) {
6454 unsigned Opc =
MI.getOpcode();
6457 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6458 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6459 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6462 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6463 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6464 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6465 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6466 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6467 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6468 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6472 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6473 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6478 if (VOP3Idx[2] != -1) {
6480 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6481 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6490 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6491 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6493 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6495 SGPRsUsed.
insert(SGPRReg);
6499 for (
int Idx : VOP3Idx) {
6508 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6520 if (RI.hasAGPRs(RI.getRegClassForReg(
MRI, MO.
getReg())) &&
6526 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6533 if (ConstantBusLimit > 0) {
6545 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6546 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6553 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::scale_src0);
6554 if (ScaleSrc0Idx != -1) {
6556 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::scale_src1);
6567 for (
unsigned I = 0;
I < 3; ++
I) {
6580 SRC = RI.getCommonSubClass(SRC, DstRC);
6583 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6585 if (RI.hasAGPRs(VRC)) {
6586 VRC = RI.getEquivalentVGPRClass(VRC);
6587 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6589 get(TargetOpcode::COPY), NewSrcReg)
6596 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6602 for (
unsigned i = 0; i < SubRegs; ++i) {
6603 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6605 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6606 .
addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6612 get(AMDGPU::REG_SEQUENCE), DstReg);
6613 for (
unsigned i = 0; i < SubRegs; ++i) {
6615 MIB.
addImm(RI.getSubRegFromChannel(i));
6628 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6630 SBase->setReg(SGPR);
6633 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6641 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6642 if (OldSAddrIdx < 0)
6658 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6659 if (NewVAddrIdx < 0)
6662 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6666 if (OldVAddrIdx >= 0) {
6668 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6680 if (OldVAddrIdx == NewVAddrIdx) {
6683 MRI.removeRegOperandFromUseList(&NewVAddr);
6684 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6688 MRI.removeRegOperandFromUseList(&NewVAddr);
6689 MRI.addRegOperandToUseList(&NewVAddr);
6691 assert(OldSAddrIdx == NewVAddrIdx);
6693 if (OldVAddrIdx >= 0) {
6694 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6695 AMDGPU::OpName::vdst_in);
6699 if (NewVDstIn != -1) {
6700 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6706 if (NewVDstIn != -1) {
6707 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6728 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6748 unsigned OpSubReg =
Op.getSubReg();
6751 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6757 Register DstReg =
MRI.createVirtualRegister(DstRC);
6767 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6770 bool ImpDef = Def->isImplicitDef();
6771 while (!ImpDef && Def && Def->isCopy()) {
6772 if (Def->getOperand(1).getReg().isPhysical())
6774 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6775 ImpDef = Def && Def->isImplicitDef();
6777 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6796 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6802 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6803 unsigned NumSubRegs =
RegSize / 32;
6804 Register VScalarOp = ScalarOp->getReg();
6806 if (NumSubRegs == 1) {
6807 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6809 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6812 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6814 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6820 CondReg = NewCondReg;
6822 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6830 ScalarOp->setReg(CurReg);
6831 ScalarOp->setIsKill();
6835 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6836 "Unhandled register size");
6838 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
6840 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6842 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6845 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6846 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
6849 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6850 .
addReg(VScalarOp, VScalarOpUndef,
6851 TRI->getSubRegFromChannel(Idx + 1));
6857 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6858 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6864 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6865 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6868 if (NumSubRegs <= 2)
6869 Cmp.addReg(VScalarOp);
6871 Cmp.addReg(VScalarOp, VScalarOpUndef,
6872 TRI->getSubRegFromChannel(Idx, 2));
6876 CondReg = NewCondReg;
6878 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6886 const auto *SScalarOpRC =
6887 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6888 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6892 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6893 unsigned Channel = 0;
6894 for (
Register Piece : ReadlanePieces) {
6895 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6899 ScalarOp->setReg(SScalarOp);
6900 ScalarOp->setIsKill();
6904 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6905 MRI.setSimpleHint(SaveExec, CondReg);
6936 if (!Begin.isValid())
6938 if (!End.isValid()) {
6944 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6952 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
6953 std::numeric_limits<unsigned>::max()) !=
6956 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6962 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6971 for (
auto I = Begin;
I != AfterMI;
I++) {
6972 for (
auto &MO :
I->all_uses())
6973 MRI.clearKillFlags(MO.getReg());
6998 MBB.addSuccessor(LoopBB);
7008 for (
auto &Succ : RemainderBB->
successors()) {
7032static std::tuple<unsigned, unsigned>
7040 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7041 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7044 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7045 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7046 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7047 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7048 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7065 .
addImm(AMDGPU::sub0_sub1)
7071 return std::tuple(RsrcPtr, NewSRsrc);
7108 if (
MI.getOpcode() == AMDGPU::PHI) {
7110 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7111 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7114 MRI.getRegClass(
MI.getOperand(i).getReg());
7115 if (RI.hasVectorRegisters(OpRC)) {
7129 VRC = &AMDGPU::VReg_1RegClass;
7132 ? RI.getEquivalentAGPRClass(SRC)
7133 : RI.getEquivalentVGPRClass(SRC);
7136 ? RI.getEquivalentAGPRClass(VRC)
7137 : RI.getEquivalentVGPRClass(VRC);
7145 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7147 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7163 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7166 if (RI.hasVGPRs(DstRC)) {
7170 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7172 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7190 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7195 if (DstRC != Src0RC) {
7204 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7206 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7212 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7213 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7214 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7215 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7216 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7217 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7218 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7220 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7233 ? AMDGPU::OpName::rsrc
7234 : AMDGPU::OpName::srsrc;
7236 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7239 AMDGPU::OpName SampOpName =
7240 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7242 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7249 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7251 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7255 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7256 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7261 while (Start->getOpcode() != FrameSetupOpcode)
7264 while (End->getOpcode() != FrameDestroyOpcode)
7268 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7269 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7277 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7279 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7281 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7291 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7292 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7293 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7294 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7296 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7303 bool isSoffsetLegal =
true;
7305 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7306 if (SoffsetIdx != -1) {
7309 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7310 isSoffsetLegal =
false;
7314 bool isRsrcLegal =
true;
7316 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7317 if (RsrcIdx != -1) {
7320 isRsrcLegal =
false;
7324 if (isRsrcLegal && isSoffsetLegal)
7348 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7349 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7350 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7352 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7353 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7354 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7356 unsigned RsrcPtr, NewSRsrc;
7363 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7370 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7384 }
else if (!VAddr && ST.hasAddr64()) {
7388 "FIXME: Need to emit flat atomics here");
7390 unsigned RsrcPtr, NewSRsrc;
7393 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7416 MIB.
addImm(CPol->getImm());
7421 MIB.
addImm(TFE->getImm());
7441 MI.removeFromParent();
7446 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7448 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7452 if (!isSoffsetLegal) {
7464 if (!isSoffsetLegal) {
7476 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7477 if (RsrcIdx != -1) {
7478 DeferredList.insert(
MI);
7483 return DeferredList.contains(
MI);
7493 if (!ST.useRealTrue16Insts())
7496 unsigned Opcode =
MI.getOpcode();
7500 OpIdx >=
get(Opcode).getNumOperands() ||
7501 get(Opcode).operands()[
OpIdx].RegClass == -1)
7505 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7509 if (!RI.isVGPRClass(CurrRC))
7512 unsigned RCID =
get(Opcode).operands()[
OpIdx].RegClass;
7514 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7515 Op.setSubReg(AMDGPU::lo16);
7516 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7518 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7519 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7526 Op.setReg(NewDstReg);
7538 while (!Worklist.
empty()) {
7552 "Deferred MachineInstr are not supposed to re-populate worklist");
7570 case AMDGPU::S_ADD_I32:
7571 case AMDGPU::S_SUB_I32: {
7575 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7583 case AMDGPU::S_MUL_U64:
7584 if (ST.hasVectorMulU64()) {
7585 NewOpcode = AMDGPU::V_MUL_U64_e64;
7589 splitScalarSMulU64(Worklist, Inst, MDT);
7593 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7594 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7597 splitScalarSMulPseudo(Worklist, Inst, MDT);
7601 case AMDGPU::S_AND_B64:
7602 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7606 case AMDGPU::S_OR_B64:
7607 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7611 case AMDGPU::S_XOR_B64:
7612 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7616 case AMDGPU::S_NAND_B64:
7617 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7621 case AMDGPU::S_NOR_B64:
7622 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7626 case AMDGPU::S_XNOR_B64:
7627 if (ST.hasDLInsts())
7628 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7630 splitScalar64BitXnor(Worklist, Inst, MDT);
7634 case AMDGPU::S_ANDN2_B64:
7635 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7639 case AMDGPU::S_ORN2_B64:
7640 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7644 case AMDGPU::S_BREV_B64:
7645 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7649 case AMDGPU::S_NOT_B64:
7650 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7654 case AMDGPU::S_BCNT1_I32_B64:
7655 splitScalar64BitBCNT(Worklist, Inst);
7659 case AMDGPU::S_BFE_I64:
7660 splitScalar64BitBFE(Worklist, Inst);
7664 case AMDGPU::S_FLBIT_I32_B64:
7665 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7668 case AMDGPU::S_FF1_I32_B64:
7669 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7673 case AMDGPU::S_LSHL_B32:
7674 if (ST.hasOnlyRevVALUShifts()) {
7675 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7679 case AMDGPU::S_ASHR_I32:
7680 if (ST.hasOnlyRevVALUShifts()) {
7681 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7685 case AMDGPU::S_LSHR_B32:
7686 if (ST.hasOnlyRevVALUShifts()) {
7687 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7691 case AMDGPU::S_LSHL_B64:
7692 if (ST.hasOnlyRevVALUShifts()) {
7694 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7695 : AMDGPU::V_LSHLREV_B64_e64;
7699 case AMDGPU::S_ASHR_I64:
7700 if (ST.hasOnlyRevVALUShifts()) {
7701 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7705 case AMDGPU::S_LSHR_B64:
7706 if (ST.hasOnlyRevVALUShifts()) {
7707 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7712 case AMDGPU::S_ABS_I32:
7713 lowerScalarAbs(Worklist, Inst);
7717 case AMDGPU::S_CBRANCH_SCC0:
7718 case AMDGPU::S_CBRANCH_SCC1: {
7721 bool IsSCC = CondReg == AMDGPU::SCC;
7729 case AMDGPU::S_BFE_U64:
7730 case AMDGPU::S_BFM_B64:
7733 case AMDGPU::S_PACK_LL_B32_B16:
7734 case AMDGPU::S_PACK_LH_B32_B16:
7735 case AMDGPU::S_PACK_HL_B32_B16:
7736 case AMDGPU::S_PACK_HH_B32_B16:
7737 movePackToVALU(Worklist,
MRI, Inst);
7741 case AMDGPU::S_XNOR_B32:
7742 lowerScalarXnor(Worklist, Inst);
7746 case AMDGPU::S_NAND_B32:
7747 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7751 case AMDGPU::S_NOR_B32:
7752 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7756 case AMDGPU::S_ANDN2_B32:
7757 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7761 case AMDGPU::S_ORN2_B32:
7762 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7770 case AMDGPU::S_ADD_CO_PSEUDO:
7771 case AMDGPU::S_SUB_CO_PSEUDO: {
7772 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7773 ? AMDGPU::V_ADDC_U32_e64
7774 : AMDGPU::V_SUBB_U32_e64;
7775 const auto *CarryRC = RI.getWaveMaskRegClass();
7778 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7779 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7786 Register DestReg =
MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
7797 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7801 case AMDGPU::S_UADDO_PSEUDO:
7802 case AMDGPU::S_USUBO_PSEUDO: {
7809 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7810 ? AMDGPU::V_ADD_CO_U32_e64
7811 : AMDGPU::V_SUB_CO_U32_e64;
7813 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest0.
getReg()));
7814 Register DestReg =
MRI.createVirtualRegister(NewRC);
7822 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7829 case AMDGPU::S_CSELECT_B32:
7830 case AMDGPU::S_CSELECT_B64:
7831 lowerSelect(Worklist, Inst, MDT);
7834 case AMDGPU::S_CMP_EQ_I32:
7835 case AMDGPU::S_CMP_LG_I32:
7836 case AMDGPU::S_CMP_GT_I32:
7837 case AMDGPU::S_CMP_GE_I32:
7838 case AMDGPU::S_CMP_LT_I32:
7839 case AMDGPU::S_CMP_LE_I32:
7840 case AMDGPU::S_CMP_EQ_U32:
7841 case AMDGPU::S_CMP_LG_U32:
7842 case AMDGPU::S_CMP_GT_U32:
7843 case AMDGPU::S_CMP_GE_U32:
7844 case AMDGPU::S_CMP_LT_U32:
7845 case AMDGPU::S_CMP_LE_U32:
7846 case AMDGPU::S_CMP_EQ_U64:
7847 case AMDGPU::S_CMP_LG_U64:
7848 case AMDGPU::S_CMP_LT_F32:
7849 case AMDGPU::S_CMP_EQ_F32:
7850 case AMDGPU::S_CMP_LE_F32:
7851 case AMDGPU::S_CMP_GT_F32:
7852 case AMDGPU::S_CMP_LG_F32:
7853 case AMDGPU::S_CMP_GE_F32:
7854 case AMDGPU::S_CMP_O_F32:
7855 case AMDGPU::S_CMP_U_F32:
7856 case AMDGPU::S_CMP_NGE_F32:
7857 case AMDGPU::S_CMP_NLG_F32:
7858 case AMDGPU::S_CMP_NGT_F32:
7859 case AMDGPU::S_CMP_NLE_F32:
7860 case AMDGPU::S_CMP_NEQ_F32:
7861 case AMDGPU::S_CMP_NLT_F32: {
7862 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7866 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7880 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7884 case AMDGPU::S_CMP_LT_F16:
7885 case AMDGPU::S_CMP_EQ_F16:
7886 case AMDGPU::S_CMP_LE_F16:
7887 case AMDGPU::S_CMP_GT_F16:
7888 case AMDGPU::S_CMP_LG_F16:
7889 case AMDGPU::S_CMP_GE_F16:
7890 case AMDGPU::S_CMP_O_F16:
7891 case AMDGPU::S_CMP_U_F16:
7892 case AMDGPU::S_CMP_NGE_F16:
7893 case AMDGPU::S_CMP_NLG_F16:
7894 case AMDGPU::S_CMP_NGT_F16:
7895 case AMDGPU::S_CMP_NLE_F16:
7896 case AMDGPU::S_CMP_NEQ_F16:
7897 case AMDGPU::S_CMP_NLT_F16: {
7898 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7920 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7924 case AMDGPU::S_CVT_HI_F32_F16: {
7926 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7927 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7928 if (ST.useRealTrue16Insts()) {
7933 .
addReg(TmpReg, 0, AMDGPU::hi16)
7949 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7953 case AMDGPU::S_MINIMUM_F32:
7954 case AMDGPU::S_MAXIMUM_F32: {
7956 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7967 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7971 case AMDGPU::S_MINIMUM_F16:
7972 case AMDGPU::S_MAXIMUM_F16: {
7974 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
7975 ? &AMDGPU::VGPR_16RegClass
7976 : &AMDGPU::VGPR_32RegClass);
7988 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7992 case AMDGPU::V_S_EXP_F16_e64:
7993 case AMDGPU::V_S_LOG_F16_e64:
7994 case AMDGPU::V_S_RCP_F16_e64:
7995 case AMDGPU::V_S_RSQ_F16_e64:
7996 case AMDGPU::V_S_SQRT_F16_e64: {
7998 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
7999 ? &AMDGPU::VGPR_16RegClass
8000 : &AMDGPU::VGPR_32RegClass);
8012 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8018 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8026 if (NewOpcode == Opcode) {
8035 if (
MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
8037 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
8041 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8043 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8061 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8063 MRI.replaceRegWith(DstReg, NewDstReg);
8064 MRI.clearKillFlags(NewDstReg);
8078 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8082 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8083 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8084 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8086 get(AMDGPU::IMPLICIT_DEF), Undef);
8088 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8094 MRI.replaceRegWith(DstReg, NewDstReg);
8095 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8097 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8100 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8101 MRI.replaceRegWith(DstReg, NewDstReg);
8102 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8107 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8108 MRI.replaceRegWith(DstReg, NewDstReg);
8110 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8120 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8121 AMDGPU::OpName::src0_modifiers) >= 0)
8125 NewInstr->addOperand(Src);
8128 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8131 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8133 NewInstr.addImm(
Size);
8134 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8138 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8143 "Scalar BFE is only implemented for constant width and offset");
8151 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8152 AMDGPU::OpName::src1_modifiers) >= 0)
8154 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8156 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8157 AMDGPU::OpName::src2_modifiers) >= 0)
8159 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8161 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8163 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8165 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8171 NewInstr->addOperand(
Op);
8178 if (
Op.getReg() == AMDGPU::SCC) {
8180 if (
Op.isDef() && !
Op.isDead())
8181 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8183 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8188 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8189 Register DstReg = NewInstr->getOperand(0).getReg();
8194 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8195 MRI.replaceRegWith(DstReg, NewDstReg);
8204 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8208std::pair<bool, MachineBasicBlock *>
8220 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8223 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8225 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8226 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8234 MRI.replaceRegWith(OldDstReg, ResultReg);
8237 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8238 return std::pair(
true, NewBB);
8241 return std::pair(
false,
nullptr);
8258 bool IsSCC = (CondReg == AMDGPU::SCC);
8266 MRI.replaceRegWith(Dest.
getReg(), CondReg);
8272 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8273 NewCondReg =
MRI.createVirtualRegister(TC);
8277 bool CopyFound =
false;
8278 for (MachineInstr &CandI :
8281 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8283 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8285 .
addReg(CandI.getOperand(1).getReg());
8297 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8305 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg())));
8306 MachineInstr *NewInst;
8307 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8308 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8321 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
8323 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
8335 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8336 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8338 unsigned SubOp = ST.hasAddNoCarry() ?
8339 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8349 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8350 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8364 if (ST.hasDLInsts()) {
8365 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8373 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8374 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8380 bool Src0IsSGPR = Src0.
isReg() &&
8381 RI.isSGPRClass(
MRI.getRegClass(Src0.
getReg()));
8382 bool Src1IsSGPR = Src1.
isReg() &&
8383 RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()));
8385 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8386 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8396 }
else if (Src1IsSGPR) {
8410 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8414 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8420 unsigned Opcode)
const {
8430 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8431 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8443 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8444 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8449 unsigned Opcode)
const {
8459 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8460 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8472 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8473 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8488 const MCInstrDesc &InstDesc =
get(Opcode);
8489 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8491 &AMDGPU::SGPR_32RegClass;
8493 const TargetRegisterClass *Src0SubRC =
8494 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8497 AMDGPU::sub0, Src0SubRC);
8499 const TargetRegisterClass *DestRC =
MRI.getRegClass(Dest.
getReg());
8500 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8501 const TargetRegisterClass *NewDestSubRC =
8502 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8504 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8505 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8508 AMDGPU::sub1, Src0SubRC);
8510 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8511 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
8516 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8523 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8525 Worklist.
insert(&LoHalf);
8526 Worklist.
insert(&HiHalf);
8532 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8543   Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8544   Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8545   Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8553   const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8554   const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8555   const TargetRegisterClass *Src0SubRC =
8556       RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8557   if (RI.isSGPRClass(Src0SubRC))
8558     Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8559   const TargetRegisterClass *Src1SubRC =
8560       RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8561   if (RI.isSGPRClass(Src1SubRC))
8562     Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8566   MachineOperand Op0L =
8568   MachineOperand Op1L =
8570   MachineOperand Op0H =
8572   MachineOperand Op1H =
8590   Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8591   MachineInstr *Op1L_Op0H =
8596   Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8597   MachineInstr *Op1H_Op0L =
8602   Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8603   MachineInstr *Carry =
8608   MachineInstr *LoHalf =
8613   Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8618   MachineInstr *HiHalf =
8629   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8641   addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
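// Illustrative sketch (assumption): the 64 x 64 -> 64-bit product assembled
// above from 32-bit pieces. With A = Ah*2^32 + Al and B = Bh*2^32 + Bl, the low
// dword is mul_lo(Al, Bl) and the high dword folds mul_hi(Al, Bl) with the two
// cross products, which is what the Op1L_Op0H / Op1H_Op0L / Carry values carry.
#include <cstdint>
static uint64_t mul64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t Al = static_cast<uint32_t>(A), Ah = static_cast<uint32_t>(A >> 32);
  uint32_t Bl = static_cast<uint32_t>(B), Bh = static_cast<uint32_t>(B >> 32);
  uint64_t LoFull = static_cast<uint64_t>(Al) * Bl;  // mul_lo + mul_hi of the low halves
  uint32_t Lo = static_cast<uint32_t>(LoFull);       // DestSub0
  uint32_t Hi = static_cast<uint32_t>(LoFull >> 32)  // carry out of the low product
                + Al * Bh + Ah * Bl;                 // cross products (mod 2^32)
  return (static_cast<uint64_t>(Hi) << 32) | Lo;     // REG_SEQUENCE {Lo, Hi}
}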
8652   Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8653   Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8654   Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8662   const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8663   const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8664   const TargetRegisterClass *Src0SubRC =
8665       RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8666   if (RI.isSGPRClass(Src0SubRC))
8667     Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8668   const TargetRegisterClass *Src1SubRC =
8669       RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8670   if (RI.isSGPRClass(Src1SubRC))
8671     Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8675   MachineOperand Op0L =
8677   MachineOperand Op1L =
8681   unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8682                         ? AMDGPU::V_MUL_HI_U32_e64
8683                         : AMDGPU::V_MUL_HI_I32_e64;
8684   MachineInstr *HiHalf =
8687   MachineInstr *LoHalf =
8698   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8706   addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
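// Illustrative sketch (assumption): for the S_MUL_U64_U32 / S_MUL_I64_I32
// pseudos both 64-bit sources are known to be extended 32-bit values, so a
// single mul_lo / mul_hi pair suffices; the unsigned flavour is modeled here.
#include <cstdint>
static uint64_t mulU64FromU32(uint32_t A, uint32_t B) {
  uint32_t Lo = A * B;                                                         // V_MUL_LO -> DestSub0
  uint32_t Hi = static_cast<uint32_t>((static_cast<uint64_t>(A) * B) >> 32);   // V_MUL_HI_U32 -> DestSub1
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}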
8722   const MCInstrDesc &InstDesc = get(Opcode);
8723   const TargetRegisterClass *Src0RC = Src0.isReg() ?
8725                                       &AMDGPU::SGPR_32RegClass;
8727   const TargetRegisterClass *Src0SubRC =
8728       RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8729   const TargetRegisterClass *Src1RC = Src1.isReg() ?
8731                                       &AMDGPU::SGPR_32RegClass;
8733   const TargetRegisterClass *Src1SubRC =
8734       RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8737                                                   AMDGPU::sub0, Src0SubRC);
8739                                                   AMDGPU::sub0, Src1SubRC);
8741                                                   AMDGPU::sub1, Src0SubRC);
8743                                                   AMDGPU::sub1, Src1SubRC);
8745   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8746   const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8747   const TargetRegisterClass *NewDestSubRC =
8748       RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8750   Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8751   MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
8755   Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8756   MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
8760   Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8767   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8769   Worklist.insert(&LoHalf);
8770   Worklist.insert(&HiHalf);
8773   addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8789   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8791   Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8793   MachineOperand* Op0;
8794   MachineOperand* Op1;
8807   Register NewDest = MRI.createVirtualRegister(DestRC);
8813   MRI.replaceRegWith(Dest.getReg(), NewDest);
8829   const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
8830   const TargetRegisterClass *SrcRC = Src.isReg() ?
8831                                      MRI.getRegClass(Src.getReg()) :
8832                                      &AMDGPU::SGPR_32RegClass;
8834   Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8835   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8837   const TargetRegisterClass *SrcSubRC =
8838       RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8841                                                  AMDGPU::sub0, SrcSubRC);
8843                                                  AMDGPU::sub1, SrcSubRC);
8849   MRI.replaceRegWith(Dest.getReg(), ResultReg);
8853   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
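// Illustrative sketch (assumption): V_BCNT_U32_B32 adds its count to a second
// operand, so the 64-bit population count chains two 32-bit counts through
// MidReg, as modeled here.
#include <bit>
#include <cstdint>
static uint32_t popcount64ViaBcnt(uint64_t X) {
  uint32_t Mid = std::popcount(static_cast<uint32_t>(X));       // bcnt(sub0, 0) -> MidReg
  return std::popcount(static_cast<uint32_t>(X >> 32)) + Mid;   // bcnt(sub1, MidReg) -> ResultReg
}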
8872            Offset == 0 && "Not implemented");
8875     Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8876     Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8877     Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8894     MRI.replaceRegWith(Dest.getReg(), ResultReg);
8895     addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8900   Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8901   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8905       .addReg(Src.getReg(), 0, AMDGPU::sub0);
8908       .addReg(Src.getReg(), 0, AMDGPU::sub0)
8913   MRI.replaceRegWith(Dest.getReg(), ResultReg);
8914   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
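// Illustrative sketch (assumption): the signed-BFE case handled above with a
// 32-bit width and zero offset reduces to a sign extension of the low half; the
// high half is an arithmetic shift of sub0 by 31.
#include <cstdint>
static uint64_t sext32To64(uint32_t Lo) {
  uint32_t Hi = static_cast<uint32_t>(static_cast<int32_t>(Lo) >> 31);  // V_ASHRREV_I32 31, sub0
  return (static_cast<uint64_t>(Hi) << 32) | Lo;                        // REG_SEQUENCE {Lo, Hi}
}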
8933   const MCInstrDesc &InstDesc = get(Opcode);
8935   bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8936   unsigned OpcodeAdd =
8937       ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8939   const TargetRegisterClass *SrcRC =
8940       Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8941   const TargetRegisterClass *SrcSubRC =
8942       RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8944   MachineOperand SrcRegSub0 =
8946   MachineOperand SrcRegSub1 =
8949   Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8950   Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8951   Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8952   Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8959       .addReg(IsCtlz ? MidReg1 : MidReg2)
8965       .addReg(IsCtlz ? MidReg2 : MidReg1);
8967   MRI.replaceRegWith(Dest.getReg(), MidReg4);
8969   addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
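// Illustrative sketch (assumption): the 64-bit count is built above from the
// 32-bit FFBH/FFBL primitive, which returns ~0u when no bit is found; a clamped
// add of 32 followed by V_MIN_U32 then selects the correct half. The ctlz
// flavour is modeled here.
#include <cstdint>
static uint32_t ffbh32(uint32_t V) { return V ? __builtin_clz(V) : ~0u; }
static uint32_t ctlz64ViaHalves(uint64_t X) {
  uint32_t CntLo = ffbh32(static_cast<uint32_t>(X));        // FFBH on sub0 (MidReg1)
  uint32_t CntHi = ffbh32(static_cast<uint32_t>(X >> 32));  // FFBH on sub1 (MidReg2)
  uint64_t Add = static_cast<uint64_t>(CntLo) + 32;         // V_ADD_U32 ..., 32 with clamp (MidReg3)
  uint32_t LoPlus32 = Add > 0xffffffffu ? 0xffffffffu : static_cast<uint32_t>(Add);
  return CntHi < LoPlus32 ? CntHi : LoPlus32;               // V_MIN_U32 (MidReg4)
}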
8972 void SIInstrInfo::addUsersToMoveToVALUWorklist(
8976     MachineInstr &UseMI = *MO.getParent();
8980     switch (UseMI.getOpcode()) {
8983     case AMDGPU::SOFT_WQM:
8984     case AMDGPU::STRICT_WWM:
8985     case AMDGPU::STRICT_WQM:
8986     case AMDGPU::REG_SEQUENCE:
8988     case AMDGPU::INSERT_SUBREG:
8991       OpNo = MO.getOperandNo();
9006   Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9013   case AMDGPU::S_PACK_LL_B32_B16: {
9014     Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9015     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9032   case AMDGPU::S_PACK_LH_B32_B16: {
9033     Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9042   case AMDGPU::S_PACK_HL_B32_B16: {
9043     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9053   case AMDGPU::S_PACK_HH_B32_B16: {
9054     Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9055     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9072   MRI.replaceRegWith(Dest.getReg(), ResultReg);
9073   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
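// Illustrative sketch (assumption): the VALU expansion of S_PACK_LL_B32_B16
// masks the low 16 bits of src0 and shifts the low 16 bits of src1 into the
// upper half, which is the packing modeled here.
#include <cstdint>
static uint32_t packLL(uint32_t Src0, uint32_t Src1) {
  uint32_t Tmp = Src0 & 0xffffu;     // V_AND_B32 with ImmReg = 0xffff -> TmpReg
  return Tmp | (Src1 << 16);         // V_LSHL_OR_B32 Src1, 16, TmpReg -> ResultReg
}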
9082   assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9083          !Op.isDead() && Op.getParent() == &SCCDefInst);
9084   SmallVector<MachineInstr *, 4> CopyToDelete;
9087   for (MachineInstr &MI :
9091     int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9094       MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9095       Register DestReg = MI.getOperand(0).getReg();
9097       MRI.replaceRegWith(DestReg, NewCond);
9102       MI.getOperand(SCCIdx).setReg(NewCond);
9108     if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9111   for (auto &Copy : CopyToDelete)
9112     Copy->eraseFromParent();
9120 void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9126   for (MachineInstr &MI :
9129     if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9131     if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9140   const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9148   case AMDGPU::REG_SEQUENCE:
9149   case AMDGPU::INSERT_SUBREG:
9151   case AMDGPU::SOFT_WQM:
9152   case AMDGPU::STRICT_WWM:
9153   case AMDGPU::STRICT_WQM: {
9155     if (RI.isAGPRClass(SrcRC)) {
9156       if (RI.isAGPRClass(NewDstRC))
9161       case AMDGPU::REG_SEQUENCE:
9162       case AMDGPU::INSERT_SUBREG:
9163         NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9166         NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9172       if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9175       NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9189                                    int OpIndices[3]) const {
9190   const MCInstrDesc &Desc = MI.getDesc();
9206   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
9208   for (unsigned i = 0; i < 3; ++i) {
9209     int Idx = OpIndices[i];
9213     const MachineOperand &MO = MI.getOperand(Idx);
9219     const TargetRegisterClass *OpRC =
9220         RI.getRegClass(Desc.operands()[Idx].RegClass);
9221     bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9227       const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9228       if (RI.isSGPRClass(RegRC))
9246   if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9247     SGPRReg = UsedSGPRs[0];
9250   if (!SGPRReg && UsedSGPRs[1]) {
9251     if (UsedSGPRs[1] == UsedSGPRs[2])
9252       SGPRReg = UsedSGPRs[1];
9259                                                 AMDGPU::OpName OperandName) const {
9260   if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9263   int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9267   return &MI.getOperand(Idx);
9281   if (ST.isAmdHsaOS()) {
9284     RsrcDataFormat |= (1ULL << 56);
9289       RsrcDataFormat |= (2ULL << 59);
9292   return RsrcDataFormat;
9302   uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9307   uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9314     Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9320   unsigned Opc = MI.getOpcode();
9326   return get(Opc).mayLoad() &&
9331                                       int &FrameIndex) const {
9333   if (!Addr || !Addr->isFI())
9344                                           int &FrameIndex) const {
9352                                           int &FrameIndex) const {
9366                                          int &FrameIndex) const {
9383   while (++I != E && I->isInsideBundle()) {
9384     assert(!I->isBundle() && "No nested bundle!");
9392   unsigned Opc = MI.getOpcode();
9394   unsigned DescSize = Desc.getSize();
9399   unsigned Size = DescSize;
9403     if (MI.isBranch() && ST.hasOffset3fBug())
9414     bool HasLiteral = false;
9415     unsigned LiteralSize = 4;
9416     for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9421         if (ST.has64BitLiterals()) {
9422           switch (OpInfo.OperandType) {
9438     return HasLiteral ? DescSize + LiteralSize : DescSize;
9443     int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9447     int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9448     return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9452   case TargetOpcode::BUNDLE:
9454   case TargetOpcode::INLINEASM:
9455   case TargetOpcode::INLINEASM_BR: {
9457     const char *AsmStr = MI.getOperand(0).getSymbolName();
9461     if (MI.isMetaInstruction())
9465   const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9468   unsigned LoInstOpcode = D16Info->LoOp;
9470   DescSize = Desc.getSize();
9481   if (MI.memoperands_empty())
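// Illustrative sketch (assumption): in the size computation above, a trailing
// literal adds LiteralSize bytes to the base encoding, and the MIMG case rounds
// the vaddr dwords past the first up to groups of four, adding one extra dword
// per group on top of the 8-byte base.
static unsigned mimgInstSize(int RSrcIdx, int VAddr0Idx) {
  // e.g. two vaddr dwords: 8 + 4 * ((2 + 2) / 4) = 12 bytes
  return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
}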
9493 static const std::pair<int, const char *> TargetIndices[] = {
9531 std::pair<unsigned, unsigned>
9538   static const std::pair<unsigned, const char *> TargetFlags[] = {
9556   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9571     return AMDGPU::WWM_COPY;
9573   return AMDGPU::COPY;
9585   bool IsNullOrVectorRegister = true;
9588     IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9593   return IsNullOrVectorRegister &&
9595          (Opcode == AMDGPU::IMPLICIT_DEF &&
9597          (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
9598           MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9606   if (ST.hasAddNoCarry())
9610   Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9611   MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9622   if (ST.hasAddNoCarry())
9629                                          *RI.getBoolRC(), I, false,
9642   case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9643   case AMDGPU::SI_KILL_I1_TERMINATOR:
9652   case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9653     return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9654   case AMDGPU::SI_KILL_I1_PSEUDO:
9655     return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9667   const unsigned OffsetBits =
9669   return (1 << OffsetBits) - 1;
9676   if (MI.isInlineAsm())
9679   for (auto &Op : MI.implicit_operands()) {
9680     if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
9681       Op.setReg(AMDGPU::VCC_LO);
9690   int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
9694   const auto RCID = MI.getDesc().operands()[Idx].RegClass;
9695   return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
9712   if (Imm <= MaxImm + 64) {
9714     Overflow = Imm - MaxImm;
9741   if (ST.hasRestrictedSOffset())
9784   if (!ST.hasFlatInstOffsets())
9792   if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9804 std::pair<int64_t, int64_t>
9807   int64_t RemainderOffset = COffsetVal;
9808   int64_t ImmField = 0;
9813   if (AllowNegative) {
9815     int64_t D = 1LL << NumBits;
9816     RemainderOffset = (COffsetVal / D) * D;
9817     ImmField = COffsetVal - RemainderOffset;
9819     if (ST.hasNegativeUnalignedScratchOffsetBug() &&
9821         (ImmField % 4) != 0) {
9823       RemainderOffset += ImmField % 4;
9824       ImmField -= ImmField % 4;
9826   } else if (COffsetVal >= 0) {
9828     RemainderOffset = COffsetVal - ImmField;
9832   assert(RemainderOffset + ImmField == COffsetVal);
9833   return {ImmField, RemainderOffset};
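// Illustrative sketch (assumption): the signed split above, for an offset that
// does not fit the immediate field. E.g. with NumBits = 12 and COffsetVal = -9000:
// D = 4096, RemainderOffset = (-9000 / 4096) * 4096 = -8192 (truncating
// division), ImmField = -808, and ImmField + RemainderOffset == COffsetVal.
#include <cassert>
#include <cstdint>
#include <utility>
static std::pair<int64_t, int64_t> splitSignedOffset(int64_t Off, unsigned NumBits) {
  int64_t D = 1LL << NumBits;
  int64_t Remainder = (Off / D) * D;   // part that must be materialized in the address
  int64_t Imm = Off - Remainder;       // part that fits the instruction's offset field
  assert(Imm + Remainder == Off);
  return {Imm, Remainder};
}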
9837 if (ST.hasNegativeScratchOffsetBug() &&
9845 switch (ST.getGeneration()) {
9871 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9872 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9873 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9874 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9875 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9876 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9877 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9878 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9885 #define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9886 case OPCODE##_dpp: \
9887 case OPCODE##_e32: \
9888 case OPCODE##_e64: \
9889 case OPCODE##_e64_dpp: \
9904 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9905 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
9906 case AMDGPU::V_FMA_F16_gfx9_e64:
9907 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
9908 case AMDGPU::V_INTERP_P2_F16:
9909 case AMDGPU::V_MAD_F16_e64:
9910 case AMDGPU::V_MAD_U16_e64:
9911 case AMDGPU::V_MAD_I16_e64:
9933 switch (ST.getGeneration()) {
9946   if (isMAI(Opcode)) {
9954     if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
9961   if (ST.hasGFX90AInsts()) {
9963     if (ST.hasGFX940Insts())
9994   for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
9995     if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
9996       auto &RegOp = MI.getOperand(1 + 2 * I);
10008   switch (MI.getOpcode()) {
10010   case AMDGPU::REG_SEQUENCE:
10014   case AMDGPU::INSERT_SUBREG:
10015     if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10032   if (!P.Reg.isVirtual())
10036   auto *DefInst = MRI.getVRegDef(RSR.Reg);
10037   while (auto *MI = DefInst) {
10039     switch (MI->getOpcode()) {
10041     case AMDGPU::V_MOV_B32_e32: {
10042       auto &Op1 = MI->getOperand(1);
10047       DefInst = MRI.getVRegDef(RSR.Reg);
10055       DefInst = MRI.getVRegDef(RSR.Reg);
10068   assert(MRI.isSSA() && "Must be run on SSA");
10070   auto *TRI = MRI.getTargetRegisterInfo();
10071   auto *DefBB = DefMI.getParent();
10075   if (UseMI.getParent() != DefBB)
10078   const int MaxInstScan = 20;
10082   auto E = UseMI.getIterator();
10083   for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10084     if (I->isDebugInstr())
10087     if (++NumInst > MaxInstScan)
10090     if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10100   assert(MRI.isSSA() && "Must be run on SSA");
10102   auto *TRI = MRI.getTargetRegisterInfo();
10103   auto *DefBB = DefMI.getParent();
10105   const int MaxUseScan = 10;
10108   for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10109     auto &UseInst = *Use.getParent();
10112     if (UseInst.getParent() != DefBB || UseInst.isPHI())
10115     if (++NumUse > MaxUseScan)
10122   const int MaxInstScan = 20;
10126   for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10129     if (I->isDebugInstr())
10132     if (++NumInst > MaxInstScan)
10145       if (Reg == VReg && --NumUse == 0)
10147     } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10156   auto Cur = MBB.begin();
10157   if (Cur != MBB.end())
10159       if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10162     } while (Cur != MBB.end() && Cur != LastPHIIt);
10171   if (InsPt != MBB.end() &&
10172       (InsPt->getOpcode() == AMDGPU::SI_IF ||
10173        InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10174        InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10175       InsPt->definesRegister(Src, nullptr)) {
10179         .addReg(Src, 0, SrcSubReg)
10204   if (isFullCopyInstr(MI)) {
10205     Register DstReg = MI.getOperand(0).getReg();
10206     Register SrcReg = MI.getOperand(1).getReg();
10213         MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10217         MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10228                                          unsigned *PredCost) const {
10229   if (MI.isBundle()) {
10232     unsigned Lat = 0, Count = 0;
10233     for (++I; I != E && I->isBundledWithPred(); ++I) {
10235       Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10237     return Lat + Count - 1;
10240   return SchedModel.computeInstrLatency(&MI);
10246   unsigned Opcode = MI.getOpcode();
10251                        : MI.getOperand(1).getReg();
10252     LLT DstTy = MRI.getType(Dst);
10253     LLT SrcTy = MRI.getType(Src);
10255     unsigned SrcAS = SrcTy.getAddressSpace();
10258            ST.hasGloballyAddressableScratch()
10266   if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10267     return HandleAddrSpaceCast(MI);
10270     auto IID = GI->getIntrinsicID();
10277     case Intrinsic::amdgcn_addrspacecast_nonnull:
10278       return HandleAddrSpaceCast(MI);
10279     case Intrinsic::amdgcn_if:
10280     case Intrinsic::amdgcn_else:
10294   if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10295       Opcode == AMDGPU::G_SEXTLOAD) {
10296     if (MI.memoperands_empty())
10300       return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10301              mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10309   if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10310       Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10311       Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10324   unsigned opcode = MI.getOpcode();
10325   if (opcode == AMDGPU::V_READLANE_B32 ||
10326       opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10327       opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10330   if (isCopyInstr(MI)) {
10334         RI.getPhysRegBaseClass(srcOp.getReg());
10342   if (MI.isPreISelOpcode())
10357   if (MI.memoperands_empty())
10361     return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10362            mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10377   for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10379     if (!SrcOp.isReg())
10383       if (!Reg || !SrcOp.readsReg())
10389       if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10416         F, "ds_ordered_count unsupported for this calling conv"));
10430                                 Register &SrcReg2, int64_t &CmpMask,
10431                                 int64_t &CmpValue) const {
10432   if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10435   switch (MI.getOpcode()) {
10438   case AMDGPU::S_CMP_EQ_U32:
10439   case AMDGPU::S_CMP_EQ_I32:
10440   case AMDGPU::S_CMP_LG_U32:
10441   case AMDGPU::S_CMP_LG_I32:
10442   case AMDGPU::S_CMP_LT_U32:
10443   case AMDGPU::S_CMP_LT_I32:
10444   case AMDGPU::S_CMP_GT_U32:
10445   case AMDGPU::S_CMP_GT_I32:
10446   case AMDGPU::S_CMP_LE_U32:
10447   case AMDGPU::S_CMP_LE_I32:
10448   case AMDGPU::S_CMP_GE_U32:
10449   case AMDGPU::S_CMP_GE_I32:
10450   case AMDGPU::S_CMP_EQ_U64:
10451   case AMDGPU::S_CMP_LG_U64:
10452     SrcReg = MI.getOperand(0).getReg();
10453     if (MI.getOperand(1).isReg()) {
10454       if (MI.getOperand(1).getSubReg())
10456       SrcReg2 = MI.getOperand(1).getReg();
10458     } else if (MI.getOperand(1).isImm()) {
10460       CmpValue = MI.getOperand(1).getImm();
10466   case AMDGPU::S_CMPK_EQ_U32:
10467   case AMDGPU::S_CMPK_EQ_I32:
10468   case AMDGPU::S_CMPK_LG_U32:
10469   case AMDGPU::S_CMPK_LG_I32:
10470   case AMDGPU::S_CMPK_LT_U32:
10471   case AMDGPU::S_CMPK_LT_I32:
10472   case AMDGPU::S_CMPK_GT_U32:
10473   case AMDGPU::S_CMPK_GT_I32:
10474   case AMDGPU::S_CMPK_LE_U32:
10475   case AMDGPU::S_CMPK_LE_I32:
10476   case AMDGPU::S_CMPK_GE_U32:
10477   case AMDGPU::S_CMPK_GE_I32:
10478     SrcReg = MI.getOperand(0).getReg();
10480     CmpValue = MI.getOperand(1).getImm();
10489                                        Register SrcReg2, int64_t CmpMask,
10498   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10499                                this](int64_t ExpectedValue, unsigned SrcSize,
10500                                      bool IsReversible, bool IsSigned) -> bool {
10525     if (!Def || Def->getParent() != CmpInstr.getParent())
10528     if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10529         Def->getOpcode() != AMDGPU::S_AND_B64)
10533     const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10544       SrcOp = &Def->getOperand(2);
10545     else if (isMask(&Def->getOperand(2)))
10546       SrcOp = &Def->getOperand(1);
10554     if (IsSigned && BitNo == SrcSize - 1)
10557     ExpectedValue <<= BitNo;
10559     bool IsReversedCC = false;
10560     if (CmpValue != ExpectedValue) {
10563       IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10568     Register DefReg = Def->getOperand(0).getReg();
10569     if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10572     for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10574       if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10575           I->killsRegister(AMDGPU::SCC, &RI))
10580         Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
10584     if (!MRI->use_nodbg_empty(DefReg)) {
10592     unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10593                                                      : AMDGPU::S_BITCMP1_B32
10594                                       : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10595                                                      : AMDGPU::S_BITCMP1_B64;
10600     Def->eraseFromParent();
10608   case AMDGPU::S_CMP_EQ_U32:
10609   case AMDGPU::S_CMP_EQ_I32:
10610   case AMDGPU::S_CMPK_EQ_U32:
10611   case AMDGPU::S_CMPK_EQ_I32:
10612     return optimizeCmpAnd(1, 32, true, false);
10613   case AMDGPU::S_CMP_GE_U32:
10614   case AMDGPU::S_CMPK_GE_U32:
10615     return optimizeCmpAnd(1, 32, false, false);
10616   case AMDGPU::S_CMP_GE_I32:
10617   case AMDGPU::S_CMPK_GE_I32:
10618     return optimizeCmpAnd(1, 32, false, true);
10619   case AMDGPU::S_CMP_EQ_U64:
10620     return optimizeCmpAnd(1, 64, true, false);
10621   case AMDGPU::S_CMP_LG_U32:
10622   case AMDGPU::S_CMP_LG_I32:
10623   case AMDGPU::S_CMPK_LG_U32:
10624   case AMDGPU::S_CMPK_LG_I32:
10625     return optimizeCmpAnd(0, 32, true, false);
10626   case AMDGPU::S_CMP_GT_U32:
10627   case AMDGPU::S_CMPK_GT_U32:
10628     return optimizeCmpAnd(0, 32, false, false);
10629   case AMDGPU::S_CMP_GT_I32:
10630   case AMDGPU::S_CMPK_GT_I32:
10631     return optimizeCmpAnd(0, 32, false, true);
10632   case AMDGPU::S_CMP_LG_U64:
10633     return optimizeCmpAnd(0, 64, true, false);
10640                                            AMDGPU::OpName OpName) const {
10641   if (!ST.needsAlignedVGPRs())
10644   int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10656   bool IsAGPR = RI.isAGPR(MRI, DataReg);
10658         IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10661       MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10662                                        : &AMDGPU::VReg_64_Align2RegClass);
10664       .addReg(DataReg, 0, Op.getSubReg())
10669   Op.setSubReg(AMDGPU::sub0);
10691   unsigned Opcode = MI.getOpcode();
10697       Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10698       Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
10701   if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
bool isSpill(uint16_t Opcode) const
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is a instruction that moves/copies value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
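As a rough illustration of how a pass might consult this hook, the sketch below checks whether a candidate immediate could be folded into a global (FLAT-global) access before rewriting the offset operand. The helper name and surrounding logic are hypothetical; AMDGPUAS::GLOBAL_ADDRESS and SIInstrFlags::FlatGlobal are assumed to be the address-space and flat-variant values used elsewhere in the backend.

  // Hypothetical helper: decide whether NewOffset can be encoded directly
  // in a global_load/global_store-style instruction on this subtarget.
  static bool canEncodeGlobalOffset(const SIInstrInfo &TII, int64_t NewOffset) {
    return TII.isLegalFLATOffset(NewOffset, AMDGPUAS::GLOBAL_ADDRESS,
                                 SIInstrFlags::FlatGlobal);
  }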
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
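A minimal sketch of how these named-operand accessors are typically used, assuming the usual AMDGPU backend headers are in scope; the helper names are hypothetical, and the accessors are only meaningful for opcodes that actually carry the named operand.

  // Read the 'offset' immediate by name instead of by hard-coded index.
  static bool hasZeroNamedOffset(const SIInstrInfo &TII, const MachineInstr &MI) {
    return TII.getNamedImmOperand(MI, AMDGPU::OpName::offset) == 0;
  }

  // getNamedOperand returns nullptr when MI has no such operand, so it
  // doubles as an existence check.
  static bool hasImmSrc0(const SIInstrInfo &TII, MachineInstr &MI) {
    MachineOperand *Src0 = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
    return Src0 && Src0->isImm();
  }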
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
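pseudoToMCOpcode returns -1 when the pseudo has no real encoding for the current subtarget generation, so callers typically guard on that. A minimal sketch (the helper name is illustrative):

  static bool hasMCEncoding(const SIInstrInfo &TII, const MachineInstr &MI) {
    // -1 means there is no target-specific opcode for this generation.
    return TII.pseudoToMCOpcode(MI.getOpcode()) != -1;
  }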
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named OperandName.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were operand OpIdx of MI.
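A hedged sketch of the usual pattern: verify legality before rewriting an operand in place. The surrounding fold logic and function name are hypothetical; only the isOperandLegal call reflects the interface above.

  // Only commit the replacement if the target accepts the new operand in
  // that slot; otherwise leave MI untouched.
  static bool tryReplaceWithImm(const SIInstrInfo &TII, MachineInstr &MI,
                                unsigned OpIdx, const MachineOperand &NewMO) {
    if (!NewMO.isImm() || !TII.isOperandLegal(MI, OpIdx, &NewMO))
      return false;
    MI.getOperand(OpIdx).ChangeToImmediate(NewMO.getImm());
    return true;
  }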
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
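When a hook such as insertScratchExecCopy above is handed a non-null SlotIndexes, newly created instructions have to be registered with it. A minimal sketch of that bookkeeping; the COPY-building helper and its parameters are illustrative, not an actual backend routine.

  static void insertCopyAndUpdate(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const DebugLoc &DL, const SIInstrInfo &TII,
                                  Register DstReg, Register SrcReg,
                                  SlotIndexes *Indexes) {
    // Build the copy, then keep the slot-index mapping consistent.
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
    if (Indexes)
      Indexes->insertMachineInstrInMaps(*NewMI);
  }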
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values reserved for floating-point use.
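A small sketch of how these predicates are typically combined with the subtarget query for the 1/(2*pi) inline constant. ST is assumed to be a GCNSubtarget reference and the helper name is hypothetical.

  static bool isFreeImmediate32(const GCNSubtarget &ST, int32_t Imm) {
    // Inline constants cost no extra literal dword; hasInv2PiInlineImm()
    // controls whether 1/(2*pi) is part of the inline set.
    return AMDGPU::isInlinableLiteral32(Imm, ST.hasInv2PiInlineImm());
  }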
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
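The canonical use of this builder in the AMDGPU backend looks roughly like the following; the registers, kill state, and wrapper function are placeholders, while AMDGPU::S_MOV_B32 and AMDGPU::COPY are real opcodes.

  // Illustrative only: materialize zero into a temporary SGPR and copy
  // another register, forwarding its kill state.
  static void emitExample(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                          const DebugLoc &DL, const SIInstrInfo &TII,
                          Register TmpReg, Register DstReg, Register SrcReg,
                          bool KillSrc) {
    BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), TmpReg).addImm(0);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
  }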
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
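These bit-width helpers are what range checks like isBranchOffsetInRange and isLegalFLATOffset above ultimately boil down to; the widths below are illustrative, not the actual encodings.

  // Signed 16-bit branch-style offset check and an unsigned 12-bit field check.
  static bool fitsSigned16(int64_t BrOffset) { return isInt<16>(BrOffset); }
  static bool fitsUnsigned12(uint64_t Field) { return Field <= maxUIntN(12); }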
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
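A short sketch tying several of these range helpers together on machine IR; nothing here is specific to SIInstrInfo, and the function names are illustrative. The LLVM_DEBUG output assumes the DEBUG_TYPE defined at the top of this file.

  // Erase debug instructions while iterating without invalidating the loop.
  static void stripDebugInstrs(MachineBasicBlock &MBB) {
    for (MachineInstr &MI : make_early_inc_range(MBB))
      if (MI.isDebugInstr())
        MI.eraseFromParent();
  }

  // True if every operand after the first (the def) is an immediate.
  static bool allSrcsAreImm(const MachineInstr &MI) {
    return all_of(drop_begin(MI.operands()),
                  [](const MachineOperand &MO) { return MO.isImm(); });
  }

  // Walk operands together with their indices.
  static void dumpOperands(const MachineInstr &MI) {
    for (const auto &En : enumerate(MI.operands()))
      LLVM_DEBUG(dbgs() << "  op " << En.index() << ": " << En.value() << '\n');
  }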
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of zero bits from the least significant bit upward, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
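A hedged sketch of the kind of arithmetic these helpers are used for when splitting wide values and scaling offsets; all names and values are placeholders.

  // Split a 64-bit immediate into the halves two 32-bit moves would consume.
  static std::pair<uint32_t, uint32_t> splitImm64(uint64_t Imm) {
    return {Lo_32(Imm), Hi_32(Imm)};
  }

  // Number of 32-bit registers needed for a value of SizeInBits bits.
  static unsigned numDWords(unsigned SizeInBits) {
    return divideCeil(SizeInBits, 32);
  }

  // Align a byte offset down to a multiple of Alignment, and turn a
  // power-of-two element stride into a shift amount.
  static uint64_t alignOffset(uint64_t ByteOffset, uint64_t Alignment) {
    return alignDown(ByteOffset, Alignment);
  }
  static unsigned strideShift(unsigned EltSize) {
    assert(isPowerOf2_64(EltSize) && "stride must be a power of two");
    return Log2_32(EltSize);
  }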
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
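Together, maskTrailingOnes and SignExtend64 give the usual pattern for decoding a signed bitfield, e.g. a FLAT-style signed offset; the 13-bit width and the helper name are illustrative only.

  // Extract the low Width bits and sign-extend them to 64 bits.
  static int64_t decodeSignedField(uint64_t Encoded, unsigned Width = 13) {
    uint64_t Field = Encoded & maskTrailingOnes<uint64_t>(Width);
    return SignExtend64(Field, Width);
  }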
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
int popcount(T Value) noexcept
Count the number of set bits in a value.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.