32#include "llvm/IR/IntrinsicsAMDGPU.h"
39#define DEBUG_TYPE "si-instr-info"
41#define GET_INSTRINFO_CTOR_DTOR
42#include "AMDGPUGenInstrInfo.inc"
45#define GET_D16ImageDimIntrinsics_IMPL
46#define GET_ImageDimIntrinsicTable_IMPL
47#define GET_RsrcIntrinsics_IMPL
48#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
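  // Trailing glue operands are skipped before the operand counts are compared.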
  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
  return !MI.memoperands_empty() &&
         all_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
           return MMO->isLoad() && MMO->isInvariant();
         });
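  // Every memory operand must be an invariant load; any store or
  // non-invariant access disqualifies the instruction here.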
  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
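    // Walk outward from the source cycle until we reach one that also
    // contains the sink point (or run out of enclosing cycles).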
                                            int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

  int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
  int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
  if (Offset0Idx == -1 || Offset1Idx == -1)

  Offset0Idx -= get(Opc0).NumDefs;
  Offset1Idx -= get(Opc1).NumDefs;
    assert(NumOps == 4 || NumOps == 5);
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

    if (MO1->getAddrSpace() != MO2->getAddrSpace())

    const auto *Base1 = MO1->getValue();
    const auto *Base2 = MO2->getValue();
    if (!Base1 || !Base2)

    if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

    return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
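  // The cap of 16 loads within a 64-byte offset window is a simple
  // scheduling heuristic for keeping related loads together.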
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {
    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
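  // Aligned SGPR pairs are copied with S_MOV_B64, leftovers with S_MOV_B32,
  // and the source super-register is marked killed on the final move.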
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));

    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
    Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_MOV_B64_e32;
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();
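  // INSTRUCTION_LIST_END acts as a sentinel meaning "no single mov opcode
  // fits"; in that case the per-subregister copy below scavenges a temporary.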
  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                             *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1187 return &AMDGPU::VGPR_32RegClass;
1198 assert(
MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
1199 "Not a VGPR32 reg");
1201 if (
Cond.size() == 1) {
1202 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1211 }
else if (
Cond.size() == 2) {
1212 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                   : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                   : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                   : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                   : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                   : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                   : AMDGPU::S_CSELECT_B64), SReg)
1343 int64_t &ImmVal)
const {
1344 switch (
MI.getOpcode()) {
1345 case AMDGPU::V_MOV_B32_e32:
1346 case AMDGPU::S_MOV_B32:
1347 case AMDGPU::S_MOVK_I32:
1348 case AMDGPU::S_MOV_B64:
1349 case AMDGPU::V_MOV_B64_e32:
1350 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1351 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1352 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1353 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1354 case AMDGPU::V_MOV_B64_PSEUDO: {
1358 return MI.getOperand(0).getReg() == Reg;
1363 case AMDGPU::S_BREV_B32:
1364 case AMDGPU::V_BFREV_B32_e32:
1365 case AMDGPU::V_BFREV_B32_e64: {
1368 ImmVal =
static_cast<int64_t
>(reverseBits<int32_t>(Src0.
getImm()));
1369 return MI.getOperand(0).getReg() == Reg;
1374 case AMDGPU::S_NOT_B32:
1375 case AMDGPU::V_NOT_B32_e32:
1376 case AMDGPU::V_NOT_B32_e64: {
1379 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1380 return MI.getOperand(0).getReg() == Reg;
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  }
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
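  // Size and register class select the move: COPY for the 16-bit SGPR case,
  // S_MOV/V_MOV variants for 32- and 64-bit values, and COPY as the fallback.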
                                                       bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
1413 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1415 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1417 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1419 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1421 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1423 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1425 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1427 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1429 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1431 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1433 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1434 if (VecSize <= 1024)
1435 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1441 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1443 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1445 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1447 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1449 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1451 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1453 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1455 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1457 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1459 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1461 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1462 if (VecSize <= 1024)
1463 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1470 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1472 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1474 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1476 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1478 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1480 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1482 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1484 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1486 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1488 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1490 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1491 if (VecSize <= 1024)
1492 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1499 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1501 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1503 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1505 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1507 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1509 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1511 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1513 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1515 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1517 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1519 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1520 if (VecSize <= 1024)
1521 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1528 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1530 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1532 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1534 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1535 if (VecSize <= 1024)
1536 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                               bool IsSGPR) const {
  assert(EltSize == 32 && "invalid reg indexing elt size");
1562 return AMDGPU::SI_SPILL_S32_SAVE;
1564 return AMDGPU::SI_SPILL_S64_SAVE;
1566 return AMDGPU::SI_SPILL_S96_SAVE;
1568 return AMDGPU::SI_SPILL_S128_SAVE;
1570 return AMDGPU::SI_SPILL_S160_SAVE;
1572 return AMDGPU::SI_SPILL_S192_SAVE;
1574 return AMDGPU::SI_SPILL_S224_SAVE;
1576 return AMDGPU::SI_SPILL_S256_SAVE;
1578 return AMDGPU::SI_SPILL_S288_SAVE;
1580 return AMDGPU::SI_SPILL_S320_SAVE;
1582 return AMDGPU::SI_SPILL_S352_SAVE;
1584 return AMDGPU::SI_SPILL_S384_SAVE;
1586 return AMDGPU::SI_SPILL_S512_SAVE;
1588 return AMDGPU::SI_SPILL_S1024_SAVE;
1597 return AMDGPU::SI_SPILL_V16_SAVE;
1599 return AMDGPU::SI_SPILL_V32_SAVE;
1601 return AMDGPU::SI_SPILL_V64_SAVE;
1603 return AMDGPU::SI_SPILL_V96_SAVE;
1605 return AMDGPU::SI_SPILL_V128_SAVE;
1607 return AMDGPU::SI_SPILL_V160_SAVE;
1609 return AMDGPU::SI_SPILL_V192_SAVE;
1611 return AMDGPU::SI_SPILL_V224_SAVE;
1613 return AMDGPU::SI_SPILL_V256_SAVE;
1615 return AMDGPU::SI_SPILL_V288_SAVE;
1617 return AMDGPU::SI_SPILL_V320_SAVE;
1619 return AMDGPU::SI_SPILL_V352_SAVE;
1621 return AMDGPU::SI_SPILL_V384_SAVE;
1623 return AMDGPU::SI_SPILL_V512_SAVE;
1625 return AMDGPU::SI_SPILL_V1024_SAVE;
1634 return AMDGPU::SI_SPILL_AV32_SAVE;
1636 return AMDGPU::SI_SPILL_AV64_SAVE;
1638 return AMDGPU::SI_SPILL_AV96_SAVE;
1640 return AMDGPU::SI_SPILL_AV128_SAVE;
1642 return AMDGPU::SI_SPILL_AV160_SAVE;
1644 return AMDGPU::SI_SPILL_AV192_SAVE;
1646 return AMDGPU::SI_SPILL_AV224_SAVE;
1648 return AMDGPU::SI_SPILL_AV256_SAVE;
1650 return AMDGPU::SI_SPILL_AV288_SAVE;
1652 return AMDGPU::SI_SPILL_AV320_SAVE;
1654 return AMDGPU::SI_SPILL_AV352_SAVE;
1656 return AMDGPU::SI_SPILL_AV384_SAVE;
1658 return AMDGPU::SI_SPILL_AV512_SAVE;
1660 return AMDGPU::SI_SPILL_AV1024_SAVE;
                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
  assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
         SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
1754 return AMDGPU::SI_SPILL_S32_RESTORE;
1756 return AMDGPU::SI_SPILL_S64_RESTORE;
1758 return AMDGPU::SI_SPILL_S96_RESTORE;
1760 return AMDGPU::SI_SPILL_S128_RESTORE;
1762 return AMDGPU::SI_SPILL_S160_RESTORE;
1764 return AMDGPU::SI_SPILL_S192_RESTORE;
1766 return AMDGPU::SI_SPILL_S224_RESTORE;
1768 return AMDGPU::SI_SPILL_S256_RESTORE;
1770 return AMDGPU::SI_SPILL_S288_RESTORE;
1772 return AMDGPU::SI_SPILL_S320_RESTORE;
1774 return AMDGPU::SI_SPILL_S352_RESTORE;
1776 return AMDGPU::SI_SPILL_S384_RESTORE;
1778 return AMDGPU::SI_SPILL_S512_RESTORE;
1780 return AMDGPU::SI_SPILL_S1024_RESTORE;
1789 return AMDGPU::SI_SPILL_V16_RESTORE;
1791 return AMDGPU::SI_SPILL_V32_RESTORE;
1793 return AMDGPU::SI_SPILL_V64_RESTORE;
1795 return AMDGPU::SI_SPILL_V96_RESTORE;
1797 return AMDGPU::SI_SPILL_V128_RESTORE;
1799 return AMDGPU::SI_SPILL_V160_RESTORE;
1801 return AMDGPU::SI_SPILL_V192_RESTORE;
1803 return AMDGPU::SI_SPILL_V224_RESTORE;
1805 return AMDGPU::SI_SPILL_V256_RESTORE;
1807 return AMDGPU::SI_SPILL_V288_RESTORE;
1809 return AMDGPU::SI_SPILL_V320_RESTORE;
1811 return AMDGPU::SI_SPILL_V352_RESTORE;
1813 return AMDGPU::SI_SPILL_V384_RESTORE;
1815 return AMDGPU::SI_SPILL_V512_RESTORE;
1817 return AMDGPU::SI_SPILL_V1024_RESTORE;
1826 return AMDGPU::SI_SPILL_AV32_RESTORE;
1828 return AMDGPU::SI_SPILL_AV64_RESTORE;
1830 return AMDGPU::SI_SPILL_AV96_RESTORE;
1832 return AMDGPU::SI_SPILL_AV128_RESTORE;
1834 return AMDGPU::SI_SPILL_AV160_RESTORE;
1836 return AMDGPU::SI_SPILL_AV192_RESTORE;
1838 return AMDGPU::SI_SPILL_AV224_RESTORE;
1840 return AMDGPU::SI_SPILL_AV256_RESTORE;
1842 return AMDGPU::SI_SPILL_AV288_RESTORE;
1844 return AMDGPU::SI_SPILL_AV320_RESTORE;
1846 return AMDGPU::SI_SPILL_AV352_RESTORE;
1848 return AMDGPU::SI_SPILL_AV384_RESTORE;
1850 return AMDGPU::SI_SPILL_AV512_RESTORE;
1852 return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                              bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  unsigned SpillSize = TRI->getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));

  assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
                               unsigned Quantity) const {
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);
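  // The sequence above appears to mask the doorbell ID, OR in the
  // queue-wave-abort code, and hand the result to the trap handler via m0.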
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
    MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
                                               &AMDGPU::SReg_32_XM0RegClass);
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
        .addImm(SignExtend64<32>(Imm))
        .addImm(SignExtend64<32>(Imm >> 32))
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
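  // Both 64-bit move pseudos above split the value into low and high 32-bit
  // halves (sub0/sub1) when a single 64-bit mov cannot be used.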
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
2265 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2266 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2267 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2268 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2269 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2270 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2271 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2272 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2273 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2274 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2275 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2276 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2277 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2278 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2279 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2280 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2281 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2282 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2283 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2284 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2285 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2286 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2287 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2288 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2289 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2290 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2291 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2292 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2293 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
2324 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2325 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2326 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2327 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2328 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2329 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2330 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2331 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2332 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2333 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2334 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2335 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
2368 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2369 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2370 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2371 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2372 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2373 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2374 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2375 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2376 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2377 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2378 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2379 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

        BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
      Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                                  : AMDGPU::S_OR_SAVEEXEC_B64));
  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
  case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));
  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
    OffsetMO->setImm(FinalOffset);
    MI->setMemRefs(*MF, NewMMOs);
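    // If the only user reads a 256- or 128-bit subregister of the wide load,
    // the load itself is shrunk to DWORDX8/DWORDX4 and its offset adjusted.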
2635std::pair<MachineInstr*, MachineInstr*>
2637 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2639 if (ST.
hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2642 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2643 return std::pair(&
MI,
nullptr);
2654 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2656 if (Dst.isPhysical()) {
2657 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2660 auto Tmp =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2664 for (
unsigned I = 1;
I <= 2; ++
I) {
2667 if (
SrcOp.isImm()) {
2669 Imm.ashrInPlace(Part * 32);
2670 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2674 if (Src.isPhysical())
2675 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2682 MovDPP.addImm(MO.getImm());
2684 Split[Part] = MovDPP;
2688 if (Dst.isVirtual())
2695 MI.eraseFromParent();
2696 return std::pair(Split[0], Split[1]);
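// When a true 64-bit DPP mov is unavailable, the pseudo is split into two
// 32-bit DPP moves operating on the sub0 and sub1 halves.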
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                        AMDGPU::OpName Src0OpName,
                                        AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();
  NonRegOp2.setImm(NonRegVal);
                                                    unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);
                        AMDGPU::OpName::src1_sel);
2870 unsigned &SrcOpIdx0,
2871 unsigned &SrcOpIdx1)
const {
2876 unsigned &SrcOpIdx0,
2877 unsigned &SrcOpIdx1)
const {
2878 if (!
Desc.isCommutable())
2881 unsigned Opc =
Desc.getOpcode();
2882 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2886 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2890 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

      MCCtx.createTempSymbol("offset", true);
      MCCtx.createTempSymbol("post_addpc", true);
  AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
  Offset->setVariableValue(OffsetExpr);
2955 assert(RS &&
"RegScavenger required for long branching");
2959 Register PCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2965 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
2966 if (FlushSGPRWrites)
2974 ApplyHazardWorkarounds();
2977 MCCtx.createTempSymbol(
"post_getpc",
true);
2981 MCCtx.createTempSymbol(
"offset_lo",
true);
2983 MCCtx.createTempSymbol(
"offset_hi",
true);
2986 .
addReg(PCReg, 0, AMDGPU::sub0)
2990 .
addReg(PCReg, 0, AMDGPU::sub1)
2992 ApplyHazardWorkarounds();
3033 if (LongBranchReservedReg) {
3035 Scav = LongBranchReservedReg;
3044 MRI.replaceRegWith(PCReg, Scav);
3045 MRI.clearVirtRegs();
3051 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3052 MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
3053 MRI.clearVirtRegs();
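  // If no scavengable SGPR pair is available, fall back to SGPR0_SGPR1 and
  // emit an emergency spill/restore around the long-branch sequence.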
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
3070 case SIInstrInfo::SCC_TRUE:
3071 return AMDGPU::S_CBRANCH_SCC1;
3072 case SIInstrInfo::SCC_FALSE:
3073 return AMDGPU::S_CBRANCH_SCC0;
3074 case SIInstrInfo::VCCNZ:
3075 return AMDGPU::S_CBRANCH_VCCNZ;
3076 case SIInstrInfo::VCCZ:
3077 return AMDGPU::S_CBRANCH_VCCZ;
3078 case SIInstrInfo::EXECNZ:
3079 return AMDGPU::S_CBRANCH_EXECNZ;
3080 case SIInstrInfo::EXECZ:
3081 return AMDGPU::S_CBRANCH_EXECZ;
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
3089 case AMDGPU::S_CBRANCH_SCC0:
3091 case AMDGPU::S_CBRANCH_SCC1:
3093 case AMDGPU::S_CBRANCH_VCCNZ:
3095 case AMDGPU::S_CBRANCH_VCCZ:
3097 case AMDGPU::S_CBRANCH_EXECNZ:
3099 case AMDGPU::S_CBRANCH_EXECZ:
                                   bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();
                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
3156 case AMDGPU::S_MOV_B64_term:
3157 case AMDGPU::S_XOR_B64_term:
3158 case AMDGPU::S_OR_B64_term:
3159 case AMDGPU::S_ANDN2_B64_term:
3160 case AMDGPU::S_AND_B64_term:
3161 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3162 case AMDGPU::S_MOV_B32_term:
3163 case AMDGPU::S_XOR_B32_term:
3164 case AMDGPU::S_OR_B32_term:
3165 case AMDGPU::S_ANDN2_B32_term:
3166 case AMDGPU::S_AND_B32_term:
3167 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3170 case AMDGPU::SI_ELSE:
3171 case AMDGPU::SI_KILL_I1_TERMINATOR:
3172 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {
    if (Cond[0].isImm()) {
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {
3363 static const int16_t Sub0_15[] = {
3364 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3365 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3366 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3367 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3370 static const int16_t Sub0_15_64[] = {
3371 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3372 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3373 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3374 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
3431 case AMDGPU::V_MOV_B16_t16_e32:
3432 case AMDGPU::V_MOV_B16_t16_e64:
3433 case AMDGPU::V_MOV_B32_e32:
3434 case AMDGPU::V_MOV_B32_e64:
3435 case AMDGPU::V_MOV_B64_PSEUDO:
3436 case AMDGPU::V_MOV_B64_e32:
3437 case AMDGPU::V_MOV_B64_e64:
3438 case AMDGPU::S_MOV_B32:
3439 case AMDGPU::S_MOV_B64:
3440 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3442 case AMDGPU::WWM_COPY:
3443 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3444 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3445 case AMDGPU::V_ACCVGPR_MOV_B32:
3446 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3447 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

                                              unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
    return SignExtend64<32>(Imm);
    return SignExtend64<32>(Imm >> 32);
    return SignExtend64<16>(Imm);
    return SignExtend64<16>(Imm >> 16);
  case AMDGPU::sub1_lo16:
    return SignExtend64<16>(Imm >> 32);
  case AMDGPU::sub1_hi16:
    return SignExtend64<16>(Imm >> 48);
  return std::nullopt;
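// The helper above slices a 64-bit immediate by subregister index and
// sign-extends the selected 16/32-bit piece; unknown indices yield nullopt.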
3494 case AMDGPU::V_MAC_F16_e32:
3495 case AMDGPU::V_MAC_F16_e64:
3496 case AMDGPU::V_MAD_F16_e64:
3497 return AMDGPU::V_MADAK_F16;
3498 case AMDGPU::V_MAC_F32_e32:
3499 case AMDGPU::V_MAC_F32_e64:
3500 case AMDGPU::V_MAD_F32_e64:
3501 return AMDGPU::V_MADAK_F32;
3502 case AMDGPU::V_FMAC_F32_e32:
3503 case AMDGPU::V_FMAC_F32_e64:
3504 case AMDGPU::V_FMA_F32_e64:
3505 return AMDGPU::V_FMAAK_F32;
3506 case AMDGPU::V_FMAC_F16_e32:
3507 case AMDGPU::V_FMAC_F16_e64:
3508 case AMDGPU::V_FMAC_F16_t16_e64:
3509 case AMDGPU::V_FMAC_F16_fake16_e64:
3510 case AMDGPU::V_FMA_F16_e64:
3511 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3512 ? AMDGPU::V_FMAAK_F16_t16
3513 : AMDGPU::V_FMAAK_F16_fake16
3514 : AMDGPU::V_FMAAK_F16;
3515 case AMDGPU::V_FMAC_F64_e32:
3516 case AMDGPU::V_FMAC_F64_e64:
3517 case AMDGPU::V_FMA_F64_e64:
3518 return AMDGPU::V_FMAAK_F64;
3526 case AMDGPU::V_MAC_F16_e32:
3527 case AMDGPU::V_MAC_F16_e64:
3528 case AMDGPU::V_MAD_F16_e64:
3529 return AMDGPU::V_MADMK_F16;
3530 case AMDGPU::V_MAC_F32_e32:
3531 case AMDGPU::V_MAC_F32_e64:
3532 case AMDGPU::V_MAD_F32_e64:
3533 return AMDGPU::V_MADMK_F32;
3534 case AMDGPU::V_FMAC_F32_e32:
3535 case AMDGPU::V_FMAC_F32_e64:
3536 case AMDGPU::V_FMA_F32_e64:
3537 return AMDGPU::V_FMAMK_F32;
3538 case AMDGPU::V_FMAC_F16_e32:
3539 case AMDGPU::V_FMAC_F16_e64:
3540 case AMDGPU::V_FMAC_F16_t16_e64:
3541 case AMDGPU::V_FMAC_F16_fake16_e64:
3542 case AMDGPU::V_FMA_F16_e64:
3543 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3544 ? AMDGPU::V_FMAMK_F16_t16
3545 : AMDGPU::V_FMAMK_F16_fake16
3546 : AMDGPU::V_FMAMK_F16;
3547 case AMDGPU::V_FMAC_F64_e32:
3548 case AMDGPU::V_FMAC_F64_e64:
3549 case AMDGPU::V_FMA_F64_e64:
3550 return AMDGPU::V_FMAMK_F64;
  if (!MRI->hasOneNonDBGUse(Reg))

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);
  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

    if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
    bool Src0Inlined = false;
    if (Src0->isReg()) {
      if (Def && Def->isMoveImmediate() &&

    if (Src1->isReg() && !Src0Inlined) {
      if (Def && Def->isMoveImmediate() &&

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);
    return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();
  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())
3987 case AMDGPU::V_MAC_F16_e32:
3988 case AMDGPU::V_MAC_F16_e64:
3989 return AMDGPU::V_MAD_F16_e64;
3990 case AMDGPU::V_MAC_F32_e32:
3991 case AMDGPU::V_MAC_F32_e64:
3992 return AMDGPU::V_MAD_F32_e64;
3993 case AMDGPU::V_MAC_LEGACY_F32_e32:
3994 case AMDGPU::V_MAC_LEGACY_F32_e64:
3995 return AMDGPU::V_MAD_LEGACY_F32_e64;
3996 case AMDGPU::V_FMAC_LEGACY_F32_e32:
3997 case AMDGPU::V_FMAC_LEGACY_F32_e64:
3998 return AMDGPU::V_FMA_LEGACY_F32_e64;
3999 case AMDGPU::V_FMAC_F16_e32:
4000 case AMDGPU::V_FMAC_F16_e64:
4001 case AMDGPU::V_FMAC_F16_t16_e64:
4002 case AMDGPU::V_FMAC_F16_fake16_e64:
4003 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4004 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4005 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4006 : AMDGPU::V_FMA_F16_gfx9_e64;
4007 case AMDGPU::V_FMAC_F32_e32:
4008 case AMDGPU::V_FMAC_F32_e64:
4009 return AMDGPU::V_FMA_F32_e64;
4010 case AMDGPU::V_FMAC_F64_e32:
4011 case AMDGPU::V_FMAC_F64_e64:
4012 return AMDGPU::V_FMA_F64_e64;
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    if (Def.isEarlyClobber() && Def.isReg() &&
      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto *S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;
      for (auto &SR : LI.subranges())

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
4087 case AMDGPU::V_MAC_F16_e64:
4088 case AMDGPU::V_FMAC_F16_e64:
4089 case AMDGPU::V_FMAC_F16_t16_e64:
4090 case AMDGPU::V_FMAC_F16_fake16_e64:
4091 case AMDGPU::V_MAC_F32_e64:
4092 case AMDGPU::V_MAC_LEGACY_F32_e64:
4093 case AMDGPU::V_FMAC_F32_e64:
4094 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4095 case AMDGPU::V_FMAC_F64_e64:
4097 case AMDGPU::V_MAC_F16_e32:
4098 case AMDGPU::V_FMAC_F16_e32:
4099 case AMDGPU::V_MAC_F32_e32:
4100 case AMDGPU::V_MAC_LEGACY_F32_e32:
4101 case AMDGPU::V_FMAC_F32_e32:
4102 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4103 case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&

  const auto killDef = [&]() -> void {
    if (MRI.hasOneNonDBGUse(DefReg)) {

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  MIB.addImm(OpSel ? OpSel->getImm() : 0);
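  // killDef (above) redirects any remaining use of the folded definition to an
  // undef dummy register so the defining instruction can be removed safely.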
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {

  APInt IntImm = Imm.bitcastToAPInt();

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);
                                    int64_t ImmVal) const {
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))

         "unexpected imm-like operand kind");

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                  AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_CNDMASK_B32_e64:
        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
  int OpTy = MI.getDesc().operands()[Idx++].OperandType;

  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)

    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;

  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

      ErrInfo = "illegal copy from vector register to SGPR";

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);
  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int RegClass = Desc.operands()[i].RegClass;

      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Expected inline constant for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

            RI.getSubRegisterClass(RC, MO.getSubReg())) {

          ErrInfo = "Subtarget requires even aligned vector registers";

      if (RegClass != -1) {
        if (Reg.isVirtual())

          ErrInfo = "Operand has incorrect register class.";

      ErrInfo = "SDWA is not supported on this target";

    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
        int64_t Imm = MO->getImm();
          ErrInfo = "Invalid SDWA selection";

    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";

              "Only reg allowed as operands in SDWA instructions on GFX9+";

      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

      if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
        ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

      if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));

            "Dst register should be tied to implicit use of preserved register";

        ErrInfo = "Dst register should use same physical register as preserved";

  if (isImage(Opcode) && !MI.mayStore()) {
5203 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5207 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5208 if (RegCount > DstSize) {
5209 ErrInfo =
"Image instruction returns too many registers for dst "
5218 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5219 unsigned ConstantBusCount = 0;
5220 bool UsesLiteral =
false;
5223 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5227 LiteralVal = &
MI.getOperand(ImmIdx);
5236 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5247 }
else if (!MO.
isFI()) {
5254 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5264 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5265 return !RI.regsOverlap(SGPRUsed, SGPR);
5275 Opcode != AMDGPU::V_WRITELANE_B32) {
5276 ErrInfo =
"VOP* instruction violates constant bus restriction";
5281 ErrInfo =
"VOP3 instruction uses literal";
5288 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5289 unsigned SGPRCount = 0;
5292 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5300 if (MO.
getReg() != SGPRUsed)
5306 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5313 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5314 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5321 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5331 ErrInfo =
"ABS not allowed in VOP3B instructions";
5344 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5351 if (
Desc.isBranch()) {
5353 ErrInfo =
"invalid branch target for SOPK instruction";
      if (!isUInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";

      if (!isInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";
  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";
5393 if (!Dst->isUse()) {
5394 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5399 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5400 UseOpIdx != StaticNumOps + 1) {
5401 ErrInfo =
"movrel implicit operands should be tied";
5408 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5410 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5411 ErrInfo =
"src0 should be subreg of implicit vector use";
5419 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5420 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5426 if (
MI.mayStore() &&
5431 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5432 ErrInfo =
"scalar stores must use m0 as offset register";
5440 if (
Offset->getImm() != 0) {
5441 ErrInfo =
"subtarget does not support offsets in flat instructions";
5448 if (GDSOp && GDSOp->
getImm() != 0) {
5449 ErrInfo =
"GDS is not supported on this subtarget";
5457 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5458 AMDGPU::OpName::vaddr0);
5459 AMDGPU::OpName RSrcOpName =
5460 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5461 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5469 ErrInfo =
"dim is out of range";
5476 IsA16 = R128A16->
getImm() != 0;
5477 }
else if (ST.
hasA16()) {
5479 IsA16 = A16->
getImm() != 0;
5482 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5484 unsigned AddrWords =
5487 unsigned VAddrWords;
5489 VAddrWords = RsrcIdx - VAddr0Idx;
5492 unsigned LastVAddrIdx = RsrcIdx - 1;
5493 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5501 if (VAddrWords != AddrWords) {
5503 <<
" but got " << VAddrWords <<
"\n");
5504 ErrInfo =
"bad vaddr size";
    using namespace AMDGPU::DPP;

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";
5525 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5527 ErrInfo =
"Invalid dpp_ctrl value: "
5528 "wavefront shifts are not supported on GFX10+";
5531 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5533 ErrInfo =
"Invalid dpp_ctrl value: "
5534 "broadcasts are not supported on GFX10+";
5537 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5539 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5540 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5542 ErrInfo =
"Invalid dpp_ctrl value: "
5543 "row_newbroadcast/row_share is not supported before "
5548 ErrInfo =
"Invalid dpp_ctrl value: "
5549 "row_share and row_xmask are not supported before GFX10";
5554 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5557 ErrInfo =
"Invalid dpp_ctrl value: "
5558 "DP ALU dpp only support row_newbcast";
5565 AMDGPU::OpName DataName =
5566 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5575 ErrInfo =
"Invalid register class: "
5576 "vdata and vdst should be both VGPR or AGPR";
5579 if (
Data && Data2 &&
5581 ErrInfo =
"Invalid register class: "
5582 "both data operands should be VGPR or AGPR";
5586 if ((Dst && RI.
isAGPR(
MRI, Dst->getReg())) ||
5589 ErrInfo =
"Invalid register class: "
5590 "agpr loads and stores not supported on this GPU";
5597 const auto isAlignedReg = [&
MI, &
MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5602 if (Reg.isPhysical())
5609 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5610 Opcode == AMDGPU::DS_GWS_BARRIER) {
5612 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5613 ErrInfo =
"Subtarget requires even aligned vector registers "
5614 "for DS_GWS instructions";
5620 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5621 ErrInfo =
"Subtarget requires even aligned vector registers "
5622 "for vaddr operand of image instructions";
5628 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.
hasGFX90AInsts()) {
5630 if (Src->isReg() && RI.
isSGPRReg(
MRI, Src->getReg())) {
5631 ErrInfo =
"Invalid register class: "
5632 "v_accvgpr_write with an SGPR is not supported on this GPU";
5637 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5640 ErrInfo =
"pseudo expects only physical SGPRs";
5648 ErrInfo =
"Subtarget does not support offset scaling";
5652 ErrInfo =
"Instruction does not support offset scaling";
5661 for (
unsigned I = 0;
I < 3; ++
I) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
5684 case AMDGPU::S_MOV_B32: {
5686 return MI.getOperand(1).isReg() ||
5688 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
5757 case AMDGPU::S_CVT_F32_F16:
5758 case AMDGPU::S_CVT_HI_F32_F16:
5760 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5761 case AMDGPU::S_CVT_F16_F32:
5763 : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
5768 case AMDGPU::S_CEIL_F16:
5770 : AMDGPU::V_CEIL_F16_fake16_e64;
5771 case AMDGPU::S_FLOOR_F16:
5773 : AMDGPU::V_FLOOR_F16_fake16_e64;
5774 case AMDGPU::S_TRUNC_F16:
5776 : AMDGPU::V_TRUNC_F16_fake16_e64;
5777 case AMDGPU::S_RNDNE_F16:
5779 : AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5787 case AMDGPU::S_ADD_F16:
5789 : AMDGPU::V_ADD_F16_fake16_e64;
5790 case AMDGPU::S_SUB_F16:
5792 : AMDGPU::V_SUB_F16_fake16_e64;
5793 case AMDGPU::S_MIN_F16:
5795 : AMDGPU::V_MIN_F16_fake16_e64;
5796 case AMDGPU::S_MAX_F16:
5798 : AMDGPU::V_MAX_F16_fake16_e64;
5799 case AMDGPU::S_MINIMUM_F16:
5801 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5802 case AMDGPU::S_MAXIMUM_F16:
5804 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5805 case AMDGPU::S_MUL_F16:
5807 : AMDGPU::V_MUL_F16_fake16_e64;
5808 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5809 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5810 case AMDGPU::S_FMAC_F16:
5812 : AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
5829 case AMDGPU::S_CMP_LT_F16:
5831 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5832 case AMDGPU::S_CMP_EQ_F16:
5834 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5835 case AMDGPU::S_CMP_LE_F16:
5837 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5838 case AMDGPU::S_CMP_GT_F16:
5840 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5841 case AMDGPU::S_CMP_LG_F16:
5843 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5844 case AMDGPU::S_CMP_GE_F16:
5846 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5847 case AMDGPU::S_CMP_O_F16:
5849 : AMDGPU::V_CMP_O_F16_fake16_e64;
5850 case AMDGPU::S_CMP_U_F16:
5852 : AMDGPU::V_CMP_U_F16_fake16_e64;
5853 case AMDGPU::S_CMP_NGE_F16:
5855 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5856 case AMDGPU::S_CMP_NLG_F16:
5858 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5859 case AMDGPU::S_CMP_NGT_F16:
5861 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5862 case AMDGPU::S_CMP_NLE_F16:
5864 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5865 case AMDGPU::S_CMP_NEQ_F16:
5867 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5868 case AMDGPU::S_CMP_NLT_F16:
5870 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5871 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5872 case AMDGPU::V_S_EXP_F16_e64:
5874 : AMDGPU::V_EXP_F16_fake16_e64;
5875 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5876 case AMDGPU::V_S_LOG_F16_e64:
5878 : AMDGPU::V_LOG_F16_fake16_e64;
5879 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5880 case AMDGPU::V_S_RCP_F16_e64:
5882 : AMDGPU::V_RCP_F16_fake16_e64;
5883 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5884 case AMDGPU::V_S_RSQ_F16_e64:
5886 : AMDGPU::V_RSQ_F16_fake16_e64;
5887 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5888 case AMDGPU::V_S_SQRT_F16_e64:
5890 : AMDGPU::V_SQRT_F16_fake16_e64;
5893 "Unexpected scalar opcode without corresponding vector one!");
5906 bool IsWave32 = ST.isWave32();
5911 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5912 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5921 const unsigned OrSaveExec =
5922 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5935 unsigned ExecMov =
isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5937 auto ExecRestoreMI =
5946 "Not a whole wave func");
5949 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
5950 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
5959 bool IsAllocatable) {
5960 if ((IsAllocatable || !ST.hasGFX90AInsts()) &&
5965 case AMDGPU::AV_32RegClassID:
5966 RCID = AMDGPU::VGPR_32RegClassID;
5968 case AMDGPU::AV_64RegClassID:
5969 RCID = AMDGPU::VReg_64RegClassID;
5971 case AMDGPU::AV_96RegClassID:
5972 RCID = AMDGPU::VReg_96RegClassID;
5974 case AMDGPU::AV_128RegClassID:
5975 RCID = AMDGPU::VReg_128RegClassID;
5977 case AMDGPU::AV_160RegClassID:
5978 RCID = AMDGPU::VReg_160RegClassID;
5980 case AMDGPU::AV_512RegClassID:
5981 RCID = AMDGPU::VReg_512RegClassID;
5997 auto RegClass = TID.
operands()[OpNum].RegClass;
5998 bool IsAllocatable =
false;
6007 const int VDstIdx = AMDGPU::getNamedOperandIdx(TID.
Opcode,
6008 AMDGPU::OpName::vdst);
6009 const int DataIdx = AMDGPU::getNamedOperandIdx(TID.
Opcode,
6011 : AMDGPU::OpName::vdata);
6012 if (DataIdx != -1) {
6014 TID.
Opcode, AMDGPU::OpName::data1);
6021 unsigned OpNo)
const {
6023 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6024 Desc.operands()[OpNo].RegClass == -1) {
6027 if (Reg.isVirtual()) {
6029 MI.getParent()->getParent()->getRegInfo();
6030 return MRI.getRegClass(Reg);
6032 return RI.getPhysRegBaseClass(Reg);
6035 unsigned RCID =
Desc.operands()[OpNo].RegClass;
6044 unsigned RCID =
get(
MI.getOpcode()).operands()[
OpIdx].RegClass;
6046 unsigned Size = RI.getRegSizeInBits(*RC);
6047 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6048 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6049 : AMDGPU::V_MOV_B32_e32;
6051 Opcode = AMDGPU::COPY;
6053 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6067 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6073 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6084 if (SubIdx == AMDGPU::sub0)
6086 if (SubIdx == AMDGPU::sub1)
6098void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6114 if (Reg.isPhysical())
6125 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg());
6136 unsigned Opc =
MI.getOpcode();
6142 constexpr const AMDGPU::OpName OpNames[] = {
6143 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6146 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6147 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6164 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6165 const int DataIdx = AMDGPU::getNamedOperandIdx(
6166 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6167 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6168 MI.getOperand(DataIdx).isReg() &&
6169 RI.
isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6171 if ((
int)
OpIdx == DataIdx) {
6172 if (VDstIdx != -1 &&
6173 RI.
isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6176 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6177 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6178 RI.
isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6184 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6204 constexpr const unsigned NumOps = 3;
6205 constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
6206 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6207 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6208 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6213 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6216 MO = &
MI.getOperand(SrcIdx);
6223 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[NumOps + SrcN]);
6227 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6231 return !OpSel && !OpSelHi;
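// Illustrative sketch (an assumption, not part of the original file): the
// op_sel / op_sel_hi tests above amount to masking a source-modifier
// immediate with the SISrcMods bits, roughly as below. hasOpSelBits is a
// hypothetical helper name.
static bool hasOpSelBits(unsigned SrcMods) {
  // OP_SEL_0 / OP_SEL_1 are the per-operand half-select bits used by packed
  // encodings; any set bit means the operand is not a plain full-width source.
  return (SrcMods & SISrcMods::OP_SEL_0) || (SrcMods & SISrcMods::OP_SEL_1);
}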
6255 if (!LiteralLimit--)
6265 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6273 if (--ConstantBusLimit <= 0)
6285 if (!LiteralLimit--)
6287 if (--ConstantBusLimit <= 0)
6293 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6297 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6299 !
Op.isIdenticalTo(*MO))
6323 bool Is64BitOp = Is64BitFPOp ||
6339 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6358 unsigned Opc =
MI.getOpcode();
6361 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6364 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6377 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6380 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6386 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6403 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6404 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6405 if (!RI.
isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6417 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6419 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6431 if (HasImplicitSGPR || !
MI.isCommutable()) {
6448 if (CommutedOpc == -1) {
6453 MI.setDesc(
get(CommutedOpc));
6457 bool Src0Kill = Src0.
isKill();
6461 else if (Src1.
isReg()) {
6476 unsigned Opc =
MI.getOpcode();
6479 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6480 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6481 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6484 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6485 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6486 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6487 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6488 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6489 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6490 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6495 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6500 if (VOP3Idx[2] != -1) {
6503 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6515 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6517 SGPRsUsed.
insert(SGPRReg);
6521 for (
int Idx : VOP3Idx) {
6530 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6555 if (ConstantBusLimit > 0) {
6567 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6568 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6574 for (
unsigned I = 0;
I < 3; ++
I) {
6587 SRC = RI.getCommonSubClass(SRC, DstRC);
6590 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6594 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6596 get(TargetOpcode::COPY), NewSrcReg)
6603 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6609 for (
unsigned i = 0; i < SubRegs; ++i) {
6610 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6612 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6619 get(AMDGPU::REG_SEQUENCE), DstReg);
6620 for (
unsigned i = 0; i < SubRegs; ++i) {
6635 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6637 SBase->setReg(SGPR);
6648 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6649 if (OldSAddrIdx < 0)
6665 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6666 if (NewVAddrIdx < 0)
6669 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6673 if (OldVAddrIdx >= 0) {
6675 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6687 if (OldVAddrIdx == NewVAddrIdx) {
6690 MRI.removeRegOperandFromUseList(&NewVAddr);
6691 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6695 MRI.removeRegOperandFromUseList(&NewVAddr);
6696 MRI.addRegOperandToUseList(&NewVAddr);
6698 assert(OldSAddrIdx == NewVAddrIdx);
6700 if (OldVAddrIdx >= 0) {
6701 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6702 AMDGPU::OpName::vdst_in);
6706 if (NewVDstIn != -1) {
6707 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6713 if (NewVDstIn != -1) {
6714 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6755 unsigned OpSubReg =
Op.getSubReg();
6764 Register DstReg =
MRI.createVirtualRegister(DstRC);
6774 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6777 bool ImpDef = Def->isImplicitDef();
6778 while (!ImpDef && Def && Def->isCopy()) {
6779 if (Def->getOperand(1).getReg().isPhysical())
6781 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6782 ImpDef = Def && Def->isImplicitDef();
6784 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
  unsigned SaveExecOpc =
      ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
  unsigned XorTermOpc =
      ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6808 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6809 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6815 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6816 unsigned NumSubRegs =
RegSize / 32;
6817 Register VScalarOp = ScalarOp->getReg();
6819 if (NumSubRegs == 1) {
6820 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6822 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6825 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6827 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6833 CondReg = NewCondReg;
6835 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6843 ScalarOp->setReg(CurReg);
6844 ScalarOp->setIsKill();
6848 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6849 "Unhandled register size");
6851 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6853 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6855 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6858 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6859 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6862 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6863 .
addReg(VScalarOp, VScalarOpUndef,
6864 TRI->getSubRegFromChannel(
Idx + 1));
6870 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6871 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6877 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6878 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6881 if (NumSubRegs <= 2)
6882 Cmp.addReg(VScalarOp);
6884 Cmp.addReg(VScalarOp, VScalarOpUndef,
6885 TRI->getSubRegFromChannel(
Idx, 2));
6889 CondReg = NewCondReg;
6891 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6899 const auto *SScalarOpRC =
6900 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6901 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6905 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6906 unsigned Channel = 0;
6907 for (
Register Piece : ReadlanePieces) {
6908 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6912 ScalarOp->setReg(SScalarOp);
6913 ScalarOp->setIsKill();
6917 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6918 MRI.setSimpleHint(SaveExec, CondReg);
6949 if (!Begin.isValid())
6951 if (!
End.isValid()) {
6956 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6957 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6958 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6967 std::numeric_limits<unsigned>::max()) !=
6970 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6976 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6985 for (
auto I = Begin;
I != AfterMI;
I++) {
6986 for (
auto &MO :
I->all_uses())
6987 MRI.clearKillFlags(MO.getReg());
7022 for (
auto &Succ : RemainderBB->
successors()) {
7045static std::tuple<unsigned, unsigned>
7053 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7054 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7057 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7058 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7059 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7060 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7061 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7078 .
addImm(AMDGPU::sub0_sub1)
7084 return std::tuple(RsrcPtr, NewSRsrc);
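// Illustrative sketch (assumed call shape, not from the original file): the
// helper above hands back the extracted 64-bit pointer plus a replacement
// 128-bit SGPR resource, which a caller would unpack with structured
// bindings before rewriting the memory instruction's operands. The helper
// name extractRsrcPtr is assumed here.
//
//   auto [RsrcPtr, NewSRsrc] = extractRsrcPtr(TII, MI, Rsrc);
//   // RsrcPtr  : register pair holding the original base pointer (sub0_sub1)
//   // NewSRsrc : legal SGPR_128 resource carrying the default data format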
7121 if (
MI.getOpcode() == AMDGPU::PHI) {
7123 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7124 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7127 MRI.getRegClass(
MI.getOperand(i).getReg());
7142 VRC = &AMDGPU::VReg_1RegClass;
7158 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7160 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7176 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7183 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7185 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7203 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7208 if (DstRC != Src0RC) {
7217 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7225 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7226 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7227 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7228 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7229 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7230 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7231 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7246 ? AMDGPU::OpName::rsrc
7247 : AMDGPU::OpName::srsrc;
7252 AMDGPU::OpName SampOpName =
7253 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7262 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7268 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7269 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7274 while (Start->getOpcode() != FrameSetupOpcode)
7277 while (
End->getOpcode() != FrameDestroyOpcode)
7281 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
7282 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
7290 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7292 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7294 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7304 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7305 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7306 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7307 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7316 bool isSoffsetLegal =
true;
7318 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7319 if (SoffsetIdx != -1) {
7323 isSoffsetLegal =
false;
7327 bool isRsrcLegal =
true;
7329 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7330 if (RsrcIdx != -1) {
7333 isRsrcLegal =
false;
7337 if (isRsrcLegal && isSoffsetLegal)
7361 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7362 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7363 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7366 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7367 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7369 unsigned RsrcPtr, NewSRsrc;
7376 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7383 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7401 "FIXME: Need to emit flat atomics here");
7403 unsigned RsrcPtr, NewSRsrc;
7406 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7429 MIB.
addImm(CPol->getImm());
7434 MIB.
addImm(TFE->getImm());
7454 MI.removeFromParent();
7459 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7461 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7465 if (!isSoffsetLegal) {
7477 if (!isSoffsetLegal) {
7486 InstrList.insert(
MI);
7489 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7490 if (RsrcIdx != -1) {
7491 DeferredList.insert(
MI);
7496 return DeferredList.contains(
MI);
7509 unsigned Opcode =
MI.getOpcode();
7513 OpIdx >=
get(Opcode).getNumOperands() ||
7514 get(Opcode).operands()[
OpIdx].RegClass == -1)
7518 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7525 unsigned RCID =
get(Opcode).operands()[
OpIdx].RegClass;
7527 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7528 Op.setSubReg(AMDGPU::lo16);
7529 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7531 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7532 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7539 Op.setReg(NewDstReg);
7551 while (!Worklist.
empty()) {
7565 "Deferred MachineInstr are not supposed to re-populate worklist");
7583 case AMDGPU::S_ADD_I32:
7584 case AMDGPU::S_SUB_I32: {
7588 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7596 case AMDGPU::S_MUL_U64:
7598 NewOpcode = AMDGPU::V_MUL_U64_e64;
7602 splitScalarSMulU64(Worklist, Inst, MDT);
7606 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7607 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7610 splitScalarSMulPseudo(Worklist, Inst, MDT);
7614 case AMDGPU::S_AND_B64:
7615 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7619 case AMDGPU::S_OR_B64:
7620 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7624 case AMDGPU::S_XOR_B64:
7625 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7629 case AMDGPU::S_NAND_B64:
7630 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7634 case AMDGPU::S_NOR_B64:
7635 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7639 case AMDGPU::S_XNOR_B64:
7641 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7643 splitScalar64BitXnor(Worklist, Inst, MDT);
7647 case AMDGPU::S_ANDN2_B64:
7648 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7652 case AMDGPU::S_ORN2_B64:
7653 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7657 case AMDGPU::S_BREV_B64:
7658 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7662 case AMDGPU::S_NOT_B64:
7663 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7667 case AMDGPU::S_BCNT1_I32_B64:
7668 splitScalar64BitBCNT(Worklist, Inst);
7672 case AMDGPU::S_BFE_I64:
7673 splitScalar64BitBFE(Worklist, Inst);
7677 case AMDGPU::S_FLBIT_I32_B64:
7678 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7681 case AMDGPU::S_FF1_I32_B64:
7682 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7686 case AMDGPU::S_LSHL_B32:
7688 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7692 case AMDGPU::S_ASHR_I32:
7694 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7698 case AMDGPU::S_LSHR_B32:
7700 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7704 case AMDGPU::S_LSHL_B64:
7707 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7708 : AMDGPU::V_LSHLREV_B64_e64;
7712 case AMDGPU::S_ASHR_I64:
7714 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7718 case AMDGPU::S_LSHR_B64:
7720 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7725 case AMDGPU::S_ABS_I32:
7726 lowerScalarAbs(Worklist, Inst);
7730 case AMDGPU::S_CBRANCH_SCC0:
7731 case AMDGPU::S_CBRANCH_SCC1: {
7734 bool IsSCC = CondReg == AMDGPU::SCC;
7737 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7740 .
addReg(IsSCC ? VCC : CondReg);
7744 case AMDGPU::S_BFE_U64:
7745 case AMDGPU::S_BFM_B64:
7748 case AMDGPU::S_PACK_LL_B32_B16:
7749 case AMDGPU::S_PACK_LH_B32_B16:
7750 case AMDGPU::S_PACK_HL_B32_B16:
7751 case AMDGPU::S_PACK_HH_B32_B16:
7752 movePackToVALU(Worklist,
MRI, Inst);
7756 case AMDGPU::S_XNOR_B32:
7757 lowerScalarXnor(Worklist, Inst);
7761 case AMDGPU::S_NAND_B32:
7762 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7766 case AMDGPU::S_NOR_B32:
7767 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7771 case AMDGPU::S_ANDN2_B32:
7772 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7776 case AMDGPU::S_ORN2_B32:
7777 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7785 case AMDGPU::S_ADD_CO_PSEUDO:
7786 case AMDGPU::S_SUB_CO_PSEUDO: {
7787 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7788 ? AMDGPU::V_ADDC_U32_e64
7789 : AMDGPU::V_SUBB_U32_e64;
7793 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7794 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7812 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7816 case AMDGPU::S_UADDO_PSEUDO:
7817 case AMDGPU::S_USUBO_PSEUDO: {
7824 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7825 ? AMDGPU::V_ADD_CO_U32_e64
7826 : AMDGPU::V_SUB_CO_U32_e64;
7829 Register DestReg =
MRI.createVirtualRegister(NewRC);
7837 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7844 case AMDGPU::S_CSELECT_B32:
7845 case AMDGPU::S_CSELECT_B64:
7846 lowerSelect(Worklist, Inst, MDT);
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMP_LT_I32:
  case AMDGPU::S_CMP_LE_I32:
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMP_LT_U32:
  case AMDGPU::S_CMP_LE_U32:
  case AMDGPU::S_CMP_EQ_U64:
  case AMDGPU::S_CMP_LG_U64:
  case AMDGPU::S_CMP_LT_F32:
  case AMDGPU::S_CMP_EQ_F32:
  case AMDGPU::S_CMP_LE_F32:
  case AMDGPU::S_CMP_GT_F32:
  case AMDGPU::S_CMP_LG_F32:
  case AMDGPU::S_CMP_GE_F32:
  case AMDGPU::S_CMP_O_F32:
  case AMDGPU::S_CMP_U_F32:
  case AMDGPU::S_CMP_NGE_F32:
  case AMDGPU::S_CMP_NLG_F32:
  case AMDGPU::S_CMP_NGT_F32:
  case AMDGPU::S_CMP_NLE_F32:
  case AMDGPU::S_CMP_NEQ_F32:
  case AMDGPU::S_CMP_NLT_F32: {
7881 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7895 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
  case AMDGPU::S_CMP_LT_F16:
  case AMDGPU::S_CMP_EQ_F16:
  case AMDGPU::S_CMP_LE_F16:
  case AMDGPU::S_CMP_GT_F16:
  case AMDGPU::S_CMP_LG_F16:
  case AMDGPU::S_CMP_GE_F16:
  case AMDGPU::S_CMP_O_F16:
  case AMDGPU::S_CMP_U_F16:
  case AMDGPU::S_CMP_NGE_F16:
  case AMDGPU::S_CMP_NLG_F16:
  case AMDGPU::S_CMP_NGT_F16:
  case AMDGPU::S_CMP_NLE_F16:
  case AMDGPU::S_CMP_NEQ_F16:
  case AMDGPU::S_CMP_NLT_F16: {
7935 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7939 case AMDGPU::S_CVT_HI_F32_F16: {
7941 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7942 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7948 .
addReg(TmpReg, 0, AMDGPU::hi16)
7964 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7968 case AMDGPU::S_MINIMUM_F32:
7969 case AMDGPU::S_MAXIMUM_F32: {
7971 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7982 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7986 case AMDGPU::S_MINIMUM_F16:
7987 case AMDGPU::S_MAXIMUM_F16: {
7990 ? &AMDGPU::VGPR_16RegClass
7991 : &AMDGPU::VGPR_32RegClass);
8003 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8007 case AMDGPU::V_S_EXP_F16_e64:
8008 case AMDGPU::V_S_LOG_F16_e64:
8009 case AMDGPU::V_S_RCP_F16_e64:
8010 case AMDGPU::V_S_RSQ_F16_e64:
8011 case AMDGPU::V_S_SQRT_F16_e64: {
8014 ? &AMDGPU::VGPR_16RegClass
8015 : &AMDGPU::VGPR_32RegClass);
8027 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8033 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8041 if (NewOpcode == Opcode) {
8050 if (
MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
8052 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
8056 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8058 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8076 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8078 MRI.replaceRegWith(DstReg, NewDstReg);
8079 MRI.clearKillFlags(NewDstReg);
8097 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8098 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8099 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8101 get(AMDGPU::IMPLICIT_DEF), Undef);
8103 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8109 MRI.replaceRegWith(DstReg, NewDstReg);
8110 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8112 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8115 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8116 MRI.replaceRegWith(DstReg, NewDstReg);
8117 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8122 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8123 MRI.replaceRegWith(DstReg, NewDstReg);
8125 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8135 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8136 AMDGPU::OpName::src0_modifiers) >= 0)
8140 NewInstr->addOperand(Src);
8143 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8146 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8148 NewInstr.addImm(
Size);
8149 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8153 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8158 "Scalar BFE is only implemented for constant width and offset");
8166 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8167 AMDGPU::OpName::src1_modifiers) >= 0)
8169 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8171 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8172 AMDGPU::OpName::src2_modifiers) >= 0)
8174 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8176 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8178 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8180 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8186 NewInstr->addOperand(
Op);
8193 if (
Op.getReg() == AMDGPU::SCC) {
8195 if (
Op.isDef() && !
Op.isDead())
8196 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8198 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8203 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8204 Register DstReg = NewInstr->getOperand(0).getReg();
8209 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8210 MRI.replaceRegWith(DstReg, NewDstReg);
8219 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
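// Illustrative sketch (simplified assumption, not the in-tree definition):
// the public driver that feeds moveToVALUImpl is essentially a fixed-point
// loop over a worklist; the in-tree version additionally tracks a deferred
// list for instructions whose resource operands still need legalization.
// SIInstrWorklist is the worklist type used above.
//
//   while (!Worklist.empty()) {
//     MachineInstr &Inst = *Worklist.top();
//     Worklist.erase_top();
//     // Rewrites Inst to VALU form and pushes its SGPR/SCC users back on.
//     moveToVALUImpl(Worklist, MDT, Inst);
//   }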
8223std::pair<bool, MachineBasicBlock *>
8235 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8238 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8240 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8241 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8249 MRI.replaceRegWith(OldDstReg, ResultReg);
8252 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8253 return std::pair(
true, NewBB);
8256 return std::pair(
false,
nullptr);
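// Illustrative sketch (an assumption distilled from the mapping logic above,
// with a hypothetical helper name): which VALU add/sub the scalar form lowers
// to depends on whether the subtarget has carry-less 32-bit add/sub.
static unsigned getVALUAddSubOpcode(const GCNSubtarget &ST, bool IsAdd) {
  if (ST.hasAddNoCarry())
    return IsAdd ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
  // Older subtargets must use the carry-out form, which also defines VCC.
  return IsAdd ? AMDGPU::V_ADD_CO_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
}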
8273 bool IsSCC = (CondReg == AMDGPU::SCC);
8281 MRI.replaceRegWith(Dest.
getReg(), CondReg);
8288 NewCondReg =
MRI.createVirtualRegister(TC);
8292 bool CopyFound =
false;
8296 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8298 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8300 .
addReg(CandI.getOperand(1).getReg());
8312 ST.
isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8322 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8323 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8336 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
8338 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
8350 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8351 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8354 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8364 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8365 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8380 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8388 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8389 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8395 bool Src0IsSGPR = Src0.
isReg() &&
8397 bool Src1IsSGPR = Src1.
isReg() &&
8400 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8401 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8411 }
else if (Src1IsSGPR) {
8425 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8429 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8435 unsigned Opcode)
const {
8445 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8446 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8458 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8459 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8464 unsigned Opcode)
const {
8474 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8475 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8487 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8488 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8506 &AMDGPU::SGPR_32RegClass;
8509 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8512 AMDGPU::sub0, Src0SubRC);
8517 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8519 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8523 AMDGPU::sub1, Src0SubRC);
8525 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8531 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8538 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8540 Worklist.
insert(&LoHalf);
8541 Worklist.
insert(&HiHalf);
8547 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8558 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8559 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8560 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8571 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8575 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8605 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8611 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8617 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8628 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8644 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8656 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8667 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8668 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8669 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8680 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8684 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8696 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8697 ? AMDGPU::V_MUL_HI_U32_e64
8698 : AMDGPU::V_MUL_HI_I32_e64;
8713 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8721 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8740 &AMDGPU::SGPR_32RegClass;
8743 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8746 &AMDGPU::SGPR_32RegClass;
8749 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8752 AMDGPU::sub0, Src0SubRC);
8754 AMDGPU::sub0, Src1SubRC);
8756 AMDGPU::sub1, Src0SubRC);
8758 AMDGPU::sub1, Src1SubRC);
8763 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8765 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8770 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8775 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8782 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8784 Worklist.
insert(&LoHalf);
8785 Worklist.
insert(&HiHalf);
8788 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8806 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8822 Register NewDest =
MRI.createVirtualRegister(DestRC);
8828 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8846 MRI.getRegClass(Src.getReg()) :
8847 &AMDGPU::SGPR_32RegClass;
8849 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8850 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8853 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8856 AMDGPU::sub0, SrcSubRC);
8858 AMDGPU::sub1, SrcSubRC);
8864 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8868 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8887 Offset == 0 &&
"Not implemented");
8890 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8891 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8892 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8909 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8910 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8915 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8916 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8920 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8923 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8928 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8929 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8950 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8951 unsigned OpcodeAdd =
8952 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8955 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8957 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8964 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8965 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8966 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8967 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8974 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8980 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8982 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8984 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
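// Illustrative note (assumption about the intent of the expansion above): the
// 64-bit scalar count is rebuilt from two 32-bit VALU counts on the halves,
// in pseudo-code:
//   S_FLBIT_I32_B64 (ctlz): result = umin(ffbh(src.hi), ffbh(src.lo) + 32)
//   S_FF1_I32_B64   (cttz): result = umin(ffbl(src.lo), ffbl(src.hi) + 32)
// V_FFBH_U32 / V_FFBL_B32 return -1 for a zero input, so the unsigned min
// naturally falls through to the count taken from the other half.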
8987void SIInstrInfo::addUsersToMoveToVALUWorklist(
8995 switch (
UseMI.getOpcode()) {
8998 case AMDGPU::SOFT_WQM:
8999 case AMDGPU::STRICT_WWM:
9000 case AMDGPU::STRICT_WQM:
9001 case AMDGPU::REG_SEQUENCE:
9003 case AMDGPU::INSERT_SUBREG:
9006 OpNo = MO.getOperandNo();
9021 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9028 case AMDGPU::S_PACK_LL_B32_B16: {
9029 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9030 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9047 case AMDGPU::S_PACK_LH_B32_B16: {
9048 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9057 case AMDGPU::S_PACK_HL_B32_B16: {
9058 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9068 case AMDGPU::S_PACK_HH_B32_B16: {
9069 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9070 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9087 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
9088 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
9097 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9098 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9106 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9110 Register DestReg =
MI.getOperand(0).getReg();
9112 MRI.replaceRegWith(DestReg, NewCond);
9117 MI.getOperand(SCCIdx).setReg(NewCond);
9123 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9126 for (
auto &Copy : CopyToDelete)
9127 Copy->eraseFromParent();
9135void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9144 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9146 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9163 case AMDGPU::REG_SEQUENCE:
9164 case AMDGPU::INSERT_SUBREG:
9166 case AMDGPU::SOFT_WQM:
9167 case AMDGPU::STRICT_WWM:
9168 case AMDGPU::STRICT_WQM: {
9176 case AMDGPU::REG_SEQUENCE:
9177 case AMDGPU::INSERT_SUBREG:
9187 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9204 int OpIndices[3])
const {
9223 for (
unsigned i = 0; i < 3; ++i) {
9224 int Idx = OpIndices[i];
9261 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9262 SGPRReg = UsedSGPRs[0];
9265 if (!SGPRReg && UsedSGPRs[1]) {
9266 if (UsedSGPRs[1] == UsedSGPRs[2])
9267 SGPRReg = UsedSGPRs[1];
9274 AMDGPU::OpName OperandName)
const {
9275 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9279 return &
MI.getOperand(
Idx);
9296 RsrcDataFormat |= (1ULL << 56);
9301 RsrcDataFormat |= (2ULL << 59);
9304 return RsrcDataFormat;
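// Illustrative sketch (simplified assumption, hypothetical helper name): the
// value produced above is typically combined into dwords 2-3 of a synthetic
// 128-bit buffer descriptor, with the base pointer occupying dwords 0-1.
static std::pair<uint64_t, uint64_t>
makeSyntheticRsrcWords(const SIInstrInfo &TII, uint64_t BasePtr) {
  uint64_t Words01 = BasePtr;                        // base address words
  uint64_t Words23 = TII.getDefaultRsrcDataFormat(); // data/num format bits
  return {Words01, Words23};
}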
9326 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9332 unsigned Opc =
MI.getOpcode();
9338 return get(
Opc).mayLoad() &&
9343 int &FrameIndex)
const {
9351 FrameIndex =
Addr->getIndex();
9356 int &FrameIndex)
const {
9359 FrameIndex =
Addr->getIndex();
9364 int &FrameIndex)
const {
9378 int &FrameIndex)
const {
9395 while (++
I != E &&
I->isInsideBundle()) {
9396 assert(!
I->isBundle() &&
"No nested bundle!");
9404 unsigned Opc =
MI.getOpcode();
9406 unsigned DescSize =
Desc.getSize();
9411 unsigned Size = DescSize;
9426 bool HasLiteral =
false;
9427 unsigned LiteralSize = 4;
9428 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
9450 return HasLiteral ? DescSize + LiteralSize : DescSize;
9455 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
9459 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
9460 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9464 case TargetOpcode::BUNDLE:
9466 case TargetOpcode::INLINEASM:
9467 case TargetOpcode::INLINEASM_BR: {
9469 const char *AsmStr =
MI.getOperand(0).getSymbolName();
9473 if (
MI.isMetaInstruction())
9477 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
9480 unsigned LoInstOpcode = D16Info->LoOp;
9482 DescSize =
Desc.getSize();
9493 if (
MI.memoperands_empty())
9505 static const std::pair<int, const char *> TargetIndices[] = {
9543std::pair<unsigned, unsigned>
9550 static const std::pair<unsigned, const char *> TargetFlags[] = {
9568 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9582 return AMDGPU::WWM_COPY;
9584 return AMDGPU::COPY;
9596 bool IsNullOrVectorRegister =
true;
9604 return IsNullOrVectorRegister &&
9606 (Opcode == AMDGPU::IMPLICIT_DEF &&
9608 (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
9609 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9622 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
9653 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9654 case AMDGPU::SI_KILL_I1_TERMINATOR:
9663 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9664 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9665 case AMDGPU::SI_KILL_I1_PSEUDO:
9666 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9678 const unsigned OffsetBits =
9680 return (1 << OffsetBits) - 1;
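// Illustrative example (assumption): for an unsigned immediate-offset field of
// OffsetBits bits the largest encodable value is (1 << OffsetBits) - 1, so a
// 12-bit field tops out at 4095 and a 16-bit field at 65535.
static_assert((1u << 12) - 1 == 4095 && (1u << 16) - 1 == 65535,
              "unsigned offset field limits");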
9687 if (
MI.isInlineAsm())
9690 for (
auto &
Op :
MI.implicit_operands()) {
9691 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9692 Op.setReg(AMDGPU::VCC_LO);
9701 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
9705 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9723 if (Imm <= MaxImm + 64) {
9725 Overflow = Imm - MaxImm;
9815std::pair<int64_t, int64_t>
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;

  if (AllowNegative) {
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
9832 (ImmField % 4) != 0) {
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
    }
  } else if (COffsetVal >= 0) {
    ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
    RemainderOffset = COffsetVal - ImmField;
  }

  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
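// Illustrative worked example (assuming the surrounding function is the
// flat-offset splitting helper and keeps roughly this call shape): a caller
// with a constant byte offset that does not fit in the flat immediate field
// splits it into an encodable immediate plus a remainder to fold into the
// address register:
//
//   auto [ImmField, RemainderOffset] =
//       TII.splitFlatOffset(COffsetVal, AddrSpace, FlatVariant);
//   // The assertion above guarantees ImmField + RemainderOffset == COffsetVal.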
9856 switch (ST.getGeneration()) {
9882 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9883 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9884 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9885 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9886 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9887 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9888 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9889 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9896#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9897 case OPCODE##_dpp: \
9898 case OPCODE##_e32: \
9899 case OPCODE##_e64: \
9900 case OPCODE##_e64_dpp: \
9915 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9916 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
9917 case AMDGPU::V_FMA_F16_gfx9_e64:
9918 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
9919 case AMDGPU::V_INTERP_P2_F16:
9920 case AMDGPU::V_MAD_F16_e64:
9921 case AMDGPU::V_MAD_U16_e64:
9922 case AMDGPU::V_MAD_I16_e64:
9957 if (isMAI(Opcode)) {
10005 for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10006 if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10007 auto &RegOp = MI.getOperand(1 + 2 * I);
10019 switch (MI.getOpcode()) {
10021 case AMDGPU::REG_SEQUENCE:
10025 case AMDGPU::INSERT_SUBREG:
10026 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10043 if (!P.Reg.isVirtual())
10047 auto *DefInst = MRI.getVRegDef(RSR.Reg);
10048 while (auto *MI = DefInst) {
10050 switch (MI->getOpcode()) {
10052 case AMDGPU::V_MOV_B32_e32: {
10053 auto &Op1 = MI->getOperand(1);
10058 DefInst = MRI.getVRegDef(RSR.Reg);
10066 DefInst = MRI.getVRegDef(RSR.Reg);
10079 assert(MRI.isSSA() && "Must be run on SSA");
10081 auto *TRI = MRI.getTargetRegisterInfo();
10082 auto *DefBB = DefMI.getParent();
10086 if (UseMI.getParent() != DefBB)
10089 const int MaxInstScan = 20;
10093 auto E = UseMI.getIterator();
10094 for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10095 if (I->isDebugInstr())
10098 if (++NumInst > MaxInstScan)
10101 if (I->modifiesRegister(AMDGPU::EXEC, TRI))
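For orientation, this helper and the one that follows share the same shape: assert SSA, restrict the question to a single basic block, then do a small bounded scan and answer conservatively once the budget runs out. A generic sketch of that scan pattern, with modifiesExec standing in for the modifiesRegister(AMDGPU::EXEC, TRI) query (names and the iterator type are illustrative, not the real LLVM helpers):

// Illustrative sketch of the bounded scan above: walk from the instruction
// after the def up to (but not including) the use, skip debug instructions,
// and give up conservatively after MaxInstScan real instructions.
#include <iterator>

template <typename InstrIter, typename Pred>
bool execMayBeModifiedBetween(InstrIter DefIt, InstrIter UseIt,
                              Pred modifiesExec, int MaxInstScan = 20) {
  int NumInst = 0;
  for (InstrIter I = std::next(DefIt); I != UseIt; ++I) {
    if (I->isDebugInstr())
      continue;
    if (++NumInst > MaxInstScan)
      return true; // scan budget exhausted: assume EXEC may have changed
    if (modifiesExec(*I))
      return true;
  }
  return false;
}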
10111 assert(MRI.isSSA() && "Must be run on SSA");
10113 auto *TRI = MRI.getTargetRegisterInfo();
10114 auto *DefBB = DefMI.getParent();
10116 const int MaxUseScan = 10;
10119 for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10120 auto &UseInst = *Use.getParent();
10123 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10126 if (++NumUse > MaxUseScan)
10133 const int MaxInstScan = 20;
10137 for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10140 if (I->isDebugInstr())
10143 if (++NumInst > MaxInstScan)
10156 if (Reg == VReg && --NumUse == 0)
10158 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10170 if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10173 } while (Cur != MBB.end() && Cur != LastPHIIt);
10182 if (InsPt != MBB.end() &&
10183 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10184 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10185 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10186 InsPt->definesRegister(Src, nullptr)) {
10190 : AMDGPU::S_MOV_B64_term),
10192 .addReg(Src, 0, SrcSubReg)
10217 if (isFullCopyInstr(MI)) {
10218 Register DstReg = MI.getOperand(0).getReg();
10219 Register SrcReg = MI.getOperand(1).getReg();
10226 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10230 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10241 unsigned *PredCost) const {
10242 if (MI.isBundle()) {
10245 unsigned Lat = 0, Count = 0;
10246 for (++I; I != E && I->isBundledWithPred(); ++I) {
10248 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10250 return Lat + Count - 1;
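In other words, the bundle path above reports max(member latencies) plus one extra cycle per additional bundled instruction. A tiny self-contained sketch of that accumulation (latency values are made up for illustration):

// Illustrative only: mirrors the Lat/Count accumulation for a bundle.
#include <algorithm>
#include <vector>

unsigned bundleLatencySketch(const std::vector<unsigned> &MemberLatencies) {
  unsigned Lat = 0, Count = 0;
  for (unsigned L : MemberLatencies) {
    ++Count;
    Lat = std::max(Lat, L);
  }
  return Count ? Lat + Count - 1 : 0;
}
// bundleLatencySketch({4, 1, 1}) == 6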
10253 return SchedModel.computeInstrLatency(&MI);
10259 unsigned opcode = MI.getOpcode();
10263 Register Src = isa<GIntrinsic>(MI) ? MI.getOperand(2).getReg()
10264 : MI.getOperand(1).getReg();
10265 LLT DstTy = MRI.getType(Dst);
10266 LLT SrcTy = MRI.getType(Src);
10279 if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
10280 return HandleAddrSpaceCast(MI);
10282 if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
10283 auto IID = GI->getIntrinsicID();
10290 case Intrinsic::amdgcn_addrspacecast_nonnull:
10291 return HandleAddrSpaceCast(MI);
10292 case Intrinsic::amdgcn_if:
10293 case Intrinsic::amdgcn_else:
10307 if (opcode == AMDGPU::G_LOAD) {
10308 if (MI.memoperands_empty())
10312 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10313 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10321 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
10322 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10323 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10336 unsigned opcode = MI.getOpcode();
10337 if (opcode == AMDGPU::V_READLANE_B32 ||
10338 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10339 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10342 if (isCopyInstr(MI)) {
10346 RI.getPhysRegBaseClass(srcOp.getReg());
10354 if (MI.isPreISelOpcode())
10369 if (MI.memoperands_empty())
10373 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10374 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10389 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10391 if (!SrcOp.isReg())
10395 if (!Reg || !SrcOp.readsReg())
10401 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10428 F, "ds_ordered_count unsupported for this calling conv"));
10442 Register &SrcReg2, int64_t &CmpMask,
10443 int64_t &CmpValue) const {
10444 if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10447 switch (MI.getOpcode()) {
10450 case AMDGPU::S_CMP_EQ_U32:
10451 case AMDGPU::S_CMP_EQ_I32:
10452 case AMDGPU::S_CMP_LG_U32:
10453 case AMDGPU::S_CMP_LG_I32:
10454 case AMDGPU::S_CMP_LT_U32:
10455 case AMDGPU::S_CMP_LT_I32:
10456 case AMDGPU::S_CMP_GT_U32:
10457 case AMDGPU::S_CMP_GT_I32:
10458 case AMDGPU::S_CMP_LE_U32:
10459 case AMDGPU::S_CMP_LE_I32:
10460 case AMDGPU::S_CMP_GE_U32:
10461 case AMDGPU::S_CMP_GE_I32:
10462 case AMDGPU::S_CMP_EQ_U64:
10463 case AMDGPU::S_CMP_LG_U64:
10464 SrcReg = MI.getOperand(0).getReg();
10465 if (MI.getOperand(1).isReg()) {
10466 if (MI.getOperand(1).getSubReg())
10468 SrcReg2 = MI.getOperand(1).getReg();
10470 } else if (MI.getOperand(1).isImm()) {
10472 CmpValue = MI.getOperand(1).getImm();
10478 case AMDGPU::S_CMPK_EQ_U32:
10479 case AMDGPU::S_CMPK_EQ_I32:
10480 case AMDGPU::S_CMPK_LG_U32:
10481 case AMDGPU::S_CMPK_LG_I32:
10482 case AMDGPU::S_CMPK_LT_U32:
10483 case AMDGPU::S_CMPK_LT_I32:
10484 case AMDGPU::S_CMPK_GT_U32:
10485 case AMDGPU::S_CMPK_GT_I32:
10486 case AMDGPU::S_CMPK_LE_U32:
10487 case AMDGPU::S_CMPK_LE_I32:
10488 case AMDGPU::S_CMPK_GE_U32:
10489 case AMDGPU::S_CMPK_GE_I32:
10490 SrcReg = MI.getOperand(0).getReg();
10492 CmpValue = MI.getOperand(1).getImm();
10501 Register SrcReg2, int64_t CmpMask,
10510 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10511 this](int64_t ExpectedValue, unsigned SrcSize,
10512 bool IsReversible, bool IsSigned) -> bool {
10537 if (!Def || Def->getParent() != CmpInstr.getParent())
10540 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10541 Def->getOpcode() != AMDGPU::S_AND_B64)
10545 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10556 SrcOp = &Def->getOperand(2);
10557 else if (isMask(&Def->getOperand(2)))
10558 SrcOp = &Def->getOperand(1);
10564 assert(llvm::has_single_bit<uint64_t>(Mask) && "Invalid mask.");
10566 if (IsSigned && BitNo == SrcSize - 1)
10569 ExpectedValue <<= BitNo;
10571 bool IsReversedCC = false;
10572 if (CmpValue != ExpectedValue) {
10575 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10580 Register DefReg = Def->getOperand(0).getReg();
10581 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10584 for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10586 if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10587 I->killsRegister(AMDGPU::SCC, &RI))
10592 Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
10596 if (!MRI->use_nodbg_empty(DefReg)) {
10604 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10605 : AMDGPU::S_BITCMP1_B32
10606 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10607 : AMDGPU::S_BITCMP1_B64;
10612 Def->eraseFromParent();
10620 case AMDGPU::S_CMP_EQ_U32:
10621 case AMDGPU::S_CMP_EQ_I32:
10622 case AMDGPU::S_CMPK_EQ_U32:
10623 case AMDGPU::S_CMPK_EQ_I32:
10624 return optimizeCmpAnd(1, 32, true, false);
10625 case AMDGPU::S_CMP_GE_U32:
10626 case AMDGPU::S_CMPK_GE_U32:
10627 return optimizeCmpAnd(1, 32, false, false);
10628 case AMDGPU::S_CMP_GE_I32:
10629 case AMDGPU::S_CMPK_GE_I32:
10630 return optimizeCmpAnd(1, 32, false, true);
10631 case AMDGPU::S_CMP_EQ_U64:
10632 return optimizeCmpAnd(1, 64, true, false);
10633 case AMDGPU::S_CMP_LG_U32:
10634 case AMDGPU::S_CMP_LG_I32:
10635 case AMDGPU::S_CMPK_LG_U32:
10636 case AMDGPU::S_CMPK_LG_I32:
10637 return optimizeCmpAnd(0, 32, true, false);
10638 case AMDGPU::S_CMP_GT_U32:
10639 case AMDGPU::S_CMPK_GT_U32:
10640 return optimizeCmpAnd(0, 32, false, false);
10641 case AMDGPU::S_CMP_GT_I32:
10642 case AMDGPU::S_CMPK_GT_I32:
10643 return optimizeCmpAnd(0, 32, false, true);
10644 case AMDGPU::S_CMP_LG_U64:
10645 return optimizeCmpAnd(0, 64, true, false);
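Taken together, the case table dispatches every supported S_CMP/S_CMPK form into the lambda with an expected value of 0 or 1, and the lambda turns "s_and with a single-bit mask followed by a compare against that value" into a single S_BITCMP0/S_BITCMP1. A standalone sketch of just the mask reasoning follows; the SCC liveness walk, the signed most-significant-bit bail-out, and the actual instruction rewrite are elided, and the names are illustrative rather than the real helpers.

// Illustrative sketch: given the AND mask and the compare's expected and
// actual immediate, decide whether a bit-test is possible and whether the
// polarity is reversed (reversed -> S_BITCMP0_*, otherwise S_BITCMP1_*).
#include <bit>
#include <cassert>
#include <cstdint>
#include <optional>

struct BitTest {
  unsigned BitNo;
  bool ReversedCC;
};

std::optional<BitTest> classifyCmpAnd(uint64_t Mask, int64_t ExpectedValue,
                                      int64_t CmpValue, bool IsReversible) {
  assert(std::has_single_bit(Mask) && "Invalid mask.");
  unsigned BitNo = std::countr_zero(Mask);
  ExpectedValue <<= BitNo;
  bool IsReversedCC = false;
  if (CmpValue != ExpectedValue) {
    if (!IsReversible)
      return std::nullopt;
    IsReversedCC = CmpValue == (ExpectedValue ^ (int64_t)Mask);
    if (!IsReversedCC)
      return std::nullopt;
  }
  return BitTest{BitNo, IsReversedCC};
}
// classifyCmpAnd(0x10, 1, 0x10, true) -> {4, false}  (bit 4 expected set)
// classifyCmpAnd(0x10, 1, 0x0,  true) -> {4, true}   (bit 4 expected clear)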
10652 AMDGPU::OpName OpName) const {
10656 int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10670 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10673 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10674 : &AMDGPU::VReg_64_Align2RegClass);
10676 .addReg(DataReg, 0, Op.getSubReg())
10681 Op.setSubReg(AMDGPU::sub0);
10703 unsigned Opcode = MI.getOpcode();
10709 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10710 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool hasBF16PackedInsts() const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasFlatGVSMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasFmaakFmamkF64Insts() const
bool hasScaleOffset() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasVALUMaskWriteHazard() const
bool hasGFX1250Insts() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
bool hasAddPC64Inst() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool has64BitLiterals() const
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
bool hasVALUReadSGPRHazard() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasVectorMulU64() const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
bool isGenericType() const
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is a instruction that moves/copies value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminate with divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
bool isVectorSuperClass(const TargetRegisterClass *RC) const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination ...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination ...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating-point operands.
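A minimal sketch of the integer inline-constant check, assuming the usual llvm::AMDGPU namespace from AMDGPUBaseInfo.h; the [-16, 64] window is the inlinable integer range:
// Integer inline constants cover [-16, 64]; anything else needs a literal encoding.
static bool needsLiteralEncoding(int64_t Imm) {
  return !llvm::AMDGPU::isInlinableIntLiteral(Imm);  // e.g. 65 -> true, 64 -> false
}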
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
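A small hedged sketch; the register classes named here are assumptions chosen for illustration:
// A 32-bit VGPR class vs. a 64-bit scalar class.
unsigned VGPRBits = llvm::AMDGPU::getRegBitWidth(AMDGPU::VGPR_32RegClass); // 32
unsigned SRegBits = llvm::AMDGPU::getRegBitWidth(AMDGPU::SReg_64RegClass); // 64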
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
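A short sketch of the range wrappers, which avoid the explicit begin()/end() plumbing (headers assumed: llvm/ADT/STLExtras.h, llvm/ADT/ArrayRef.h):
static bool allNonNegative(llvm::ArrayRef<int> Vals) {
  return llvm::all_of(Vals, [](int V) { return V >= 0; });
}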
int popcount(T Value) noexcept
Count the number of set bits in a value.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
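A small illustration of the two helpers; maxUIntN is usable at compile time, popcount is shown in a runtime wrapper (headers assumed: llvm/Support/MathExtras.h, llvm/ADT/bit.h):
static_assert(llvm::maxUIntN(16) == 65535, "2^16 - 1");
static unsigned setBitsInMask(uint32_t Mask) {
  return llvm::popcount(Mask);   // e.g. setBitsInMask(0xF0) == 4
}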
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
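A hedged sketch of the builder idiom; MBB, I, DL, TII and DstReg stand for the usual locals and are assumptions here, as is the choice of opcode:
// Materialize a zero into DstReg before iterator I (illustrative opcode).
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), DstReg)
    .addImm(0);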
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
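A minimal sketch of walking a container together with element indices (header assumed: llvm/ADT/STLExtras.h):
static int indexWeightedSum(llvm::ArrayRef<int> Vals) {
  int Sum = 0;
  for (const auto &En : llvm::enumerate(Vals))
    Sum += static_cast<int>(En.index()) * En.value();
  return Sum;
}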
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
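A sketch of the usual erase-while-iterating pattern; MBB is an assumed MachineBasicBlock reference:
// Deleting the current instruction does not invalidate the loop.
for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
  if (MI.isDebugInstr())
    MI.eraseFromParent();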
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
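Two compile-time checks showing the plain and skewed forms (header assumed: llvm/Support/MathExtras.h):
// Round 13 down to a multiple of 8, then to 3 (mod 8) with a skew.
static_assert(llvm::alignDown(13u, 8u) == 8u, "plain align-down");
static_assert(llvm::alignDown(13u, 8u, 3u) == 11u, "11 == 3 (mod 8)");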
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
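A small sketch combining this with isPowerOf2_64: for a power of two, the trailing-zero count is its log2 (headers assumed: llvm/ADT/bit.h, llvm/Support/MathExtras.h, cassert):
static unsigned exponentOfPow2(uint64_t V) {
  assert(llvm::isPowerOf2_64(V) && "expected a power of two");
  return llvm::countr_zero(V);   // e.g. exponentOfPow2(64) == 6
}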
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
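A compile-time illustration of splitting a 64-bit immediate into its halves (header assumed: llvm/Support/MathExtras.h):
static_assert(llvm::Hi_32(0x1234567890ABCDEFULL) == 0x12345678u, "high half");
static_assert(llvm::Lo_32(0x1234567890ABCDEFULL) == 0x90ABCDEFu, "low half");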
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
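A one-line compile-time example of the rounding behavior (header assumed: llvm/Support/MathExtras.h):
// 10 dwords packed into 4-dword granules needs 3 granules.
static_assert(llvm::divideCeil(10u, 4u) == 3u, "ceiling division");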
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
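A compile-time check of the boundary behavior for a signed 16-bit field (header assumed: llvm/Support/MathExtras.h):
static_assert(llvm::isIntN(16, 32767), "INT16_MAX fits");
static_assert(!llvm::isIntN(16, 32768), "one past INT16_MAX does not");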
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.