32#include "llvm/IR/IntrinsicsAMDGPU.h"
39#define DEBUG_TYPE "si-instr-info"
41#define GET_INSTRINFO_CTOR_DTOR
42#include "AMDGPUGenInstrInfo.inc"
45#define GET_D16ImageDimIntrinsics_IMPL
46#define GET_ImageDimIntrinsicTable_IMPL
47#define GET_RsrcIntrinsics_IMPL
48#include "AMDGPUGenSearchableTables.inc"
56 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
59 "amdgpu-fix-16-bit-physreg-copies",
60 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
75 unsigned N =
Node->getNumOperands();
76 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
88 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
89 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
91 if (Op0Idx == -1 && Op1Idx == -1)
95 if ((Op0Idx == -1 && Op1Idx != -1) ||
96 (Op1Idx == -1 && Op0Idx != -1))
117 return !
MI.memoperands_empty() &&
119 return MMO->isLoad() && MMO->isInvariant();
141 if (!
MI.hasImplicitDef() &&
142 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
143 !
MI.mayRaiseFPException())
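// Conservatively determine whether this instruction's result depends on the
// current EXEC mask (e.g. compares whose mask result feeds EXEC-manipulating
// uses, or lane-access instructions).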
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))
  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:
  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
      if (FromCycle == nullptr)
      while (FromCycle && !FromCycle->contains(ToCycle)) {
                                    int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())
    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)
    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    assert(NumOps == 4 || NumOps == 5);
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));
    if (!Load0Offset || !Load1Offset)
    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (OffIdx0 == -1 || OffIdx1 == -1)
    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
  OffsetIsScalable = false;
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)
      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      Offset = EltSize * Offset0;
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1) {
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (BaseOp && !BaseOp->isFI())
    if (SOffset->isReg())
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)
  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
  return Base1 == Base2;
    int64_t Offset1, bool OffsetIsScalable1,
    int64_t Offset2, bool OffsetIsScalable2,
    unsigned ClusterSize,
    unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
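  // Estimate the cluster footprint in dwords: each load is rounded up to a
  // whole dword and scaled by the cluster size, then compared against the
  // subtarget's clustering limit.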
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;
    int64_t Offset0, int64_t Offset1,
    unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
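// Helper used by copyPhysReg to diagnose copies (such as VGPR to SGPR) that
// the target cannot encode.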
    const char *Msg = "illegal VGPR to SGPR copy") {
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");
          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");
         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {
    if (!Def->modifiesRegister(SrcReg, &RI))
    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)
    bool SafeToPropagate = true;
    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;
    if (!SafeToPropagate)
    for (auto I = Def; I != MI; ++I)
      I->clearRegisterKills(DefOp.getReg(), &RI);
  if (ImpUseSuperReg) {
    Builder.addReg(ImpUseSuperReg,
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
         "VGPR used for an intermediate copy should have been reserved.");
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
    if (ImpUseSuperReg) {
      UseBuilder.addReg(ImpUseSuperReg,
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;
    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;
  assert(FirstMI && LastMI);
  LastMI->addRegisterKilled(SrcReg, &RI);
    Register SrcReg, bool KillSrc, bool RenamableDest,
    bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (((Size == 16) != (SrcSize == 16))) {
    if (DestReg == SrcReg) {
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);
  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
             "Cannot use hi16 subreg with an AGPR!");
  if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
      (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
    if (!DstLow || !SrcLow) {
           "Cannot use hi16 subreg on VI!");
  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
    Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_MOV_B64_e32;
      Opcode = AMDGPU::V_PK_MOV_B32;
  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();
  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                             *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1187 return &AMDGPU::VGPR_32RegClass;
1198 assert(
MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
1199 "Not a VGPR32 reg");
1201 if (
Cond.size() == 1) {
1202 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1211 }
else if (
Cond.size() == 2) {
1212 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1214 case SIInstrInfo::SCC_TRUE: {
1215 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1217 : AMDGPU::S_CSELECT_B64), SReg)
1228 case SIInstrInfo::SCC_FALSE: {
1229 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1231 : AMDGPU::S_CSELECT_B64), SReg)
1242 case SIInstrInfo::VCCNZ: {
1245 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1256 case SIInstrInfo::VCCZ: {
1259 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1270 case SIInstrInfo::EXECNZ: {
1271 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1274 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
1277 : AMDGPU::S_CSELECT_B64), SReg)
1288 case SIInstrInfo::EXECZ: {
1289 Register SReg =
MRI.createVirtualRegister(BoolXExecRC);
1292 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
1295 : AMDGPU::S_CSELECT_B64), SReg)
1343 int64_t &ImmVal)
const {
1344 switch (
MI.getOpcode()) {
1345 case AMDGPU::V_MOV_B32_e32:
1346 case AMDGPU::S_MOV_B32:
1347 case AMDGPU::S_MOVK_I32:
1348 case AMDGPU::S_MOV_B64:
1349 case AMDGPU::V_MOV_B64_e32:
1350 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1351 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1352 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1353 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1354 case AMDGPU::V_MOV_B64_PSEUDO: {
1358 return MI.getOperand(0).getReg() == Reg;
1363 case AMDGPU::S_BREV_B32:
1364 case AMDGPU::V_BFREV_B32_e32:
1365 case AMDGPU::V_BFREV_B32_e64: {
1368 ImmVal =
static_cast<int64_t
>(reverseBits<int32_t>(Src0.
getImm()));
1369 return MI.getOperand(0).getReg() == Reg;
1374 case AMDGPU::S_NOT_B32:
1375 case AMDGPU::V_NOT_B32_e32:
1376 case AMDGPU::V_NOT_B32_e64: {
1379 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1380 return MI.getOperand(0).getReg() == Reg;
1393 return AMDGPU::COPY;
1394 if (RI.getRegSizeInBits(*DstRC) == 16) {
1397 return RI.
isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1399 if (RI.getRegSizeInBits(*DstRC) == 32)
1400 return RI.
isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1401 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.
isSGPRClass(DstRC))
1402 return AMDGPU::S_MOV_B64;
1403 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.
isSGPRClass(DstRC))
1404 return AMDGPU::V_MOV_B64_PSEUDO;
1405 return AMDGPU::COPY;
1410 bool IsIndirectSrc)
const {
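  // The indirect GPR-index pseudos come in one variant per register-tuple
  // width; select the narrowest variant that covers VecSize (in bits).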
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
                                       bool IsSGPR) const {
    assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;
    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;
    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
                                      bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
  return AMDGPU::SI_SPILL_WWM_V32_SAVE;
      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);
  assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
  assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
         SrcReg != AMDGPU::EXEC && "exec should not be spilled");
    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
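// The restore path mirrors the save path: select the SI_SPILL_*_RESTORE
// pseudo by the spill width of the register class.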
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;
    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;
    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
                                         bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
  unsigned SpillSize = TRI->getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));
  assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");
    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
                             unsigned Quantity) const {
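  // A single S_NOP covers at most 8 wait states, so larger requests are
  // emitted in chunks of up to 8.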
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
  if (HasNoTerminator) {
    if (Info->returnsVoid()) {
  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;
    Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
        MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
        MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
        .addUse(DoorbellRegMasked)
        .addImm(ECQueueWaveAbort);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
        .addUse(SetWaveAbortBit);
    BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
    MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
                                               &AMDGPU::SReg_32_XM0RegClass);
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
        .addImm(SignExtend64<32>(Imm))
        .addImm(SignExtend64<32>(Imm >> 32))
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));
      MI.setDesc(get(AMDGPU::S_MOV_B64));
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;
    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());
        .add(MI.getOperand(2))
    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();
    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
        .add(MI.getOperand(2))
    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();
    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
      Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));
    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                                     : AMDGPU::S_OR_SAVEEXEC_B64));
  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
  case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();
  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));
  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)
    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");
    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);
    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
    OffsetMO->setImm(FinalOffset);
    MI->setMemRefs(*MF, NewMMOs);
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);
  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));
      MovDPP.addImm(MO.getImm());
    Split[Part] = MovDPP;
  if (Dst.isVirtual())
  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)
  return std::nullopt;
    AMDGPU::OpName Src0OpName,
    AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");
  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();
  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);
  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();
  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())
  int64_t NonRegVal = NonRegOp1.getImm();
  NonRegOp2.setImm(NonRegVal);
                                   unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");
  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)
  if (Src0Idx > Src1Idx)
  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");
                             Src1, AMDGPU::OpName::src1_modifiers);
                     AMDGPU::OpName::src1_sel);
    unsigned &SrcOpIdx0,
    unsigned &SrcOpIdx1) const {
    unsigned &SrcOpIdx0,
    unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())
  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                        int64_t BrOffset) const {
  return MI.getOperand(0).getMBB();
  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");
      MCCtx.createTempSymbol("offset", true);
      MCCtx.createTempSymbol("post_addpc", true);
  AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
  Offset->setVariableValue(OffsetExpr);
  assert(RS && "RegScavenger required for long branching");
  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)
  ApplyHazardWorkarounds();
      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();
  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();
    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
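// Map between the BranchPredicate enum and the corresponding S_CBRANCH_*
// opcodes (and back again in getBranchPredicate below).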
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();
  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)
  Cond.push_back(I->getOperand(1));
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();
                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();
    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {
                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;
  if (MRI.getRegClass(FalseReg) != RC)
  if (NumInsts % 2 == 0)
  CondCycles = TrueCycles = FalseCycles = NumInsts;
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);
  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {
  if (DstSize == 64 && Pred == SCC_TRUE) {
  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;
  if (Pred == SCC_TRUE) {
    SelOp = AMDGPU::S_CSELECT_B32;
    EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;
      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);
    unsigned SubIdx = SubIndices[Idx];
    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);
                                      unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
    return SignExtend64<32>(Imm);
    return SignExtend64<32>(Imm >> 32);
    return SignExtend64<16>(Imm);
    return SignExtend64<16>(Imm >> 16);
  case AMDGPU::sub1_lo16:
    return SignExtend64<16>(Imm >> 32);
  case AMDGPU::sub1_hi16:
    return SignExtend64<16>(Imm >> 48);
  return std::nullopt;
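// Select the *AK form (the constant addend supplied as a literal) matching a
// MAC/FMA opcode, respecting true16/fake16 availability.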
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;
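// Same selection for the *MK forms, where the literal is one of the multiply
// operands instead of the addend.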
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
  if (!MRI->hasOneNonDBGUse(Reg))
  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");
    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;
    if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)
    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {
    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
      UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);
    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);
  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {
    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);
        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())
      if (Def && Def->isMoveImmediate() &&
      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)
      unsigned SrcSubReg = RegSrc->getSubReg();
      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&
      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)
      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      const std::optional<int64_t> SubRegImm =
      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;
         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();
  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  unsigned Opc = MI.getOpcode();
  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));
    if (Def.isEarlyClobber() && Def.isReg() &&
      auto UpdateDefIndex = [&](LiveRange &LR) {
        auto *S = LR.find(OldIndex);
        if (S != LR.end() && S->start == OldIndex) {
          assert(S->valno && S->valno->def == OldIndex);
          S->start = NewIndex;
          S->valno->def = NewIndex;
      for (auto &SR : LI.subranges())
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
  const auto killDef = [&]() -> void {
    if (MRI.hasOneNonDBGUse(DefReg)) {
      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);
          MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
    MIB.addImm(OpSel ? OpSel->getImm() : 0);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:
  if (MI.isTerminator() || MI.isPosition())
  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)
  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))
  if (MI.memoperands_empty())
    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);
  unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
  if (MI.isCall() || MI.isInlineAsm())
  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
  if (MI.isMetaInstruction())
  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);
  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {
  APInt IntImm = Imm.bitcastToAPInt();
  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);
    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                   AMDGPU::OpName::src2))
  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
                              AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_CNDMASK_B32_e64:
        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));
  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;
  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)
    return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
           MO.getReg() == AMDGPU::VCC_LO;
  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:
    switch (MI.getOpcode()) {
    case AMDGPU::V_READLANE_B32:
    case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    case AMDGPU::V_WRITELANE_B32:
    case AMDGPU::SI_SPILL_S32_TO_VGPR:
  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
    if (SubReg.getReg().isPhysical())
    return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
      ErrInfo = "illegal copy from vector register to SGPR";
  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);
  if (SIInstrInfo::isGenericOpcode(Opcode))
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";
  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";
  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";
    int RegClass = Desc.operands()[i].RegClass;
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Expected inline constant for operand.";
      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
4997 ErrInfo =
"Expected immediate, but got non-immediate";
5025 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5034 ErrInfo =
"Subtarget requires even aligned vector registers";
5039 if (RegClass != -1) {
5040 if (Reg.isVirtual())
5045 ErrInfo =
"Operand has incorrect register class.";
5054 ErrInfo =
"SDWA is not supported on this target";
5058 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5059 AMDGPU::OpName::dst_sel}) {
5063 int64_t Imm = MO->
getImm();
5065 ErrInfo =
"Invalid SDWA selection";
5070 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5072 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5080 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5087 "Only reg allowed as operands in SDWA instructions on GFX9+";
5096 if (OMod !=
nullptr &&
5098 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5103 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5104 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5105 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5106 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5109 unsigned Mods = Src0ModsMO->
getImm();
5112 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5118 if (isVOPC(BasicOpcode)) {
5122 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5123 ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
5129 if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
5130 ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
5136 if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
5137 ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
5144 if (DstUnused && DstUnused->isImm() &&
5147 if (!Dst.isReg() || !Dst.isTied()) {
5148 ErrInfo = "Dst register should have tied register";
5153 MI.getOperand(MI.findTiedOperandIdx(DstIdx));
5156 "Dst register should be tied to implicit use of preserved register";
5160 ErrInfo = "Dst register should use same physical register as preserved";
5167 if (isImage(Opcode) && !MI.mayStore()) {
5187 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5191 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5192 if (RegCount > DstSize) {
5193 ErrInfo = "Image instruction returns too many registers for dst "
5202 if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5203 unsigned ConstantBusCount = 0;
5204 bool UsesLiteral = false;
5207 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5211 LiteralVal = &MI.getOperand(ImmIdx);
5220 for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5231 } else if (!MO.isFI()) {
5238 ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
5248 if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
5249 return !RI.regsOverlap(SGPRUsed, SGPR);
5259 Opcode != AMDGPU::V_WRITELANE_B32) {
5260 ErrInfo = "VOP* instruction violates constant bus restriction";
5265 ErrInfo = "VOP3 instruction uses literal";
5272 if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5273 unsigned SGPRCount = 0;
5276 for (int OpIdx : {Src0Idx, Src1Idx}) {
5284 if (MO.getReg() != SGPRUsed)
5290 ErrInfo = "WRITELANE instruction violates constant bus restriction";
5297 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5298 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5305 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
5315 ErrInfo = "ABS not allowed in VOP3B instructions";
5328 ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
5335 if (Desc.isBranch()) {
5337 ErrInfo = "invalid branch target for SOPK instruction";
5343 if (!isUInt<16>(Imm)) {
5344 ErrInfo = "invalid immediate for SOPK instruction";
5348 if (!isInt<16>(Imm)) {
5349 ErrInfo = "invalid immediate for SOPK instruction";
5356 if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5357 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5358 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5359 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5360 const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5361 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5363 const unsigned StaticNumOps =
5364 Desc.getNumOperands() + Desc.implicit_uses().size();
5365 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5370 if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5371 ErrInfo = "missing implicit register operands";
5377 if (!Dst->isUse()) {
5378 ErrInfo = "v_movreld_b32 vdst should be a use operand";
5383 if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5384 UseOpIdx != StaticNumOps + 1) {
5385 ErrInfo = "movrel implicit operands should be tied";
5392 = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5394 !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5395 ErrInfo = "src0 should be subreg of implicit vector use";
5403 if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5404 ErrInfo = "VALU instruction does not implicitly read exec mask";
5410 if (MI.mayStore() &&
5415 if (Soff && Soff->getReg() != AMDGPU::M0) {
5416 ErrInfo = "scalar stores must use m0 as offset register";
5424 if (Offset->getImm() != 0) {
5425 ErrInfo = "subtarget does not support offsets in flat instructions";
5432 if (GDSOp && GDSOp->getImm() != 0) {
5433 ErrInfo = "GDS is not supported on this subtarget";
5441 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5442 AMDGPU::OpName::vaddr0);
5443 AMDGPU::OpName RSrcOpName =
5444 isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5445 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5453 ErrInfo = "dim is out of range";
5460 IsA16 = R128A16->getImm() != 0;
5461 } else if (ST.hasA16()) {
5463 IsA16 = A16->getImm() != 0;
5466 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5468 unsigned AddrWords =
5471 unsigned VAddrWords;
5473 VAddrWords = RsrcIdx - VAddr0Idx;
5476 unsigned LastVAddrIdx = RsrcIdx - 1;
5477 VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
5485 if (VAddrWords != AddrWords) {
5487 << " but got " << VAddrWords << "\n");
5488 ErrInfo = "bad vaddr size";
5496 using namespace AMDGPU::DPP;
5498 unsigned DC = DppCt->getImm();
5499 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5500 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5501 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5502 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5503 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5504 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5505 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5506 ErrInfo = "Invalid dpp_ctrl value";
5509 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5511 ErrInfo = "Invalid dpp_ctrl value: "
5512 "wavefront shifts are not supported on GFX10+";
5515 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5517 ErrInfo = "Invalid dpp_ctrl value: "
5518 "broadcasts are not supported on GFX10+";
5521 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5523 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5524 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5526 ErrInfo = "Invalid dpp_ctrl value: "
5527 "row_newbroadcast/row_share is not supported before "
5532 ErrInfo = "Invalid dpp_ctrl value: "
5533 "row_share and row_xmask are not supported before GFX10";
5538 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5541 ErrInfo = "Invalid dpp_ctrl value: "
5542 "DP ALU dpp only support row_newbcast";
5549 AMDGPU::OpName DataName =
5550 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5559 ErrInfo = "Invalid register class: "
5560 "vdata and vdst should be both VGPR or AGPR";
5563 if (Data && Data2 &&
5565 ErrInfo = "Invalid register class: "
5566 "both data operands should be VGPR or AGPR";
5570 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5573 ErrInfo = "Invalid register class: "
5574 "agpr loads and stores not supported on this GPU";
5581 const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
5586 if (Reg.isPhysical())
5593 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5594 Opcode == AMDGPU::DS_GWS_BARRIER) {
5596 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5597 ErrInfo = "Subtarget requires even aligned vector registers "
5598 "for DS_GWS instructions";
5604 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5605 ErrInfo = "Subtarget requires even aligned vector registers "
5606 "for vaddr operand of image instructions";
5612 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5614 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5615 ErrInfo = "Invalid register class: "
5616 "v_accvgpr_write with an SGPR is not supported on this GPU";
5621 if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5624 ErrInfo = "pseudo expects only physical SGPRs";
5632 ErrInfo = "Subtarget does not support offset scaling";
5636 ErrInfo = "Instruction does not support offset scaling";
5645 for (unsigned I = 0; I < 3; ++I) {
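// The switch below maps a scalar (SALU) opcode to the VALU opcode used when
// the instruction must be moved to vector registers (this appears to be
// SIInstrInfo::getVALUOp in the full source). INSTRUCTION_LIST_END means no
// direct vector equivalent exists; the f16 cases choose between _t16 and
// _fake16 encodings depending on the subtarget.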
5658 switch (MI.getOpcode()) {
5659 default:
return AMDGPU::INSTRUCTION_LIST_END;
5660 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5661 case AMDGPU::COPY:
return AMDGPU::COPY;
5662 case AMDGPU::PHI:
return AMDGPU::PHI;
5663 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5664 case AMDGPU::WQM:
return AMDGPU::WQM;
5665 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5666 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5667 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5668 case AMDGPU::S_MOV_B32: {
5670 return MI.getOperand(1).isReg() ||
5672 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5674 case AMDGPU::S_ADD_I32:
5675 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5676 case AMDGPU::S_ADDC_U32:
5677 return AMDGPU::V_ADDC_U32_e32;
5678 case AMDGPU::S_SUB_I32:
5679 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5682 case AMDGPU::S_ADD_U32:
5683 return AMDGPU::V_ADD_CO_U32_e32;
5684 case AMDGPU::S_SUB_U32:
5685 return AMDGPU::V_SUB_CO_U32_e32;
5686 case AMDGPU::S_ADD_U64_PSEUDO:
5687 return AMDGPU::V_ADD_U64_PSEUDO;
5688 case AMDGPU::S_SUB_U64_PSEUDO:
5689 return AMDGPU::V_SUB_U64_PSEUDO;
5690 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5691 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5692 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5693 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5694 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5695 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5696 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5697 case AMDGPU::S_XNOR_B32:
5698 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5699 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5700 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5701 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5702 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5703 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5704 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5705 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5706 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5707 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5708 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5709 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5710 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5711 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5712 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5713 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5714 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5715 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5716 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5717 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5718 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5719 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5720 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5721 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5722 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5723 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5724 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5725 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5726 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5727 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5728 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5729 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5730 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5731 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5732 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5733 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5734 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5735 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5736 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5737 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5738 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5739 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5740 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5741 case AMDGPU::S_CVT_F32_F16:
5742 case AMDGPU::S_CVT_HI_F32_F16:
5744 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5745 case AMDGPU::S_CVT_F16_F32:
5747 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5748 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5749 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5750 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5751 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5752 case AMDGPU::S_CEIL_F16:
5754 : AMDGPU::V_CEIL_F16_fake16_e64;
5755 case AMDGPU::S_FLOOR_F16:
5757 : AMDGPU::V_FLOOR_F16_fake16_e64;
5758 case AMDGPU::S_TRUNC_F16:
5760 : AMDGPU::V_TRUNC_F16_fake16_e64;
5761 case AMDGPU::S_RNDNE_F16:
5763 : AMDGPU::V_RNDNE_F16_fake16_e64;
5764 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5765 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5766 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5767 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5768 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5769 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5770 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5771 case AMDGPU::S_ADD_F16:
5773 : AMDGPU::V_ADD_F16_fake16_e64;
5774 case AMDGPU::S_SUB_F16:
5776 : AMDGPU::V_SUB_F16_fake16_e64;
5777 case AMDGPU::S_MIN_F16:
5779 : AMDGPU::V_MIN_F16_fake16_e64;
5780 case AMDGPU::S_MAX_F16:
5782 : AMDGPU::V_MAX_F16_fake16_e64;
5783 case AMDGPU::S_MINIMUM_F16:
5785 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5786 case AMDGPU::S_MAXIMUM_F16:
5788 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5789 case AMDGPU::S_MUL_F16:
5791 : AMDGPU::V_MUL_F16_fake16_e64;
5792 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5793 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5794 case AMDGPU::S_FMAC_F16:
5796 : AMDGPU::V_FMAC_F16_fake16_e64;
5797 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5798 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5799 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5800 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5801 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5802 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5803 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5804 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5805 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5806 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5807 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5808 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5809 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5810 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5811 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5812 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5813 case AMDGPU::S_CMP_LT_F16:
5815 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5816 case AMDGPU::S_CMP_EQ_F16:
5818 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5819 case AMDGPU::S_CMP_LE_F16:
5821 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5822 case AMDGPU::S_CMP_GT_F16:
5824 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5825 case AMDGPU::S_CMP_LG_F16:
5827 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5828 case AMDGPU::S_CMP_GE_F16:
5830 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5831 case AMDGPU::S_CMP_O_F16:
5833 : AMDGPU::V_CMP_O_F16_fake16_e64;
5834 case AMDGPU::S_CMP_U_F16:
5836 : AMDGPU::V_CMP_U_F16_fake16_e64;
5837 case AMDGPU::S_CMP_NGE_F16:
5839 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5840 case AMDGPU::S_CMP_NLG_F16:
5842 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5843 case AMDGPU::S_CMP_NGT_F16:
5845 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5846 case AMDGPU::S_CMP_NLE_F16:
5848 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5849 case AMDGPU::S_CMP_NEQ_F16:
5851 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5852 case AMDGPU::S_CMP_NLT_F16:
5854 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5855 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5856 case AMDGPU::V_S_EXP_F16_e64:
5858 : AMDGPU::V_EXP_F16_fake16_e64;
5859 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5860 case AMDGPU::V_S_LOG_F16_e64:
5862 : AMDGPU::V_LOG_F16_fake16_e64;
5863 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5864 case AMDGPU::V_S_RCP_F16_e64:
5866 : AMDGPU::V_RCP_F16_fake16_e64;
5867 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5868 case AMDGPU::V_S_RSQ_F16_e64:
5870 : AMDGPU::V_RSQ_F16_fake16_e64;
5871 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5872 case AMDGPU::V_S_SQRT_F16_e64:
5874 : AMDGPU::V_SQRT_F16_fake16_e64;
5877 "Unexpected scalar opcode without corresponding vector one!");
5890 bool IsWave32 = ST.isWave32();
5895 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5896 MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5905 const unsigned OrSaveExec =
5906 IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
5919 unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5921 auto ExecRestoreMI =
5930 "Not a whole wave func");
5933 if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
5934 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
5943 bool IsAllocatable) {
5944 if ((IsAllocatable || !ST.hasGFX90AInsts()) &&
5949 case AMDGPU::AV_32RegClassID:
5950 RCID = AMDGPU::VGPR_32RegClassID;
5952 case AMDGPU::AV_64RegClassID:
5953 RCID = AMDGPU::VReg_64RegClassID;
5955 case AMDGPU::AV_96RegClassID:
5956 RCID = AMDGPU::VReg_96RegClassID;
5958 case AMDGPU::AV_128RegClassID:
5959 RCID = AMDGPU::VReg_128RegClassID;
5961 case AMDGPU::AV_160RegClassID:
5962 RCID = AMDGPU::VReg_160RegClassID;
5964 case AMDGPU::AV_512RegClassID:
5965 RCID = AMDGPU::VReg_512RegClassID;
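// The mapping above narrows the combined AV (AGPR-or-VGPR) register class IDs
// to plain VGPR/VReg classes; judging from the guarding condition, this is
// done when the class must be allocatable or the subtarget lacks GFX90A AGPR
// support.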
5981 auto RegClass = TID.operands()[OpNum].RegClass;
5982 bool IsAllocatable = false;
5991 const int VDstIdx = AMDGPU::getNamedOperandIdx(TID.Opcode,
5992 AMDGPU::OpName::vdst);
5993 const int DataIdx = AMDGPU::getNamedOperandIdx(TID.Opcode,
5995 : AMDGPU::OpName::vdata);
5996 if (DataIdx != -1) {
5998 TID.Opcode, AMDGPU::OpName::data1);
6005 unsigned OpNo) const {
6007 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
6008 Desc.operands()[OpNo].RegClass == -1) {
6011 if (Reg.isVirtual()) {
6013 MI.getParent()->getParent()->getRegInfo();
6014 return MRI.getRegClass(Reg);
6016 return RI.getPhysRegBaseClass(Reg);
6019 unsigned RCID = Desc.operands()[OpNo].RegClass;
6028 unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass;
6030 unsigned Size = RI.getRegSizeInBits(*RC);
6031 unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6032 : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6033 : AMDGPU::V_MOV_B32_e32;
6035 Opcode = AMDGPU::COPY;
6037 Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6051 return RI.getSubReg(SuperReg.getReg(), SubIdx);
6057 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);
6068 if (SubIdx == AMDGPU::sub0)
6070 if (SubIdx == AMDGPU::sub1)
6082void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
6098 if (Reg.isPhysical())
6109 DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
6120 unsigned Opc = MI.getOpcode();
6126 constexpr const AMDGPU::OpName OpNames[] = {
6127 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6130 int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
6131 if (static_cast<unsigned>(SrcIdx) == OpIdx &&
6148 const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
6149 const int DataIdx = AMDGPU::getNamedOperandIdx(
6150 Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6151 if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
6152 MI.getOperand(DataIdx).isReg() &&
6153 RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
6155 if ((int)OpIdx == DataIdx) {
6156 if (VDstIdx != -1 &&
6157 RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6160 const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
6161 if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
6162 RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6168 (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
6188 constexpr const unsigned NumOps = 3;
6189 constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
6190 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6191 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6192 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6197 int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
6200 MO = &MI.getOperand(SrcIdx);
6207 AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
6211 unsigned Mods = MI.getOperand(ModsIdx).getImm();
6215 return !OpSel && !OpSelHi;
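// This helper reads the src*_modifiers immediates and returns true only when
// neither op_sel nor op_sel_hi is requested, i.e. the packed operand
// selection is the trivial default.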
6239 if (!LiteralLimit--)
6249 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6255 if (!SGPRsUsed.count(SGPR) &&
6258 if (--ConstantBusLimit <= 0)
6270 if (!LiteralLimit--)
6272 if (--ConstantBusLimit <= 0)
6278 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6282 if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
6284 !Op.isIdenticalTo(*MO))
6308 bool Is64BitOp = Is64BitFPOp ||
6324 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6343 unsigned Opc = MI.getOpcode();
6346 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
6349 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
6362 if (Opc == AMDGPU::V_WRITELANE_B32) {
6365 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6371 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6388 if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
6389 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
6390 if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
6402 if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
6404 Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6416 if (HasImplicitSGPR || !MI.isCommutable()) {
6433 if (CommutedOpc == -1) {
6438 MI.setDesc(get(CommutedOpc));
6442 bool Src0Kill = Src0.isKill();
6446 else if (Src1.isReg()) {
6461 unsigned Opc = MI.getOpcode();
6464 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
6465 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
6466 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
6469 if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6470 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6471 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6472 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6473 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6474 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6475 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6480 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6485 if (VOP3Idx[2] != -1) {
6488 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6500 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6502 SGPRsUsed.
insert(SGPRReg);
6506 for (
int Idx : VOP3Idx) {
6515 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6540 if (ConstantBusLimit > 0) {
6552 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6553 !RI.
isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6559 for (
unsigned I = 0;
I < 3; ++
I) {
6572 SRC = RI.getCommonSubClass(SRC, DstRC);
6575 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6579 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6581 get(TargetOpcode::COPY), NewSrcReg)
6588 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6594 for (
unsigned i = 0; i < SubRegs; ++i) {
6595 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6597 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6604 get(AMDGPU::REG_SEQUENCE), DstReg);
6605 for (
unsigned i = 0; i < SubRegs; ++i) {
6620 if (SBase && !RI.
isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6622 SBase->setReg(SGPR);
6633 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6634 if (OldSAddrIdx < 0)
6650 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6651 if (NewVAddrIdx < 0)
6654 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6658 if (OldVAddrIdx >= 0) {
6660 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6672 if (OldVAddrIdx == NewVAddrIdx) {
6675 MRI.removeRegOperandFromUseList(&NewVAddr);
6676 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6680 MRI.removeRegOperandFromUseList(&NewVAddr);
6681 MRI.addRegOperandToUseList(&NewVAddr);
6683 assert(OldSAddrIdx == NewVAddrIdx);
6685 if (OldVAddrIdx >= 0) {
6686 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6687 AMDGPU::OpName::vdst_in);
6691 if (NewVDstIn != -1) {
6692 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6698 if (NewVDstIn != -1) {
6699 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6740 unsigned OpSubReg =
Op.getSubReg();
6749 Register DstReg =
MRI.createVirtualRegister(DstRC);
6759 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6762 bool ImpDef = Def->isImplicitDef();
6763 while (!ImpDef && Def && Def->isCopy()) {
6764 if (Def->getOperand(1).getReg().isPhysical())
6766 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6767 ImpDef = Def && Def->isImplicitDef();
6769 if (!RI.
isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6787 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6788 unsigned SaveExecOpc =
6789 ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
6790 unsigned XorTermOpc =
6791 ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
6793 ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
6794 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6800 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6801 unsigned NumSubRegs =
RegSize / 32;
6802 Register VScalarOp = ScalarOp->getReg();
6804 if (NumSubRegs == 1) {
6805 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6807 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6810 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6812 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6818 CondReg = NewCondReg;
6820 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6828 ScalarOp->setReg(CurReg);
6829 ScalarOp->setIsKill();
6833 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6834 "Unhandled register size");
6836 for (
unsigned Idx = 0;
Idx < NumSubRegs;
Idx += 2) {
6838 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6840 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6843 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6844 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(
Idx));
6847 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6848 .
addReg(VScalarOp, VScalarOpUndef,
6849 TRI->getSubRegFromChannel(
Idx + 1));
6855 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6856 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6862 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6863 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6866 if (NumSubRegs <= 2)
6867 Cmp.addReg(VScalarOp);
6869 Cmp.addReg(VScalarOp, VScalarOpUndef,
6870 TRI->getSubRegFromChannel(
Idx, 2));
6874 CondReg = NewCondReg;
6876 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6884 const auto *SScalarOpRC =
6885 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6886 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6890 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6891 unsigned Channel = 0;
6892 for (
Register Piece : ReadlanePieces) {
6893 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6897 ScalarOp->setReg(SScalarOp);
6898 ScalarOp->setIsKill();
6902 Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
6903 MRI.setSimpleHint(SaveExec, CondReg);
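// Waterfall-style legalization: the loop body built above reads the possibly
// divergent scalar operand with V_READFIRSTLANE_B32 (one 32- or 64-bit piece
// per iteration), compares the result against the original VGPR value to form
// a lane mask, ANDs the per-piece masks together, and uses the combined mask
// to update exec so that matching lanes retire each trip.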
6934 if (!Begin.isValid())
6936 if (!
End.isValid()) {
6941 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
6942 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
6943 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6952 std::numeric_limits<unsigned>::max()) !=
6955 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6961 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6970 for (
auto I = Begin;
I != AfterMI;
I++) {
6971 for (
auto &MO :
I->all_uses())
6972 MRI.clearKillFlags(MO.getReg());
7007 for (
auto &Succ : RemainderBB->
successors()) {
7030static std::tuple<unsigned, unsigned>
7038 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7039 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7042 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7043 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7044 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7045 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7046 uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
7063 .addImm(AMDGPU::sub0_sub1)
7069 return std::tuple(RsrcPtr, NewSRsrc);
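// The helper above splits a 128-bit buffer resource: the low 64 bits
// (sub0_sub1) are extracted as a pointer, and a new SGPR_128 resource is
// rebuilt whose upper words carry the default resource data format, returned
// together as the (RsrcPtr, NewSRsrc) pair.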
7106 if (
MI.getOpcode() == AMDGPU::PHI) {
7108 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7109 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7112 MRI.getRegClass(
MI.getOperand(i).getReg());
7127 VRC = &AMDGPU::VReg_1RegClass;
7143 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7145 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7161 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7168 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7170 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7188 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7193 if (DstRC != Src0RC) {
7202 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7210 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7211 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7212 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7213 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7214 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7215 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7216 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7231 ? AMDGPU::OpName::rsrc
7232 : AMDGPU::OpName::srsrc;
7237 AMDGPU::OpName SampOpName =
7238 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7247 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7253 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7254 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7259 while (Start->getOpcode() != FrameSetupOpcode)
7262 while (
End->getOpcode() != FrameDestroyOpcode)
7266 while (
End !=
MBB.
end() &&
End->isCopy() &&
End->getOperand(1).isReg() &&
7267 MI.definesRegister(
End->getOperand(1).getReg(),
nullptr))
7275 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7277 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7279 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7289 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7290 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7291 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7292 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7301 bool isSoffsetLegal =
true;
7303 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7304 if (SoffsetIdx != -1) {
7308 isSoffsetLegal =
false;
7312 bool isRsrcLegal =
true;
7314 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7315 if (RsrcIdx != -1) {
7318 isRsrcLegal =
false;
7322 if (isRsrcLegal && isSoffsetLegal)
7346 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7347 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7348 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7351 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7352 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7354 unsigned RsrcPtr, NewSRsrc;
7361 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7368 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7386 "FIXME: Need to emit flat atomics here");
7388 unsigned RsrcPtr, NewSRsrc;
7391 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7414 MIB.
addImm(CPol->getImm());
7419 MIB.
addImm(TFE->getImm());
7439 MI.removeFromParent();
7444 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7446 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7450 if (!isSoffsetLegal) {
7462 if (!isSoffsetLegal) {
7471 InstrList.insert(
MI);
7474 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7475 if (RsrcIdx != -1) {
7476 DeferredList.insert(
MI);
7481 return DeferredList.contains(
MI);
7494 unsigned Opcode =
MI.getOpcode();
7498 OpIdx >=
get(Opcode).getNumOperands() ||
7499 get(Opcode).operands()[
OpIdx].RegClass == -1)
7503 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7510 unsigned RCID =
get(Opcode).operands()[
OpIdx].RegClass;
7512 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7513 Op.setSubReg(AMDGPU::lo16);
7514 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7516 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7517 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7524 Op.setReg(NewDstReg);
7536 while (!Worklist.
empty()) {
7550 "Deferred MachineInstr are not supposed to re-populate worklist");
7568 case AMDGPU::S_ADD_I32:
7569 case AMDGPU::S_SUB_I32: {
7573 std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7581 case AMDGPU::S_MUL_U64:
7583 NewOpcode = AMDGPU::V_MUL_U64_e64;
7587 splitScalarSMulU64(Worklist, Inst, MDT);
7591 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7592 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7595 splitScalarSMulPseudo(Worklist, Inst, MDT);
7599 case AMDGPU::S_AND_B64:
7600 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7604 case AMDGPU::S_OR_B64:
7605 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7609 case AMDGPU::S_XOR_B64:
7610 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7614 case AMDGPU::S_NAND_B64:
7615 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7619 case AMDGPU::S_NOR_B64:
7620 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7624 case AMDGPU::S_XNOR_B64:
7626 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7628 splitScalar64BitXnor(Worklist, Inst, MDT);
7632 case AMDGPU::S_ANDN2_B64:
7633 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7637 case AMDGPU::S_ORN2_B64:
7638 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7642 case AMDGPU::S_BREV_B64:
7643 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7647 case AMDGPU::S_NOT_B64:
7648 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7652 case AMDGPU::S_BCNT1_I32_B64:
7653 splitScalar64BitBCNT(Worklist, Inst);
7657 case AMDGPU::S_BFE_I64:
7658 splitScalar64BitBFE(Worklist, Inst);
7662 case AMDGPU::S_FLBIT_I32_B64:
7663 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7666 case AMDGPU::S_FF1_I32_B64:
7667 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7671 case AMDGPU::S_LSHL_B32:
7673 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7677 case AMDGPU::S_ASHR_I32:
7679 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7683 case AMDGPU::S_LSHR_B32:
7685 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7689 case AMDGPU::S_LSHL_B64:
7692 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7693 : AMDGPU::V_LSHLREV_B64_e64;
7697 case AMDGPU::S_ASHR_I64:
7699 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7703 case AMDGPU::S_LSHR_B64:
7705 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7710 case AMDGPU::S_ABS_I32:
7711 lowerScalarAbs(Worklist, Inst);
7715 case AMDGPU::S_CBRANCH_SCC0:
7716 case AMDGPU::S_CBRANCH_SCC1: {
7719 bool IsSCC = CondReg == AMDGPU::SCC;
7722 unsigned Opc = ST.
isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
7725 .
addReg(IsSCC ? VCC : CondReg);
7729 case AMDGPU::S_BFE_U64:
7730 case AMDGPU::S_BFM_B64:
7733 case AMDGPU::S_PACK_LL_B32_B16:
7734 case AMDGPU::S_PACK_LH_B32_B16:
7735 case AMDGPU::S_PACK_HL_B32_B16:
7736 case AMDGPU::S_PACK_HH_B32_B16:
7737 movePackToVALU(Worklist,
MRI, Inst);
7741 case AMDGPU::S_XNOR_B32:
7742 lowerScalarXnor(Worklist, Inst);
7746 case AMDGPU::S_NAND_B32:
7747 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7751 case AMDGPU::S_NOR_B32:
7752 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7756 case AMDGPU::S_ANDN2_B32:
7757 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7761 case AMDGPU::S_ORN2_B32:
7762 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7770 case AMDGPU::S_ADD_CO_PSEUDO:
7771 case AMDGPU::S_SUB_CO_PSEUDO: {
7772 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7773 ? AMDGPU::V_ADDC_U32_e64
7774 : AMDGPU::V_SUBB_U32_e64;
7778 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7779 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7797 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7801 case AMDGPU::S_UADDO_PSEUDO:
7802 case AMDGPU::S_USUBO_PSEUDO: {
7809 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7810 ? AMDGPU::V_ADD_CO_U32_e64
7811 : AMDGPU::V_SUB_CO_U32_e64;
7814 Register DestReg =
MRI.createVirtualRegister(NewRC);
7822 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7829 case AMDGPU::S_CSELECT_B32:
7830 case AMDGPU::S_CSELECT_B64:
7831 lowerSelect(Worklist, Inst, MDT);
7834 case AMDGPU::S_CMP_EQ_I32:
7835 case AMDGPU::S_CMP_LG_I32:
7836 case AMDGPU::S_CMP_GT_I32:
7837 case AMDGPU::S_CMP_GE_I32:
7838 case AMDGPU::S_CMP_LT_I32:
7839 case AMDGPU::S_CMP_LE_I32:
7840 case AMDGPU::S_CMP_EQ_U32:
7841 case AMDGPU::S_CMP_LG_U32:
7842 case AMDGPU::S_CMP_GT_U32:
7843 case AMDGPU::S_CMP_GE_U32:
7844 case AMDGPU::S_CMP_LT_U32:
7845 case AMDGPU::S_CMP_LE_U32:
7846 case AMDGPU::S_CMP_EQ_U64:
7847 case AMDGPU::S_CMP_LG_U64:
7848 case AMDGPU::S_CMP_LT_F32:
7849 case AMDGPU::S_CMP_EQ_F32:
7850 case AMDGPU::S_CMP_LE_F32:
7851 case AMDGPU::S_CMP_GT_F32:
7852 case AMDGPU::S_CMP_LG_F32:
7853 case AMDGPU::S_CMP_GE_F32:
7854 case AMDGPU::S_CMP_O_F32:
7855 case AMDGPU::S_CMP_U_F32:
7856 case AMDGPU::S_CMP_NGE_F32:
7857 case AMDGPU::S_CMP_NLG_F32:
7858 case AMDGPU::S_CMP_NGT_F32:
7859 case AMDGPU::S_CMP_NLE_F32:
7860 case AMDGPU::S_CMP_NEQ_F32:
7861 case AMDGPU::S_CMP_NLT_F32: {
7866 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7880 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7884 case AMDGPU::S_CMP_LT_F16:
7885 case AMDGPU::S_CMP_EQ_F16:
7886 case AMDGPU::S_CMP_LE_F16:
7887 case AMDGPU::S_CMP_GT_F16:
7888 case AMDGPU::S_CMP_LG_F16:
7889 case AMDGPU::S_CMP_GE_F16:
7890 case AMDGPU::S_CMP_O_F16:
7891 case AMDGPU::S_CMP_U_F16:
7892 case AMDGPU::S_CMP_NGE_F16:
7893 case AMDGPU::S_CMP_NLG_F16:
7894 case AMDGPU::S_CMP_NGT_F16:
7895 case AMDGPU::S_CMP_NLE_F16:
7896 case AMDGPU::S_CMP_NEQ_F16:
7897 case AMDGPU::S_CMP_NLT_F16: {
7920 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7924 case AMDGPU::S_CVT_HI_F32_F16: {
7926 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7927 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7933 .
addReg(TmpReg, 0, AMDGPU::hi16)
7949 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7953 case AMDGPU::S_MINIMUM_F32:
7954 case AMDGPU::S_MAXIMUM_F32: {
7956 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7967 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7971 case AMDGPU::S_MINIMUM_F16:
7972 case AMDGPU::S_MAXIMUM_F16: {
7975 ? &AMDGPU::VGPR_16RegClass
7976 : &AMDGPU::VGPR_32RegClass);
7988 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
7992 case AMDGPU::V_S_EXP_F16_e64:
7993 case AMDGPU::V_S_LOG_F16_e64:
7994 case AMDGPU::V_S_RCP_F16_e64:
7995 case AMDGPU::V_S_RSQ_F16_e64:
7996 case AMDGPU::V_S_SQRT_F16_e64: {
7999 ? &AMDGPU::VGPR_16RegClass
8000 : &AMDGPU::VGPR_32RegClass);
8012 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8018 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8026 if (NewOpcode == Opcode) {
8035 if (
MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
8037 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
8041 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8043 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8061 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8063 MRI.replaceRegWith(DstReg, NewDstReg);
8064 MRI.clearKillFlags(NewDstReg);
8082 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8083 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8084 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8086 get(AMDGPU::IMPLICIT_DEF), Undef);
8088 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8094 MRI.replaceRegWith(DstReg, NewDstReg);
8095 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8097 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8100 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8101 MRI.replaceRegWith(DstReg, NewDstReg);
8102 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8107 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8108 MRI.replaceRegWith(DstReg, NewDstReg);
8110 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8120 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8121 AMDGPU::OpName::src0_modifiers) >= 0)
8125 NewInstr->addOperand(Src);
8128 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8131 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8133 NewInstr.addImm(
Size);
8134 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8138 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8143 "Scalar BFE is only implemented for constant width and offset");
8151 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8152 AMDGPU::OpName::src1_modifiers) >= 0)
8154 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8156 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8157 AMDGPU::OpName::src2_modifiers) >= 0)
8159 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8161 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8163 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8165 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8171 NewInstr->addOperand(
Op);
8178 if (
Op.getReg() == AMDGPU::SCC) {
8180 if (
Op.isDef() && !
Op.isDead())
8181 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8183 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8188 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8189 Register DstReg = NewInstr->getOperand(0).getReg();
8194 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8195 MRI.replaceRegWith(DstReg, NewDstReg);
8204 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8208std::pair<bool, MachineBasicBlock *>
8220 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8223 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8225 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8226 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8234 MRI.replaceRegWith(OldDstReg, ResultReg);
8237 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8238 return std::pair(true, NewBB);
8241 return std::pair(false, nullptr);
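// moveScalarAddSub (called from the moveToVALU switch above): when the
// add/sub does not need a carry output, the scalar S_ADD_I32/S_SUB_I32 is
// rewritten in place to V_ADD_U32_e64/V_SUB_U32_e64 and its users are queued
// for VALU conversion; otherwise it returns {false, nullptr} and the caller
// falls through to the generic lowering.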
8258 bool IsSCC = (CondReg == AMDGPU::SCC);
8266 MRI.replaceRegWith(Dest.
getReg(), CondReg);
8273 NewCondReg =
MRI.createVirtualRegister(TC);
8277 bool CopyFound =
false;
8281 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8283 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8285 .
addReg(CandI.getOperand(1).getReg());
8297 ST.
isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8307 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8308 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8321 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
8323 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
8335 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8336 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8339 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8349 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8350 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8365 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8373 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8374 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8380 bool Src0IsSGPR = Src0.
isReg() &&
8382 bool Src1IsSGPR = Src1.
isReg() &&
8385 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8386 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8396 }
else if (Src1IsSGPR) {
8410 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8414 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8420 unsigned Opcode)
const {
8430 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8431 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8443 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8444 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8449 unsigned Opcode)
const {
8459 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8460 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8472 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8473 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8491 &AMDGPU::SGPR_32RegClass;
8494 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8497 AMDGPU::sub0, Src0SubRC);
8502 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8504 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8508 AMDGPU::sub1, Src0SubRC);
8510 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8516 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8523 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8525 Worklist.
insert(&LoHalf);
8526 Worklist.
insert(&HiHalf);
8532 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8543 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8544 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8545 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8556 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8560 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8590 Register Op1L_Op0H_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8596 Register Op1H_Op0L_Reg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8602 Register CarryReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8613 Register AddReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8629 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8641 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8652 Register FullDestReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8653 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8654 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8665 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8669 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8681 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8682 ? AMDGPU::V_MUL_HI_U32_e64
8683 : AMDGPU::V_MUL_HI_I32_e64;
8698 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8706 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8725 &AMDGPU::SGPR_32RegClass;
8728 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8731 &AMDGPU::SGPR_32RegClass;
8734 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8737 AMDGPU::sub0, Src0SubRC);
8739 AMDGPU::sub0, Src1SubRC);
8741 AMDGPU::sub1, Src0SubRC);
8743 AMDGPU::sub1, Src1SubRC);
8748 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8750 Register DestSub0 =
MRI.createVirtualRegister(NewDestSubRC);
8755 Register DestSub1 =
MRI.createVirtualRegister(NewDestSubRC);
8760 Register FullDestReg =
MRI.createVirtualRegister(NewDestRC);
8767 MRI.replaceRegWith(Dest.
getReg(), FullDestReg);
8769 Worklist.
insert(&LoHalf);
8770 Worklist.
insert(&HiHalf);
8773 addUsersToMoveToVALUWorklist(FullDestReg,
MRI, Worklist);
8791 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
8807 Register NewDest =
MRI.createVirtualRegister(DestRC);
8813 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8831 MRI.getRegClass(Src.getReg()) :
8832 &AMDGPU::SGPR_32RegClass;
8834 Register MidReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8835 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8838 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8841 AMDGPU::sub0, SrcSubRC);
8843 AMDGPU::sub1, SrcSubRC);
8849 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8853 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8872 Offset == 0 &&
"Not implemented");
8875 Register MidRegLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8876 Register MidRegHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8877 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8894 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8895 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8900 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8901 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8905 .
addReg(Src.getReg(), 0, AMDGPU::sub0);
8908 .
addReg(Src.getReg(), 0, AMDGPU::sub0)
8913 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8914 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8935 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
8936 unsigned OpcodeAdd =
8937 ST.
hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
8940 Src.isReg() ?
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
8942 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
8949 Register MidReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8950 Register MidReg2 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8951 Register MidReg3 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8952 Register MidReg4 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8959 .
addReg(IsCtlz ? MidReg1 : MidReg2)
8965 .
addReg(IsCtlz ? MidReg2 : MidReg1);
8967 MRI.replaceRegWith(Dest.
getReg(), MidReg4);
8969 addUsersToMoveToVALUWorklist(MidReg4,
MRI, Worklist);
8972void SIInstrInfo::addUsersToMoveToVALUWorklist(
8980 switch (
UseMI.getOpcode()) {
8983 case AMDGPU::SOFT_WQM:
8984 case AMDGPU::STRICT_WWM:
8985 case AMDGPU::STRICT_WQM:
8986 case AMDGPU::REG_SEQUENCE:
8988 case AMDGPU::INSERT_SUBREG:
8991 OpNo = MO.getOperandNo();
9006 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9013 case AMDGPU::S_PACK_LL_B32_B16: {
9014 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9015 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9032 case AMDGPU::S_PACK_LH_B32_B16: {
9033 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9042 case AMDGPU::S_PACK_HL_B32_B16: {
9043 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9053 case AMDGPU::S_PACK_HH_B32_B16: {
9054 Register ImmReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9055 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9072 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
9073 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
9082 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9083 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9091 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9095 Register DestReg =
MI.getOperand(0).getReg();
9097 MRI.replaceRegWith(DestReg, NewCond);
9102 MI.getOperand(SCCIdx).setReg(NewCond);
9108 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9111 for (
auto &Copy : CopyToDelete)
9112 Copy->eraseFromParent();
9120void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9129 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9131 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9148 case AMDGPU::REG_SEQUENCE:
9149 case AMDGPU::INSERT_SUBREG:
9151 case AMDGPU::SOFT_WQM:
9152 case AMDGPU::STRICT_WWM:
9153 case AMDGPU::STRICT_WQM: {
9161 case AMDGPU::REG_SEQUENCE:
9162 case AMDGPU::INSERT_SUBREG:
9172 if (RI.
isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9189 int OpIndices[3])
const {
9208 for (
unsigned i = 0; i < 3; ++i) {
9209 int Idx = OpIndices[i];
9246 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9247 SGPRReg = UsedSGPRs[0];
9250 if (!SGPRReg && UsedSGPRs[1]) {
9251 if (UsedSGPRs[1] == UsedSGPRs[2])
9252 SGPRReg = UsedSGPRs[1];
9259 AMDGPU::OpName OperandName)
const {
9260 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9264 return &
MI.getOperand(
Idx);
9281 RsrcDataFormat |= (1ULL << 56);
9286 RsrcDataFormat |= (2ULL << 59);
9289 return RsrcDataFormat;
9311 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9317 unsigned Opc =
MI.getOpcode();
9323 return get(
Opc).mayLoad() &&
9328 int &FrameIndex)
const {
9336 FrameIndex =
Addr->getIndex();
9341 int &FrameIndex)
const {
9344 FrameIndex =
Addr->getIndex();
9349 int &FrameIndex)
const {
9363 int &FrameIndex)
const {
9380 while (++
I != E &&
I->isInsideBundle()) {
9381 assert(!
I->isBundle() &&
"No nested bundle!");
9389 unsigned Opc = MI.getOpcode();
9391 unsigned DescSize = Desc.getSize();
9396 unsigned Size = DescSize;
9411 bool HasLiteral = false;
9412 unsigned LiteralSize = 4;
9413 for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9435 return HasLiteral ? DescSize + LiteralSize : DescSize;
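// Size accounting: when any explicit operand requires a literal constant, the
// encoded instruction grows by the 4-byte literal (LiteralSize) on top of the
// base descriptor size.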
9440 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
9444 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
9445 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9449 case TargetOpcode::BUNDLE:
9451 case TargetOpcode::INLINEASM:
9452 case TargetOpcode::INLINEASM_BR: {
9454 const char *AsmStr =
MI.getOperand(0).getSymbolName();
9458 if (
MI.isMetaInstruction())
9462 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
9465 unsigned LoInstOpcode = D16Info->LoOp;
9467 DescSize =
Desc.getSize();
9478 if (
MI.memoperands_empty())
9490 static const std::pair<int, const char *> TargetIndices[] = {
9528std::pair<unsigned, unsigned>
9535 static const std::pair<unsigned, const char *> TargetFlags[] = {
9553 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9567 return AMDGPU::WWM_COPY;
9569 return AMDGPU::COPY;
9581 bool IsNullOrVectorRegister =
true;
9589 return IsNullOrVectorRegister &&
9591 (Opcode == AMDGPU::IMPLICIT_DEF &&
9593 (!
MI.isTerminator() && Opcode != AMDGPU::COPY &&
9594 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9607 MRI.setRegAllocationHint(UnusedCarry, 0, RI.
getVCC());
9638 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9639 case AMDGPU::SI_KILL_I1_TERMINATOR:
9648 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
9649 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
9650 case AMDGPU::SI_KILL_I1_PSEUDO:
9651 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
9663 const unsigned OffsetBits =
9665 return (1 << OffsetBits) - 1;
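// Illustrative example (the actual OffsetBits value is subtarget-dependent):
// with a 16-bit DS offset field, OffsetBits == 16 gives a maximum encodable
// offset of (1 << 16) - 1 == 0xffff.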
9672 if (
MI.isInlineAsm())
9675 for (
auto &
Op :
MI.implicit_operands()) {
9676 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
9677 Op.setReg(AMDGPU::VCC_LO);
9686 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
9690 const auto RCID =
MI.getDesc().operands()[
Idx].RegClass;
9708 if (Imm <= MaxImm + 64) {
9710 Overflow = Imm - MaxImm;
9800std::pair<int64_t, int64_t>
9803 int64_t RemainderOffset = COffsetVal;
9804 int64_t ImmField = 0;
9809 if (AllowNegative) {
9811 int64_t D = 1LL << NumBits;
9812 RemainderOffset = (COffsetVal / D) * D;
9813 ImmField = COffsetVal - RemainderOffset;
9817 (ImmField % 4) != 0) {
9819 RemainderOffset += ImmField % 4;
9820 ImmField -= ImmField % 4;
9822 } else if (COffsetVal >= 0) {
9823 ImmField = COffsetVal & maskTrailingOnes<uint64_t>(NumBits);
9824 RemainderOffset = COffsetVal - ImmField;
9828 assert(RemainderOffset + ImmField == COffsetVal);
9829 return {ImmField, RemainderOffset};
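// Worked example (hypothetical NumBits = 12, AllowNegative = false): a
// constant offset of 5000 splits into ImmField = 5000 & 0xfff = 904 and
// RemainderOffset = 4096; 4096 + 904 == 5000, satisfying the assert above.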
9841 switch (ST.getGeneration()) {
9867 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
9868 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
9869 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
9870 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
9871 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
9872 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
9873 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
9874 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
9881#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
9882 case OPCODE##_dpp: \
9883 case OPCODE##_e32: \
9884 case OPCODE##_e64: \
9885 case OPCODE##_e64_dpp: \
9900 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
9901 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
9902 case AMDGPU::V_FMA_F16_gfx9_e64:
9903 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
9904 case AMDGPU::V_INTERP_P2_F16:
9905 case AMDGPU::V_MAD_F16_e64:
9906 case AMDGPU::V_MAD_U16_e64:
9907 case AMDGPU::V_MAD_I16_e64:
9942 if (
isMAI(Opcode)) {
9990 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
9991 if (
MI.getOperand(1 + 2 *
I + 1).getImm() ==
SubReg) {
9992 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10004 switch (
MI.getOpcode()) {
10006 case AMDGPU::REG_SEQUENCE:
10010 case AMDGPU::INSERT_SUBREG:
10011 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10028 if (!P.Reg.isVirtual())
10032 auto *DefInst = MRI.getVRegDef(RSR.Reg);
10033 while (auto *MI = DefInst) {
10035 switch (MI->getOpcode()) {
10037 case AMDGPU::V_MOV_B32_e32: {
10038 auto &Op1 = MI->getOperand(1);
10043 DefInst = MRI.getVRegDef(RSR.Reg);
10051 DefInst = MRI.getVRegDef(RSR.Reg);
10064 assert(MRI.isSSA() && "Must be run on SSA");
10066 auto *TRI = MRI.getTargetRegisterInfo();
10067 auto *DefBB = DefMI.getParent();
10071 if (UseMI.getParent() != DefBB)
10074 const int MaxInstScan = 20;
10078 auto E = UseMI.getIterator();
10079 for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10080 if (I->isDebugInstr())
10083 if (++NumInst > MaxInstScan)
10086 if (I->modifiesRegister(AMDGPU::EXEC, TRI))
10096 assert(MRI.isSSA() && "Must be run on SSA");
10098 auto *TRI = MRI.getTargetRegisterInfo();
10099 auto *DefBB = DefMI.getParent();
10101 const int MaxUseScan = 10;
10104 for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10105 auto &UseInst = *Use.getParent();
10108 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10111 if (++NumUse > MaxUseScan)
10118 const int MaxInstScan = 20;
10122 for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10125 if (I->isDebugInstr())
10128 if (++NumInst > MaxInstScan)
10141 if (Reg == VReg && --NumUse == 0)
10143 } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10155 if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10158 } while (Cur != MBB.end() && Cur != LastPHIIt);
10167 if (InsPt != MBB.end() &&
10168 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10169 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10170 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10171 InsPt->definesRegister(Src, nullptr)) {
10175 : AMDGPU::S_MOV_B64_term),
10177 .addReg(Src, 0, SrcSubReg)
10202 if (isFullCopyInstr(MI)) {
10203 Register DstReg = MI.getOperand(0).getReg();
10204 Register SrcReg = MI.getOperand(1).getReg();
10211 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10215 MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10226 unsigned *PredCost) const {
10227 if (MI.isBundle()) {
10230 unsigned Lat = 0, Count = 0;
10231 for (++I; I != E && I->isBundledWithPred(); ++I) {
10233 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10235 return Lat + Count - 1;
10238 return SchedModel.computeInstrLatency(&MI);
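// --- Editor's illustrative sketch (not part of SIInstrInfo.cpp) ---
// The bundle case above computes the bundle's latency as the maximum member
// latency plus Count - 1, i.e. one extra cycle per additional bundled
// instruction. A minimal standalone restatement of that arithmetic, with the
// per-instruction latencies passed in directly:
#include <algorithm>
#include <vector>

static unsigned bundleLatencySketch(const std::vector<unsigned> &MemberLat) {
  unsigned Lat = 0, Count = 0;
  for (unsigned L : MemberLat) {
    ++Count;
    Lat = std::max(Lat, L);
  }
  return Count ? Lat + Count - 1 : 0; // e.g. {4, 1, 1} -> 4 + 3 - 1 = 6
}
// --- end of sketch ---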
10244 unsigned opcode = MI.getOpcode();
10248 Register Src = isa<GIntrinsic>(MI) ? MI.getOperand(2).getReg()
10249 : MI.getOperand(1).getReg();
10250 LLT DstTy = MRI.getType(Dst);
10251 LLT SrcTy = MRI.getType(Src);
10264 if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
10265 return HandleAddrSpaceCast(MI);
10267 if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
10268 auto IID = GI->getIntrinsicID();
10275 case Intrinsic::amdgcn_addrspacecast_nonnull:
10276 return HandleAddrSpaceCast(MI);
10277 case Intrinsic::amdgcn_if:
10278 case Intrinsic::amdgcn_else:
10292 if (opcode == AMDGPU::G_LOAD) {
10293 if (MI.memoperands_empty())
10297 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10298 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10306 if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
10307 opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10308 opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10321 unsigned opcode = MI.getOpcode();
10322 if (opcode == AMDGPU::V_READLANE_B32 ||
10323 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10324 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10327 if (isCopyInstr(MI)) {
10331 RI.getPhysRegBaseClass(srcOp.getReg());
10339 if (MI.isPreISelOpcode())
10354 if (MI.memoperands_empty())
10358 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10359 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10374 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10376 if (!SrcOp.isReg())
10380 if (!Reg || !SrcOp.readsReg())
10386 if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10413 F, "ds_ordered_count unsupported for this calling conv"));
10427 Register &SrcReg2, int64_t &CmpMask,
10428 int64_t &CmpValue) const {
10429 if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10432 switch (MI.getOpcode()) {
10435 case AMDGPU::S_CMP_EQ_U32:
10436 case AMDGPU::S_CMP_EQ_I32:
10437 case AMDGPU::S_CMP_LG_U32:
10438 case AMDGPU::S_CMP_LG_I32:
10439 case AMDGPU::S_CMP_LT_U32:
10440 case AMDGPU::S_CMP_LT_I32:
10441 case AMDGPU::S_CMP_GT_U32:
10442 case AMDGPU::S_CMP_GT_I32:
10443 case AMDGPU::S_CMP_LE_U32:
10444 case AMDGPU::S_CMP_LE_I32:
10445 case AMDGPU::S_CMP_GE_U32:
10446 case AMDGPU::S_CMP_GE_I32:
10447 case AMDGPU::S_CMP_EQ_U64:
10448 case AMDGPU::S_CMP_LG_U64:
10449 SrcReg = MI.getOperand(0).getReg();
10450 if (MI.getOperand(1).isReg()) {
10451 if (MI.getOperand(1).getSubReg())
10453 SrcReg2 = MI.getOperand(1).getReg();
10455 } else if (MI.getOperand(1).isImm()) {
10457 CmpValue = MI.getOperand(1).getImm();
10463 case AMDGPU::S_CMPK_EQ_U32:
10464 case AMDGPU::S_CMPK_EQ_I32:
10465 case AMDGPU::S_CMPK_LG_U32:
10466 case AMDGPU::S_CMPK_LG_I32:
10467 case AMDGPU::S_CMPK_LT_U32:
10468 case AMDGPU::S_CMPK_LT_I32:
10469 case AMDGPU::S_CMPK_GT_U32:
10470 case AMDGPU::S_CMPK_GT_I32:
10471 case AMDGPU::S_CMPK_LE_U32:
10472 case AMDGPU::S_CMPK_LE_I32:
10473 case AMDGPU::S_CMPK_GE_U32:
10474 case AMDGPU::S_CMPK_GE_I32:
10475 SrcReg = MI.getOperand(0).getReg();
10477 CmpValue = MI.getOperand(1).getImm();
10486 Register SrcReg2, int64_t CmpMask,
10495 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10496 this](int64_t ExpectedValue, unsigned SrcSize,
10497 bool IsReversible, bool IsSigned) -> bool {
10522 if (!Def || Def->getParent() != CmpInstr.getParent())
10525 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
10526 Def->getOpcode() != AMDGPU::S_AND_B64)
10530 const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
10541 SrcOp = &Def->getOperand(2);
10542 else if (isMask(&Def->getOperand(2)))
10543 SrcOp = &Def->getOperand(1);
10549 assert(llvm::has_single_bit<uint64_t>(Mask) && "Invalid mask.");
10551 if (IsSigned && BitNo == SrcSize - 1)
10554 ExpectedValue <<= BitNo;
10556 bool IsReversedCC = false;
10557 if (CmpValue != ExpectedValue) {
10560 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
10565 Register DefReg = Def->getOperand(0).getReg();
10566 if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10569 for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10571 if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10572 I->killsRegister(AMDGPU::SCC, &RI))
10577 Def->findRegisterDefOperand(AMDGPU::SCC, nullptr);
10581 if (!MRI->use_nodbg_empty(DefReg)) {
10589 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
10590 : AMDGPU::S_BITCMP1_B32
10591 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
10592 : AMDGPU::S_BITCMP1_B64;
10597 Def->eraseFromParent();
10605 case AMDGPU::S_CMP_EQ_U32:
10606 case AMDGPU::S_CMP_EQ_I32:
10607 case AMDGPU::S_CMPK_EQ_U32:
10608 case AMDGPU::S_CMPK_EQ_I32:
10609 return optimizeCmpAnd(1, 32, true, false);
10610 case AMDGPU::S_CMP_GE_U32:
10611 case AMDGPU::S_CMPK_GE_U32:
10612 return optimizeCmpAnd(1, 32, false, false);
10613 case AMDGPU::S_CMP_GE_I32:
10614 case AMDGPU::S_CMPK_GE_I32:
10615 return optimizeCmpAnd(1, 32, false, true);
10616 case AMDGPU::S_CMP_EQ_U64:
10617 return optimizeCmpAnd(1, 64, true, false);
10618 case AMDGPU::S_CMP_LG_U32:
10619 case AMDGPU::S_CMP_LG_I32:
10620 case AMDGPU::S_CMPK_LG_U32:
10621 case AMDGPU::S_CMPK_LG_I32:
10622 return optimizeCmpAnd(0, 32, true, false);
10623 case AMDGPU::S_CMP_GT_U32:
10624 case AMDGPU::S_CMPK_GT_U32:
10625 return optimizeCmpAnd(0, 32, false, false);
10626 case AMDGPU::S_CMP_GT_I32:
10627 case AMDGPU::S_CMPK_GT_I32:
10628 return optimizeCmpAnd(0, 32, false, true);
10629 case AMDGPU::S_CMP_LG_U64:
10630 return optimizeCmpAnd(0, 64, true, false);
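// --- Editor's illustrative sketch (not part of SIInstrInfo.cpp) ---
// The optimizeCmpAnd lambda above rewrites S_CMP_EQ/S_CMP_LG of an S_AND with
// a single-bit mask into S_BITCMP1/S_BITCMP0, relying on the bit-test identity
// below; the "reversed" case (compared value equal to ExpectedValue ^ Mask)
// simply flips which bitcmp opcode is chosen. A standalone check of the
// identity, with an arbitrarily chosen bit position and test values:
#include <cassert>
#include <cstdint>

static bool bitIsSet(uint64_t X, unsigned N) { return (X >> N) & 1; }

static void checkBitcmpIdentity() {
  const unsigned N = 5;                // arbitrary bit position
  const uint64_t M = uint64_t(1) << N; // single-bit mask
  const uint64_t Vals[] = {0, M, ~uint64_t(0), 0x123456789abcdef0ULL};
  for (uint64_t X : Vals) {
    assert(((X & M) == M) == bitIsSet(X, N));  // cmp_eq(and(x, M), M) <=> bit set
    assert(((X & M) == 0) == !bitIsSet(X, N)); // cmp_eq(and(x, M), 0) <=> bit clear
  }
}
// --- end of sketch ---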
10637 AMDGPU::OpName OpName) const {
10641 int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10655 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10658 MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10659 : &AMDGPU::VReg_64_Align2RegClass);
10661 .addReg(DataReg, 0, Op.getSubReg())
10666 Op.setSubReg(AMDGPU::sub0);
10688 unsigned Opcode = MI.getOpcode();
10694 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10695 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static const TargetRegisterClass * adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, const MCInstrDesc &TID, unsigned RCID, bool IsAllocatable)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
bool hasBF16PackedInsts() const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
bool has16BitInsts() const
bool hasInv2PiInlineImm() const
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool useVGPRIndexMode() const
bool hasFlatGVSMode() const
bool hasSDWAScalar() const
bool hasScalarCompareEq64() const
bool hasOnlyRevVALUShifts() const
bool hasFlatInstOffsets() const
bool hasGFX90AInsts() const
bool hasFmaakFmamkF64Insts() const
bool hasScaleOffset() const
bool hasMFMAInlineLiteralBug() const
bool hasNegativeScratchOffsetBug() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasVALUMaskWriteHazard() const
bool hasGFX1250Insts() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasOffset3fBug() const
bool hasGetPCZeroExtension() const
bool hasAddPC64Inst() const
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool has64BitLiterals() const
bool hasSDWAOutModsVOPC() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool hasGFX940Insts() const
bool hasVALUReadSGPRHazard() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasNegativeUnalignedScratchOffsetBug() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasVectorMulU64() const
Generation getGeneration() const
bool hasVOP3Literal() const
bool hasUnpackedD16VMem() const
bool hasAddNoCarry() const
bool hasPartialNSAEncoding() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
bool isGenericType() const
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
unsigned pred_size() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void backward()
Update internal register state and move MBB iterator backwards.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
static bool isMAI(const MachineInstr &MI)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
static bool isVOP3(const MachineInstr &MI)
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const override final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is a instruction that moves/copies value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.q.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminate with divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
static bool hasVectorRegisters(const TargetRegisterClass *RC)
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool opCanUseInlineConstant(unsigned OpType) const
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
bool opCanUseLiteralConstant(unsigned OpType) const
static bool hasVGPRs(const TargetRegisterClass *RC)
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
const TargetRegisterClass * getBoolRC() const
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getChannelFromSubReg(unsigned SubReg) const
MCRegister getVCC() const
bool isVectorSuperClass(const TargetRegisterClass *RC) const
static bool hasAGPRs(const TargetRegisterClass *RC)
const TargetRegisterClass * getWaveMaskRegClass() const
bool spillSGPRToVGPR() const
const TargetRegisterClass * getVGPR64Class() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destination...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
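Callers normally reach this through the public commuteInstruction wrapper rather than the Impl hook; a hedged sketch, with TII and MI assumed to be in scope:
// Swap the commutable source operands in place; returns null if MI cannot
// be commuted, and &MI itself when NewMI is false and commuting succeeds.
if (MachineInstr *Commuted = TII->commuteInstruction(MI, /*NewMI=*/false))
  (void)Commuted;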
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const
Return the maximal subclass of the given register class that is allocatable or NULL.
unsigned getSubRegIdxSize(unsigned Idx) const
Get the size of the bit range covered by a sub-register index.
unsigned getSubRegIdxOffset(unsigned Idx) const
Get the offset of the bit range covered by a sub-register index.
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
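A small sketch of how these address-space tags are typically consulted; MMO is assumed to be a const MachineMemOperand * taken from a memory instruction.
unsigned AS = MMO->getAddrSpace();
// FLAT may alias both global and private (scratch) memory, so treat it
// conservatively when deciding whether a scratch access is possible.
bool MayAccessScratch =
    AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;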
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
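The named-operand helpers are normally paired as in this hedged sketch; Opc and MI are assumed to be in scope, and vdst is just an example operand name.
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst)) {
  int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  MachineOperand &Dst = MI.getOperand(Idx);  // Idx is non-negative here
  (void)Dst;
}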
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating-point constants.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
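A hedged sketch of how these operand-type tags are usually inspected; Desc is an MCInstrDesc and OpNo an operand index, both assumed to be in scope.
const MCOperandInfo &OpInfo = Desc.operands()[OpNo];
switch (OpInfo.OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  // 32-bit integer source: inline constants are free, a literal may be legal.
  break;
case AMDGPU::OPERAND_KIMM32:
  // Immediate that still consumes the constant bus.
  break;
default:
  break;
}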
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
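A hedged illustration of the inline-literal predicates; the values are chosen for clarity, and HasInv2Pi models a GFX8+-style subtarget.
const bool HasInv2Pi = true;
// Integers in [-16, 64] encode inline; anything else needs a 32-bit literal.
bool A = AMDGPU::isInlinableLiteral32(64, HasInv2Pi);                    // true
bool B = AMDGPU::isInlinableLiteral32(100, HasInv2Pi);                   // false
// Selected FP constants (0.5, 1.0, 2.0, ...) are also inline; this is 0.5.
bool C = AMDGPU::isInlinableLiteral64(0x3FE0000000000000LL, HasInv2Pi);  // true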
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
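A hedged BuildMI sketch; MBB, InsertPt, DL, TII, and DstReg are assumed to be in scope, and the opcode is chosen purely for illustration.
// Materialize zero into a 32-bit VGPR and keep a handle on the new instruction.
MachineInstr *NewMI =
    BuildMI(MBB, InsertPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstReg)
        .addImm(0);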
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
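For instance, a fold is only safe when this query returns false; a hedged sketch with MRI, Reg, DefMI, and UseMI assumed to be in scope:
if (!execMayBeModifiedBeforeUse(MRI, Reg, DefMI, UseMI)) {
  // Every lane executing UseMI saw DefMI under the same EXEC mask,
  // so forwarding the defined value into UseMI is sound.
}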
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
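A typical hedged use of the early-increment adaptor, with MBB assumed to be in scope:
// Deleting the current instruction is safe because the iterator was already
// advanced before the loop body ran.
for (MachineInstr &MI : make_early_inc_range(MBB)) {
  if (MI.isDebugInstr())
    MI.eraseFromParent();
}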
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
int countr_zero(T Val)
Count the number of zero bits from the least significant bit upward, stopping at the first 1.
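A few hedged examples of the bit-math helpers above:
uint64_t Bytes   = 24;
uint64_t Aligned = alignDown(Bytes, 16);   // 16: round down to the alignment
bool     Pow2    = isPowerOf2_64(16);      // true
int      Shift   = countr_zero(16u);       // 4: log2 of a power of two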
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
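And a hedged example of the 64-bit splitting and dword-sizing helpers:
uint64_t Imm    = 0x0000000123456789ULL;
uint32_t HiHalf = Hi_32(Imm);              // 0x00000001
uint32_t LoHalf = Lo_32(Imm);              // 0x23456789
unsigned DWords = divideCeil(12, 4);       // 3 dwords cover 12 bytes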
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
Description of the encoding of one expression Op.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
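A hedged sketch of building a memory operand for a spill slot; MF, FrameIndex, Size (in bytes), and Alignment (an Align) are assumed to be in scope.
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
// Describe a store of Size bytes to that fixed stack slot.
MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOStore, Size, Alignment);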
Utility to store a worklist of machine instructions.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.