#define DEBUG_TYPE "si-fold-operands"

  unsigned DefSubReg = AMDGPU::NoSubRegister;

  FoldableDef() = delete;
              unsigned DefSubReg = AMDGPU::NoSubRegister)
      : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {
      ImmToFold = FoldOp.getImm();
    } else if (FoldOp.isFI()) {
      FrameIndexToFold = FoldOp.getIndex();

              unsigned DefSubReg = AMDGPU::NoSubRegister)
      : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),

    FoldableDef Copy(*this);
    Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);

    return OpToFold->getReg();

  unsigned getSubReg() const {
    return OpToFold->getSubReg();

    return FrameIndexToFold;

  std::optional<int64_t> getEffectiveImmVal() const {

                      unsigned OpIdx) const {
    std::optional<int64_t> ImmToFold = getEffectiveImmVal();

    if (DefSubReg != AMDGPU::NoSubRegister)

    if (DefSubReg != AMDGPU::NoSubRegister)
    return TII.isOperandLegal(MI, OpIdx, OpToFold);

struct FoldCandidate {
                bool Commuted = false, int ShrinkOp = -1)
      : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
        Commuted(Commuted) {}

  bool isFI() const { return Def.isFI(); }
    return Def.FrameIndexToFold;

  bool isImm() const { return Def.isImm(); }

  bool isReg() const { return Def.isReg(); }

  bool isGlobal() const { return Def.isGlobal(); }

  bool needsShrink() const { return ShrinkOpcode != -1; }
class SIFoldOperandsImpl {
                            const FoldableDef &OpToFold) const;

  unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
    case AMDGPU::S_ADD_I32: {
      if (ST->hasAddNoCarry())
        return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
      return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
    case AMDGPU::S_OR_B32:
      return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
    case AMDGPU::S_AND_B32:
      return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
    case AMDGPU::S_MUL_I32:
      return AMDGPU::V_MUL_LO_U32_e64;
    return AMDGPU::INSTRUCTION_LIST_END;
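  // convertToVALUOp (above) picks the VALU twin of a scalar op: S_ADD_I32
  // becomes the carry-less V_ADD_U32 on subtargets with hasAddNoCarry() and
  // the carry-out V_ADD_CO_U32 otherwise; unsupported opcodes return
  // INSTRUCTION_LIST_END so callers can bail out.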
  bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,

                            int64_t ImmVal) const;

                            int64_t ImmVal) const;

                        const FoldableDef &OpToFold) const;

  getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,

  std::pair<int64_t, const TargetRegisterClass *>

  bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;

  bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;

  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;

  SIFoldOperandsImpl() = default;

    return SIFoldOperandsImpl().run(MF);

char SIFoldOperandsLegacy::ID = 0;

          TRI.getSubRegisterClass(RC, MO.getSubReg()))

  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F16_t16_e64:
    return AMDGPU::V_FMA_F16_gfx9_t16_e64;
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  return AMDGPU::INSTRUCTION_LIST_END;
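// macToMad (above): a MAC/FMAC instruction ties its accumulator operand to the
// destination, which blocks folding a constant into src2; switching to the
// untied MAD/FMA form listed here makes that operand foldable.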
                                             const FoldableDef &OpToFold) const {
  if (!OpToFold.isFI())

  const unsigned Opc = UseMI.getOpcode();
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_ADD_U32:
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:
    return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:
    return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&

    return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);

  int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
  int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
  return OpNo == VIdx && SIdx == -1;
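// frameIndexMayFold (above): a frame index folds directly when the use is an
// add whose other operand is an immediate, or when it is the vaddr of a
// memory access; for the form with a separate saddr it must be the vaddr
// operand with no saddr present (SIdx == -1).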
bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
  if (TRI->isVGPR(*MRI, DstReg) && TRI->isSGPRReg(*MRI, SrcReg) &&
      MRI->hasOneNonDBGUse(SrcReg)) {
    if (!Def || Def->getNumOperands() != 4)

    const bool UseVOP3 = !Src0->isImm() || TII->isInlineConstant(*Src0);
    unsigned NewOp = convertToVALUOp(Def->getOpcode(), UseVOP3);
    if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
        !Def->getOperand(3).isDead())

    if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
      if (Add->getDesc().getNumDefs() == 2) {
        Register CarryOutReg = MRI->createVirtualRegister(TRI->getBoolRC());
        MRI->setRegAllocationHint(CarryOutReg, 0, TRI->getVCC());

      Add.add(*Src0).add(*Src1).setMIFlags(Def->getFlags());

      Def->eraseFromParent();
      MI.eraseFromParent();

    assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);

      Def->eraseFromParent();
      MI.eraseFromParent();

  return new SIFoldOperandsLegacy();
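// The two helpers that follow handle packed 16-bit (op_sel) operands:
// canUseImmWithOpSel checks whether an immediate can be encoded for an
// op_sel-capable operand, and tryFoldImmWithOpSel rewrites the source
// modifier bits so an inline constant fits, flipping V_PK_ADD_U16 and
// V_PK_SUB_U16 when folding the negated value succeeds instead.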
bool SIFoldOperandsImpl::canUseImmWithOpSel(const MachineInstr *MI,
                                            int64_t ImmVal) const {
  const uint64_t TSFlags = MI->getDesc().TSFlags;

  int OpNo = MI->getOperandNo(&Old);
  unsigned Opcode = MI->getOpcode();
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

bool SIFoldOperandsImpl::tryFoldImmWithOpSel(MachineInstr *MI, unsigned UseOpNo,
                                             int64_t ImmVal) const {
  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

  AMDGPU::OpName ModName = AMDGPU::OpName::NUM_OPERAND_NAMES;
  unsigned SrcIdx = ~0;
  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) {
    ModName = AMDGPU::OpName::src0_modifiers;
  } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) {
    ModName = AMDGPU::OpName::src1_modifiers;
  } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) {
    ModName = AMDGPU::OpName::src2_modifiers;
  assert(ModName != AMDGPU::OpName::NUM_OPERAND_NAMES);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModName);
  unsigned ModVal = Mod.getImm();

      Mod.setImm(NewModVal);

    if (static_cast<int16_t>(Lo) < 0) {
      int32_t SExt = static_cast<int16_t>(Lo);
        Mod.setImm(NewModVal);

  if (tryFoldToInline(Imm))

  bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
  bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
  if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::clamp);
    bool Clamp = MI->getOperand(ClampIdx).getImm() != 0;

      if (tryFoldToInline(NegImm)) {
            IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
        MI->setDesc(TII->get(NegOpcode));

bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {

  std::optional<int64_t> ImmVal;
    ImmVal = Fold.Def.getEffectiveImmVal();

  if (ImmVal && canUseImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal)) {
    if (tryFoldImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal))

    int OpNo = MI->getOperandNo(&Old);
    if (!TII->isOperandLegal(*MI, OpNo, &New))

  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
    int Op32 = Fold.ShrinkOpcode;

    bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());

    Register NewReg0 = MRI->createVirtualRegister(Dst0RC);

    if (HaveNonDbgCarryUse) {

    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
      MI->removeOperand(I);
    MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));

      TII->commuteInstruction(*Inst32, false);

  assert(!Fold.needsShrink() && "not handled");

    if (NewMFMAOpc == -1)
    MI->setDesc(TII->get(NewMFMAOpc));
    MI->untieRegOperand(0);

    int OpNo = MI->getOperandNo(&Old);
    if (!TII->isOperandLegal(*MI, OpNo, &New))

  if (Fold.isGlobal()) {
    Old.ChangeToGA(Fold.Def.OpToFold->getGlobal(),
                   Fold.Def.OpToFold->getOffset(),
                   Fold.Def.OpToFold->getTargetFlags());
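// appendFoldCandidate (below) skips the record when a fold is already queued
// for the same (UseMI, UseOpNo) pair, so one operand is never rewritten twice
// in a single round.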
                                FoldCandidate &&Entry) {
  for (FoldCandidate &Fold : FoldList)
    if (Fold.UseMI == Entry.UseMI && Fold.UseOpNo == Entry.UseOpNo)
  LLVM_DEBUG(dbgs() << "Append " << (Entry.Commuted ? "commuted" : "normal")
                    << " operand " << Entry.UseOpNo << "\n  " << *Entry.UseMI);

                                const FoldableDef &FoldOp,
                                bool Commuted = false, int ShrinkOp = -1) {
                       FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));

bool SIFoldOperandsImpl::tryAddToFoldList(
                                          const FoldableDef &OpToFold) const {
  const unsigned Opc = MI->getOpcode();

  auto tryToFoldAsFMAAKorMK = [&]() {
    if (!OpToFold.isImm())

    const bool TryAK = OpNo == 3;
    const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
    MI->setDesc(TII->get(NewOpc));

    bool FoldAsFMAAKorMK =
        tryAddToFoldList(FoldList, MI, TryAK ? 3 : 2, OpToFold);
    if (FoldAsFMAAKorMK) {
      MI->untieRegOperand(3);

  bool IsLegal = OpToFold.isOperandLegal(*TII, *MI, OpNo);
  if (!IsLegal && OpToFold.isImm()) {
    if (std::optional<int64_t> ImmVal = OpToFold.getEffectiveImmVal())
      IsLegal = canUseImmWithOpSel(MI, OpNo, *ImmVal);

    if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
      MI->setDesc(TII->get(NewOpc));

      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
        MI->untieRegOperand(OpNo);
      MI->removeOperand(MI->getNumExplicitOperands() - 1);

    if (Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
      if (tryToFoldAsFMAAKorMK())

    if (OpToFold.isImm()) {
      if (Opc == AMDGPU::S_SETREG_B32)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
      else if (Opc == AMDGPU::S_SETREG_B32_mode)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
        MI->setDesc(TII->get(ImmOpc));

  bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);

    if (!Op.isReg() || !CommutedOp.isReg())

    if (Op.isReg() && CommutedOp.isReg() &&
        (Op.getReg() == CommutedOp.getReg() &&

  if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))

  if (!OpToFold.isOperandLegal(*TII, *MI, CommuteOpNo)) {
    if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
         Opc != AMDGPU::V_SUBREV_CO_U32_e64) ||
        (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
      TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);

    if (!OtherOp.isReg() ||

    unsigned MaybeCommutedOpc = MI->getOpcode();

  if (Opc == AMDGPU::S_FMAC_F32 &&
      (OpNo != 1 || !MI->getOperand(1).isIdenticalTo(MI->getOperand(2)))) {
    if (tryToFoldAsFMAAKorMK())

  return !TII->isSDWA(MI);

       SubDef && TII.isFoldableCopy(*SubDef);
       SubDef = MRI.getVRegDef(Sub->getReg())) {

    if (SrcOp.getSubReg())

    else if (!TRI->getCommonSubClass(RC, OpRC))

    if (SrcOp.getSubReg()) {
      Defs.emplace_back(&SrcOp, SubRegIdx);

    if (DefSrc && (DefSrc->isReg() || DefSrc->isImm())) {
      Defs.emplace_back(DefSrc, SubRegIdx);

    Defs.emplace_back(&SrcOp, SubRegIdx);

  if (!Def || !Def->isRegSequence())

  return getRegSeqInit(*Def, Defs);
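// isRegSeqSplat (below) inspects the REG_SEQUENCE feeding a use: if every
// element was materialized from the same immediate it returns that splat
// value (and, when the element count is even and adjacent channels pair up,
// also tries a 64-bit splat), so tryFoldRegSeqSplat can substitute an inline
// constant for the whole tuple.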
std::pair<int64_t, const TargetRegisterClass *>
SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const {

  bool TryToMatchSplat64 = false;

  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
    int64_t SubImm = Op->getImm();
    if (I == 1 && (E & 1) == 0) {
      TryToMatchSplat64 = true;

  if (!TryToMatchSplat64)
    return {Defs[0].first->getImm(), SrcRC};

  for (unsigned I = 0, E = Defs.size(); I != E; I += 2) {
    unsigned SubReg0 = Defs[I].second;
    unsigned SubReg1 = Defs[I + 1].second;

    if (TRI->getChannelFromSubReg(SubReg0) + 1 !=
        TRI->getChannelFromSubReg(SubReg1))

      SplatVal64 = MergedVal;
    else if (SplatVal64 != MergedVal)

  return {SplatVal64, RC64};

bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
  if (UseOpIdx >= Desc.getNumOperands())

  int16_t RCID = Desc.operands()[UseOpIdx].RegClass;

  if (SplatVal != 0 && SplatVal != -1) {
    uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;

      OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);
      OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);

    if (!TRI->getCommonSubClass(OpRC, SplatRC))

  if (!TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp))

bool SIFoldOperandsImpl::tryToFoldACImm(
  if (UseOpIdx >= Desc.getNumOperands())

  if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {

  if (!OpToFold.isReg())

  if (Def && TII->isFoldableCopy(*Def)) {
    if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
      FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
                              OpToFold.DefSubReg);
void SIFoldOperandsImpl::foldOperand(
  if (!isUseSafeToFold(*UseMI, *UseOp))

  if (UseOp->isReg() && OpToFold.isReg()) {
    if (UseOp->getSubReg() != AMDGPU::NoSubRegister &&
         !TRI->isSGPRReg(*MRI, OpToFold.getReg())))

      std::tie(SplatVal, SplatRC) = isRegSeqSplat(*UseMI);

    for (unsigned I = 0; I != UsesToProcess.size(); ++I) {

      if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
        FoldableDef SplatDef(SplatVal, SplatRC);

      if (RSUse->getSubReg() != RegSeqDstSubReg)

      foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,

  if (tryToFoldACImm(OpToFold, UseMI, UseOpIdx, FoldList))

  if (frameIndexMayFold(*UseMI, UseOpIdx, OpToFold)) {
      if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
          MFI->getScratchRSrcReg())

          *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);

          TII->getNamedOperand(*UseMI, AMDGPU::OpName::cpol)->getImm();

  bool FoldingImmLike =
      OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
          AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {

      const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
          TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);

        MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
        if (!MRI->constrainRegClass(SrcReg, MovSrcRC))

      while (ImpOpI != ImpOpE) {

      if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {

      OpToFold.DefMI->implicit_operands().empty()) {
    LLVM_DEBUG(dbgs() << "Folding " << OpToFold.OpToFold << "\n into "

    unsigned SubRegIdx = OpToFold.getSubReg();
      static_assert(AMDGPU::sub1_hi16 == 12, "Subregister layout has changed");
      if (SubRegIdx > AMDGPU::sub1) {
        M |= M.getLane(M.getHighestLane() - 1);
        assert(Indexes.size() == 1 && "Expected one 32-bit subreg to cover");
        SubRegIdx = Indexes[0];
      } else if (TII->getOpSize(*UseMI, 1) == 4)
        SubRegIdx = AMDGPU::sub0;

    OpToFold.OpToFold->setIsKill(false);

    if (foldCopyToAGPRRegSequence(UseMI))

  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
      (UseOpc == AMDGPU::V_READLANE_B32 &&
           AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {

    if (FoldingImmLike) {
              *OpToFold.DefMI, *UseMI))

      if (OpToFold.isImm()) {
            *OpToFold.getEffectiveImmVal());
      } else if (OpToFold.isFI())
        assert(OpToFold.isGlobal());
                       OpToFold.OpToFold->getOffset(),
                       OpToFold.OpToFold->getTargetFlags());

    if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
              *OpToFold.DefMI, *UseMI))

      UseDesc.operands()[UseOpIdx].RegClass == -1)

  if (!FoldingImmLike) {
    if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {
          TRI->getRegClassForReg(*MRI, OpToFold.getReg());
      if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
        unsigned SubReg = OpToFold.getSubReg();
      if (!RC || !TRI->isProperlyAlignedRC(*RC))

    tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::S_AND_B32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::S_OR_B32:
  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::S_XOR_B32:
  case AMDGPU::S_XNOR_B32:
  case AMDGPU::S_NAND_B32:
  case AMDGPU::S_NOR_B32:
  case AMDGPU::S_ANDN2_B32:
    Result = LHS & ~RHS;
  case AMDGPU::S_ORN2_B32:
    Result = LHS | ~RHS;
  case AMDGPU::V_LSHL_B32_e64:
  case AMDGPU::V_LSHL_B32_e32:
  case AMDGPU::S_LSHL_B32:
    Result = LHS << (RHS & 31);
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32:
    Result = RHS << (LHS & 31);
  case AMDGPU::V_LSHR_B32_e64:
  case AMDGPU::V_LSHR_B32_e32:
  case AMDGPU::S_LSHR_B32:
    Result = LHS >> (RHS & 31);
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32:
    Result = RHS >> (LHS & 31);
  case AMDGPU::V_ASHR_I32_e64:
  case AMDGPU::V_ASHR_I32_e32:
  case AMDGPU::S_ASHR_I32:
    Result = static_cast<int32_t>(LHS) >> (RHS & 31);
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32:
    Result = static_cast<int32_t>(RHS) >> (LHS & 31);
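// The binary constant folder above evaluates a two-operand ALU op once both
// inputs are known.  Shift amounts are masked to five bits (RHS & 31), matching
// the hardware, and the *REV forms swap the roles of LHS and RHS.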
  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

std::optional<int64_t>
  if (!Op.isReg() || !Op.getReg().isVirtual())
    return std::nullopt;

  if (Def && Def->isMoveImmediate()) {
      return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());

  return std::nullopt;
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
  if (!MI->allImplicitDefsAreDead())

  unsigned Opc = MI->getOpcode();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);

  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
       Opc == AMDGPU::S_NOT_B32) &&
    MI->getOperand(1).ChangeToImmediate(~*Src0Imm);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);

  if (!Src0Imm && !Src1Imm)

  if (Src0Imm && Src1Imm) {
    bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());

    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
    MI->removeOperand(Src1Idx);

  if (!MI->isCommutable())

  if (Src0Imm && !Src1Imm) {

  int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
  if (Opc == AMDGPU::V_OR_B32_e64 ||
      Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {
      MI->removeOperand(Src1Idx);
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
      Opc == AMDGPU::S_AND_B32) {
      MI->removeOperand(Src0Idx);
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
      Opc == AMDGPU::S_XOR_B32) {
      MI->removeOperand(Src1Idx);
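// tryFoldCndMask (below): when both sources of a V_CNDMASK carry the same
// known value and no source modifiers are set, the select is redundant, so
// the instruction is simplified by dropping src2, src1 and the modifier
// operands.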
  unsigned Opc = MI.getOpcode();
  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
      Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)

  std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);

  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
  if (!Src0Imm || *Src0Imm != *Src1Imm)

      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
      (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))

  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    MI.removeOperand(Src2Idx);
  MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
  if (Src1ModIdx != -1)
    MI.removeOperand(Src1ModIdx);
  if (Src0ModIdx != -1)
    MI.removeOperand(Src0ModIdx);
bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
      MI.getOpcode() != AMDGPU::V_AND_B32_e32)

  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
  if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())

  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))

  MRI->replaceRegWith(Dst, Src1);
  if (!MI.getOperand(2).isKill())
    MRI->clearKillFlags(Src1);
  MI.eraseFromParent();
                                          const FoldableDef &OpToFold) const {

  bool Changed = false;

  if (OpToFold.isImm()) {
        if (tryConstantFoldOp(&UseMI)) {

  for (auto *U : UsesToProcess) {
    FoldableDef SubOpToFold = OpToFold.getWithSubReg(*TRI, U->getSubReg());

  if (CopiesToReplace.empty() && FoldList.empty())

    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    assert(!Fold.isReg() || Fold.Def.OpToFold);
    if (Fold.isReg() && Fold.getReg().isVirtual()) {

      assert(Fold.Def.OpToFold && Fold.isReg());
      MRI->clearKillFlags(Fold.getReg());
                        << static_cast<int>(Fold.UseOpNo) << " of "

        ConstantFoldCandidates.insert(Fold.UseMI);
    } else if (Fold.Commuted) {
      TII->commuteInstruction(*Fold.UseMI, false);

    if (tryConstantFoldOp(MI)) {
bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {
  if (!TRI->isAGPRClass(DefRC))

  unsigned NumFoldable = 0;

  for (unsigned I = 1; I != NumRegSeqOperands; I += 2) {
        DefRC, &AMDGPU::AGPR_32RegClass, SubRegIdx);
        TRI->getMatchingSuperRegClass(DefRC, InputRC, SubRegIdx);

  if (NumFoldable == 0)

  CopyMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));

  for (auto [Def, DestSubIdx] : NewDefs) {
    if (!Def->isReg()) {
      Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
      BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)

      Def->setIsKill(false);

      Register &VGPRCopy = VGPRCopies[Src];
          TRI->getSubRegisterClass(UseRC, DestSubIdx);

        if (TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg) !=
          VGPRCopy = MRI->createVirtualRegister(VGPRUseSubRC);

    B.addImm(DestSubIdx);
bool SIFoldOperandsImpl::tryFoldFoldableCopy(
  if (DstReg == AMDGPU::M0) {
    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
      MI.eraseFromParent();

  if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
    if (TII->hasAnyModifiersSet(MI))
    OpToFoldPtr = &MI.getOperand(2);
    OpToFoldPtr = &MI.getOperand(1);

  if (!FoldingImm && !OpToFold.isReg())

      MRI->getRegClass(MI.getOperand(0).getReg());

  if (MI.getOpcode() == AMDGPU::COPY && OpToFold.isReg() &&
    if (DstRC == &AMDGPU::SReg_32RegClass &&
        DstRC == MRI->getRegClass(OpToFold.getReg())) {

  if (OpToFold.isReg() && MI.isCopy() && !MI.getOperand(1).getSubReg()) {
    if (foldCopyToAGPRRegSequence(&MI))

  FoldableDef Def(OpToFold, DstRC);
  bool Changed = foldInstOperand(MI, Def);

  auto *InstToErase = &MI;
  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    auto &SrcOp = InstToErase->getOperand(1);
    InstToErase->eraseFromParent();
      InstToErase = nullptr;
    InstToErase = MRI->getVRegDef(SrcReg);
    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))

  if (InstToErase && InstToErase->isRegSequence() &&
      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    InstToErase->eraseFromParent();

  return OpToFold.isReg() &&
         foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.getReg(), MI);
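// The clamp folding that follows recognizes a V_MAX-style instruction used
// only for its clamp bit and folds the clamp into the defining instruction,
// provided neither instruction may raise an FP exception and the source
// modifiers and clamp masks agree.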
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MAX_F32_e64:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_t16_e64:
  case AMDGPU::V_MAX_F16_fake16_e64:
  case AMDGPU::V_MAX_F64_e64:
  case AMDGPU::V_MAX_NUM_F64_e64:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
  case AMDGPU::V_PK_MAX_NUM_BF16: {
    if (MI.mayRaiseFPException())

    if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())

        Src0->getSubReg() != AMDGPU::NoSubRegister)

    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

        = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
        = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    unsigned UnsetMods =
        (Op == AMDGPU::V_PK_MAX_F16 || Op == AMDGPU::V_PK_MAX_NUM_BF16)
    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)

  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))

  if (TII->getClampMask(*Def) != TII->getClampMask(MI))

  if (Def->mayRaiseFPException())

  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);

    Register MIDstReg = MI.getOperand(0).getReg();
    if (TRI->isSGPRReg(*MRI, DefReg)) {
    MRI->replaceRegWith(MIDstReg, DefReg);

  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64: {
    case 0x3fe0000000000000:
    case 0x4000000000000000:
    case 0x4010000000000000:
  case AMDGPU::V_MUL_F32_e64: {
    switch (static_cast<uint32_t>(Val)) {
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64: {
    switch (static_cast<uint16_t>(Val)) {
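// getOModValue (above) maps a literal multiplier onto the output-modifier
// encoding: the f64 bit patterns are 0.5 (0x3fe0000000000000),
// 2.0 (0x4000000000000000) and 4.0 (0x4010000000000000), and the f32/f16
// cases compare against the same three values in their own bit widths.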
std::pair<const MachineOperand *, int>
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64:
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64:
  case AMDGPU::V_MUL_F16_e64: {
    if ((Op == AMDGPU::V_MUL_F32_e64 &&
        ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
          Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64 ||
          Op == AMDGPU::V_MUL_F16_fake16_e64) &&
         MFI->getMode().FP64FP16Denormals.Output !=
        MI.mayRaiseFPException())

    if (Src0->isImm()) {
    } else if (Src1->isImm()) {

        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))

    return std::pair(RegOp, OMod);

  case AMDGPU::V_ADD_F64_e64:
  case AMDGPU::V_ADD_F64_pseudo_e64:
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_t16_e64:
  case AMDGPU::V_ADD_F16_fake16_e64: {
    if ((Op == AMDGPU::V_ADD_F32_e64 &&
        ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F64_pseudo_e64 ||
          Op == AMDGPU::V_ADD_F16_e64 || Op == AMDGPU::V_ADD_F16_t16_e64 ||
          Op == AMDGPU::V_ADD_F16_fake16_e64) &&

        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

  std::tie(RegOp, OMod) = isOMod(MI);
      RegOp->getSubReg() != AMDGPU::NoSubRegister ||
      !MRI->hasOneNonDBGUser(RegOp->getReg()))

  if (Def->mayRaiseFPException())

  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))

  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());

  MRI->clearKillFlags(Def->getOperand(0).getReg());
  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
  auto Reg = MI.getOperand(0).getReg();

  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
      !MRI->hasOneNonDBGUse(Reg))

  if (!getRegSeqInit(Defs, Reg))

  for (auto &[Op, SubIdx] : Defs) {
    if (TRI->isAGPR(*MRI, Op->getReg()))

  if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))

  Op = &*MRI->use_nodbg_begin(Reg);

  if (Op->getSubReg())

  if (!OpRC || !TRI->isVectorSuperClass(OpRC))

  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
  auto Dst = MRI->createVirtualRegister(NewDstRC);
                 TII->get(AMDGPU::REG_SEQUENCE), Dst);

  for (auto &[Def, SubIdx] : Defs) {
    Def->setIsKill(false);

  RS->eraseFromParent();

  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
    MI.eraseFromParent();
                       Register &OutReg, unsigned &OutSubReg) {
  if (TRI.isAGPR(MRI, CopySrcReg)) {
    OutReg = CopySrcReg;

  if (!CopySrcDef || !CopySrcDef->isCopy())

      OtherCopySrc.getSubReg() != AMDGPU::NoSubRegister ||
      !TRI.isAGPR(MRI, OtherCopySrcReg))

  OutReg = OtherCopySrcReg;
  if (!TRI->isVGPR(*MRI, PhiOut))

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    if (!Copy || !Copy->isCopy())

    unsigned AGPRRegMask = AMDGPU::NoSubRegister;
      if (const auto *SubRC = TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))

  bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    unsigned CopyOpc = AMDGPU::COPY;
    if (Def->isCopy()) {
      unsigned AGPRSubReg = AMDGPU::NoSubRegister;

    if (IsAGPR32 && !ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
      CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      InsertMBB = Def->getParent();

    Register NewReg = MRI->createVirtualRegister(ARC);
                        TII->get(CopyOpc), NewReg)

  Register NewReg = MRI->createVirtualRegister(ARC);
  PHI.getOperand(0).setReg(NewReg);
          TII->get(AMDGPU::COPY), PhiOut)
  if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)

  while (!Users.empty()) {
    if (!I->isCopy() && !I->isRegSequence())

    Register DstReg = I->getOperand(0).getReg();
    if (TRI->isAGPR(*MRI, DstReg))

      Users.push_back(&U);

  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
  if (!TII->isOperandLegal(MI, 0, &Def)) {
    MRI->setRegClass(DefReg, RC);

  while (!MoveRegs.empty()) {
    MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
  if (ST->hasGFX90AInsts())

  for (auto &MI : MBB) {
    if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))

    for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {

  bool Changed = false;
  for (const auto &[Entry, MOs] : RegToMO) {
    if (MOs.size() == 1)

        MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
            TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)

    Register TempAGPR = MRI->createVirtualRegister(ARC);
            TII->get(AMDGPU::COPY), TempAGPR)
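// run() (below) visits each instruction once per block: the cheap rewrites
// come first (cndmask, zero-high-bits, reg_sequence, PHI and load AGPR folds),
// then the generic foldable-copy path; the cached M0 value is invalidated
// whenever M0 is redefined, and clamp/omod folding plus the per-block AGPR
// PHI cleanup run at the end.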
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  bool HasNSZ = MFI->hasNoSignedZerosFPMath();

  bool Changed = false;
      Changed |= tryFoldCndMask(MI);

      if (tryFoldZeroHighBits(MI)) {

      if (MI.isRegSequence() && tryFoldRegSequence(MI)) {

      if (MI.isPHI() && tryFoldPhiAGPR(MI)) {

      if (MI.mayLoad() && tryFoldLoad(MI)) {

      if (TII->isFoldableCopy(MI)) {
        Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);

      if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
        CurrentKnownM0Val = nullptr;

        Changed |= tryFoldClamp(MI);

    Changed |= tryOptimizeAGPRPhis(*MBB);

  bool Changed = SIFoldOperandsImpl().run(MF);