#define DEBUG_TYPE "si-fold-operands"

  unsigned DefSubReg = AMDGPU::NoSubRegister;

  FoldableDef() = delete;

              unsigned DefSubReg = AMDGPU::NoSubRegister)
      : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {
    if (FoldOp.isImm()) {
      ImmToFold = FoldOp.getImm();
    } else if (FoldOp.isFI()) {
      FrameIndexToFold = FoldOp.getIndex();

              unsigned DefSubReg = AMDGPU::NoSubRegister)
      : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),

    FoldableDef Copy(*this);
    Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);

    return OpToFold->getReg();

  unsigned getSubReg() const {
    return OpToFold->getSubReg();

    return FrameIndexToFold;

  std::optional<int64_t> getEffectiveImmVal() const {

                      unsigned OpIdx) const {
    std::optional<int64_t> ImmToFold = getEffectiveImmVal();

      if (DefSubReg != AMDGPU::NoSubRegister)

      if (DefSubReg != AMDGPU::NoSubRegister)
      return TII.isOperandLegal(MI, OpIdx, OpToFold);
struct FoldCandidate {

                bool Commuted = false, int ShrinkOp = -1)
      : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
        Commuted(Commuted) {}

  bool isFI() const { return Def.isFI(); }

    return Def.FrameIndexToFold;

  bool isImm() const { return Def.isImm(); }

  bool isReg() const { return Def.isReg(); }

  bool isGlobal() const { return Def.isGlobal(); }

  bool needsShrink() const { return ShrinkOpcode != -1; }
class SIFoldOperandsImpl {

                          const FoldableDef &OpToFold) const;

  unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
    case AMDGPU::S_ADD_I32: {
      if (ST->hasAddNoCarry())
        return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
      return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
    case AMDGPU::S_OR_B32:
      return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
    case AMDGPU::S_AND_B32:
      return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
    case AMDGPU::S_MUL_I32:
      return AMDGPU::V_MUL_LO_U32_e64;
    return AMDGPU::INSTRUCTION_LIST_END;
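
  // Note (added commentary, not in the upstream source): convertToVALUOp()
  // picks the VALU twin of a scalar ALU opcode so a frame-index add computed
  // on the SALU can be rematerialized on the VALU side. S_ADD_I32 maps to the
  // carry-less V_ADD_U32 forms only when the subtarget has them
  // (ST->hasAddNoCarry()); otherwise the carry-out V_ADD_CO_U32 encodings are
  // used and the caller must supply a register for the carry output. UseVOP3
  // selects the _e64 encoding when the VOP2 form cannot encode the sources.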
  bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,

                          int64_t ImmVal) const;

                            int64_t ImmVal) const;

                         const FoldableDef &OpToFold) const;

  getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,

  std::pair<int64_t, const TargetRegisterClass *>

  bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;

  bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;

  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;

  SIFoldOperandsImpl() = default;

    return SIFoldOperandsImpl().run(MF);
char SIFoldOperandsLegacy::ID = 0;

          TRI.getSubRegisterClass(RC, MO.getSubReg()))

  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F16_t16_e64:
    return AMDGPU::V_FMA_F16_gfx9_t16_e64;
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  return AMDGPU::INSTRUCTION_LIST_END;
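
// Note (added commentary, not in the upstream source): macToMad() appears to
// map the two-address MAC/FMAC opcodes to their three-address MAD/FMA forms
// so that an immediate or SGPR can be folded into src2, which the
// accumulating form ties to the destination. A rough, illustrative sketch of
// the intended rewrite (operand/modifier details omitted), assuming a
// foldable constant feeds src2:
//
//   %c = V_MOV_B32_e32 1077936128          ; 3.0f
//   %d = V_MAC_F32_e64 %a, %b, %c          ; src2 tied, cannot take a literal
//     -->
//   %d = V_MAD_F32_e64 %a, %b, 1077936128  ; literal folded after conversion
//
// The actual conversion and operand handling happen in tryAddToFoldList().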
                                            const FoldableDef &OpToFold) const {
  if (!OpToFold.isFI())

  const unsigned Opc = UseMI.getOpcode();
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_ADD_U32:
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:
    return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:
    return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&

    return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);

  int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);

  int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
  return OpNo == VIdx && SIdx == -1;
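
// Note (added commentary, not in the upstream source): frameIndexMayFold()
// seems to answer "may this frame index be used directly as this operand?"
// For the scalar/vector adds it additionally requires the other source to be
// an immediate, keeping the add in a simple FI-plus-offset shape; for the
// buffer/flat addressing forms it only accepts the vaddr slot, and for the
// variant checked last only when no saddr operand is present.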
bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
  if (TRI->isVGPR(*MRI, DstReg) && TRI->isSGPRReg(*MRI, SrcReg) &&
      MRI->hasOneNonDBGUse(SrcReg)) {
    if (!Def || Def->getNumOperands() != 4)

    const bool UseVOP3 = !Src0->isImm() || TII->isInlineConstant(*Src0);
    unsigned NewOp = convertToVALUOp(Def->getOpcode(), UseVOP3);
    if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
        !Def->getOperand(3).isDead())

    if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
      if (Add->getDesc().getNumDefs() == 2) {
        Register CarryOutReg = MRI->createVirtualRegister(TRI->getBoolRC());
        MRI->setRegAllocationHint(CarryOutReg, 0, TRI->getVCC());

      Add.add(*Src0).add(*Src1).setMIFlags(Def->getFlags());

      Def->eraseFromParent();
      MI.eraseFromParent();

    assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);

      Def->eraseFromParent();
      MI.eraseFromParent();

  return new SIFoldOperandsLegacy();
bool SIFoldOperandsImpl::canUseImmWithOpSel(const MachineInstr *MI,
                                            unsigned UseOpNo,
                                            int64_t ImmVal) const {
  const uint64_t TSFlags = MI->getDesc().TSFlags;

  int OpNo = MI->getOperandNo(&Old);
  unsigned Opcode = MI->getOpcode();
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;
bool SIFoldOperandsImpl::tryFoldImmWithOpSel(MachineInstr *MI, unsigned UseOpNo,
                                             int64_t ImmVal) const {
  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

  AMDGPU::OpName ModName = AMDGPU::OpName::NUM_OPERAND_NAMES;
  unsigned SrcIdx = ~0;
  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) {
    ModName = AMDGPU::OpName::src0_modifiers;
  } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) {
    ModName = AMDGPU::OpName::src1_modifiers;
  } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) {
    ModName = AMDGPU::OpName::src2_modifiers;
  assert(ModName != AMDGPU::OpName::NUM_OPERAND_NAMES);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModName);
  unsigned ModVal = Mod.getImm();

      Mod.setImm(NewModVal);

    if (static_cast<int16_t>(Lo) < 0) {
      int32_t SExt = static_cast<int16_t>(Lo);
        Mod.setImm(NewModVal);

  if (tryFoldToInline(Imm))

  bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
  bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
  if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::clamp);
    bool Clamp = MI->getOperand(ClampIdx).getImm() != 0;

    if (tryFoldToInline(NegImm)) {
          IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
      MI->setDesc(TII->get(NegOpcode));
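
// Note (added commentary, not in the upstream source): tryFoldImmWithOpSel()
// handles packed 16-bit operands, where a 32-bit literal really encodes two
// halves selected via the op_sel/op_sel_hi bits in the *_modifiers operand.
// It appears to try, in order: an inline-encodable form of the immediate
// reached by retoggling those bits, and, for V_PK_ADD_U16 / V_PK_SUB_U16
// with src1, the negated immediate combined with switching to the opposite
// opcode (clamp permitting).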
bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {

  std::optional<int64_t> ImmVal;
    ImmVal = Fold.Def.getEffectiveImmVal();

  if (ImmVal && canUseImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal)) {
    if (tryFoldImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal))

    int OpNo = MI->getOperandNo(&Old);
    if (!TII->isOperandLegal(*MI, OpNo, &New))

  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {

    int Op32 = Fold.ShrinkOpcode;

      bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());

      Register NewReg0 = MRI->createVirtualRegister(Dst0RC);

      if (HaveNonDbgCarryUse) {

    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
      MI->removeOperand(I);
    MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));

      TII->commuteInstruction(*Inst32, false);

  assert(!Fold.needsShrink() && "not handled");

    if (NewMFMAOpc == -1)
    MI->setDesc(TII->get(NewMFMAOpc));
    MI->untieRegOperand(0);

    int OpNo = MI->getOperandNo(&Old);
    if (!TII->isOperandLegal(*MI, OpNo, &New))

  if (Fold.isGlobal()) {
    Old.ChangeToGA(Fold.Def.OpToFold->getGlobal(),
                   Fold.Def.OpToFold->getOffset(),
                   Fold.Def.OpToFold->getTargetFlags());

          TII->getRegClass(MI->getDesc(), Fold.UseOpNo, TRI, *MF)) {
    unsigned NewSubReg = New->getSubReg();

    if (NewSubReg && OldSubReg) {
      ConstrainRC = TRI->getCommonSuperRegClass(OpRC, OldSubReg, NewRC,
                                                NewSubReg, PreA, PreB);
    } else if (OldSubReg) {
      ConstrainRC = TRI->getMatchingSuperRegClass(OldRC, OpRC, OldSubReg);
    } else if (NewSubReg) {
      ConstrainRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, NewSubReg);

    if (!MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
                 << TRI->getRegClassName(ConstrainRC) << '\n');
                                FoldCandidate &&Entry) {
  for (FoldCandidate &Fold : FoldList)
    if (Fold.UseMI == Entry.UseMI && Fold.UseOpNo == Entry.UseOpNo)
  LLVM_DEBUG(dbgs() << "Append " << (Entry.Commuted ? "commuted" : "normal")
                    << " operand " << Entry.UseOpNo << "\n " << *Entry.UseMI);

                                const FoldableDef &FoldOp,
                                bool Commuted = false, int ShrinkOp = -1) {
                      FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
bool SIFoldOperandsImpl::tryAddToFoldList(
                                          const FoldableDef &OpToFold) const {
  const unsigned Opc = MI->getOpcode();

  auto tryToFoldAsFMAAKorMK = [&]() {
    if (!OpToFold.isImm())

    const bool TryAK = OpNo == 3;
    const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
    MI->setDesc(TII->get(NewOpc));

    bool FoldAsFMAAKorMK =
        tryAddToFoldList(FoldList, MI, TryAK ? 3 : 2, OpToFold);
    if (FoldAsFMAAKorMK) {
      MI->untieRegOperand(3);

  bool IsLegal = OpToFold.isOperandLegal(*TII, *MI, OpNo);
  if (!IsLegal && OpToFold.isImm()) {
    if (std::optional<int64_t> ImmVal = OpToFold.getEffectiveImmVal())
      IsLegal = canUseImmWithOpSel(MI, OpNo, *ImmVal);

    if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
      MI->setDesc(TII->get(NewOpc));

      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
        MI->untieRegOperand(OpNo);
      MI->removeOperand(MI->getNumExplicitOperands() - 1);

    if (Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
      if (tryToFoldAsFMAAKorMK())

    if (OpToFold.isImm()) {
      if (Opc == AMDGPU::S_SETREG_B32)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
      else if (Opc == AMDGPU::S_SETREG_B32_mode)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
        MI->setDesc(TII->get(ImmOpc));

  bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);

    if (!Op.isReg() || !CommutedOp.isReg())

    if (Op.isReg() && CommutedOp.isReg() &&
        (Op.getReg() == CommutedOp.getReg() &&

  if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))

  if (!OpToFold.isOperandLegal(*TII, *MI, CommuteOpNo)) {
    if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
         Opc != AMDGPU::V_SUBREV_CO_U32_e64) ||
        (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
      TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);

    if (!OtherOp.isReg() ||

    unsigned MaybeCommutedOpc = MI->getOpcode();

  if (Opc == AMDGPU::S_FMAC_F32 &&
      (OpNo != 1 || !MI->getOperand(1).isIdenticalTo(MI->getOperand(2)))) {
    if (tryToFoldAsFMAAKorMK())

  return !TII->isSDWA(MI);
       SubDef && TII.isFoldableCopy(*SubDef);
       SubDef = MRI.getVRegDef(Sub->getReg())) {

    if (SrcOp.getSubReg())

    else if (!TRI->getCommonSubClass(RC, OpRC))

    if (SrcOp.getSubReg()) {
      Defs.emplace_back(&SrcOp, SubRegIdx);

    if (DefSrc && (DefSrc->isReg() || DefSrc->isImm())) {
      Defs.emplace_back(DefSrc, SubRegIdx);

    Defs.emplace_back(&SrcOp, SubRegIdx);

  if (!Def || !Def->isRegSequence())

  return getRegSeqInit(*Def, Defs);

std::pair<int64_t, const TargetRegisterClass *>
SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const {

  bool TryToMatchSplat64 = false;

  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {

    int64_t SubImm = Op->getImm();
    if (Imm != SubImm) {
      if (I == 1 && (E & 1) == 0) {
        TryToMatchSplat64 = true;

  if (!TryToMatchSplat64)
    return {Defs[0].first->getImm(), SrcRC};

  for (unsigned I = 0, E = Defs.size(); I != E; I += 2) {
    unsigned SubReg0 = Defs[I].second;
    unsigned SubReg1 = Defs[I + 1].second;

    if (TRI->getChannelFromSubReg(SubReg0) + 1 !=
        TRI->getChannelFromSubReg(SubReg1))

      SplatVal64 = MergedVal;
    else if (SplatVal64 != MergedVal)

  return {SplatVal64, RC64};
bool SIFoldOperandsImpl::tryFoldRegSeqSplat(

  if (UseOpIdx >= Desc.getNumOperands())

  int16_t RCID = Desc.operands()[UseOpIdx].RegClass;

  if (SplatVal != 0 && SplatVal != -1) {
    uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;

      OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);

      OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);

    if (!TRI->getCommonSubClass(OpRC, SplatRC))

  if (!TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp))

bool SIFoldOperandsImpl::tryToFoldACImm(

  if (UseOpIdx >= Desc.getNumOperands())

  if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {

  if (!OpToFold.isReg())

  if (Def && TII->isFoldableCopy(*Def)) {
    if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
      FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
                              OpToFold.DefSubReg);
void SIFoldOperandsImpl::foldOperand(

  if (!isUseSafeToFold(*UseMI, *UseOp))

  if (UseOp->isReg() && OpToFold.isReg()) {
    if (UseOp->getSubReg() != AMDGPU::NoSubRegister &&
         !TRI->isSGPRReg(*MRI, OpToFold.getReg())))

    std::tie(SplatVal, SplatRC) = isRegSeqSplat(*UseMI);

    for (unsigned I = 0; I != UsesToProcess.size(); ++I) {

        if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
          FoldableDef SplatDef(SplatVal, SplatRC);

      if (RSUse->getSubReg() != RegSeqDstSubReg)

      foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,

  if (tryToFoldACImm(OpToFold, UseMI, UseOpIdx, FoldList))

  if (frameIndexMayFold(*UseMI, UseOpIdx, OpToFold)) {
      if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
          MFI->getScratchRSrcReg())

          *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);

          TII->getNamedOperand(*UseMI, AMDGPU::OpName::cpol)->getImm();

  bool FoldingImmLike =
      OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
          AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
          AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {

      const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
          TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);

        MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
        if (!MRI->constrainRegClass(SrcReg, MovSrcRC))

        if (!OpToFold.isImm() ||
            !TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))

      while (ImpOpI != ImpOpE) {

      if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {

        OpToFold.DefMI->implicit_operands().empty()) {
      LLVM_DEBUG(dbgs() << "Folding " << OpToFold.OpToFold << "\n into "

      unsigned SubRegIdx = OpToFold.getSubReg();

      static_assert(AMDGPU::sub1_hi16 == 12, "Subregister layout has changed");

      if (SubRegIdx > AMDGPU::sub1) {
        M |= M.getLane(M.getHighestLane() - 1);
        assert(Indexes.size() == 1 && "Expected one 32-bit subreg to cover");
        SubRegIdx = Indexes[0];
      } else if (TII->getOpSize(*UseMI, 1) == 4)
        SubRegIdx = AMDGPU::sub0;

    OpToFold.OpToFold->setIsKill(false);

    if (foldCopyToAGPRRegSequence(UseMI))

  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
      (UseOpc == AMDGPU::V_READLANE_B32 &&
           AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {

    if (FoldingImmLike) {
              *OpToFold.DefMI, *UseMI))

      if (OpToFold.isImm()) {
            *OpToFold.getEffectiveImmVal());
      } else if (OpToFold.isFI())
        assert(OpToFold.isGlobal());
                       OpToFold.OpToFold->getOffset(),
                       OpToFold.OpToFold->getTargetFlags());

    if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
              *OpToFold.DefMI, *UseMI))

      UseDesc.operands()[UseOpIdx].RegClass == -1)

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::S_AND_B32:

  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::S_OR_B32:

  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::S_XOR_B32:

  case AMDGPU::S_XNOR_B32:

  case AMDGPU::S_NAND_B32:

  case AMDGPU::S_NOR_B32:

  case AMDGPU::S_ANDN2_B32:
    Result = LHS & ~RHS;

  case AMDGPU::S_ORN2_B32:
    Result = LHS | ~RHS;

  case AMDGPU::V_LSHL_B32_e64:
  case AMDGPU::V_LSHL_B32_e32:
  case AMDGPU::S_LSHL_B32:
    Result = LHS << (RHS & 31);

  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32:
    Result = RHS << (LHS & 31);

  case AMDGPU::V_LSHR_B32_e64:
  case AMDGPU::V_LSHR_B32_e32:
  case AMDGPU::S_LSHR_B32:
    Result = LHS >> (RHS & 31);

  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32:
    Result = RHS >> (LHS & 31);

  case AMDGPU::V_ASHR_I32_e64:
  case AMDGPU::V_ASHR_I32_e32:
  case AMDGPU::S_ASHR_I32:
    Result = static_cast<int32_t>(LHS) >> (RHS & 31);

  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32:
    Result = static_cast<int32_t>(RHS) >> (LHS & 31);
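
// Note (added commentary, not in the upstream source): evalBinaryInstruction()
// computes the 32-bit result of these ALU opcodes when both sources are known
// constants, so tryConstantFoldOp() can rewrite the instruction into a plain
// move of the result. The "REV" variants simply swap the operand roles, and
// shift amounts are masked to 5 bits as the hardware does. A hypothetical
// host-side check of the same arithmetic, assuming uint32_t LHS/RHS as above:
//
//   uint32_t LHS = 1, RHS = 36;
//   uint32_t Shl    = LHS << (RHS & 31);  // 1 << 4 == 16
//   uint32_t ShlRev = RHS << (LHS & 31);  // 36 << 1 == 72
//   int32_t  Ashr   = static_cast<int32_t>(LHS) >> (RHS & 31);  // arithmetic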
  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();

  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);
std::optional<int64_t>

  if (!Op.isReg() || !Op.getReg().isVirtual())
    return std::nullopt;

  if (Def && Def->isMoveImmediate()) {
    return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());

  return std::nullopt;
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
  if (!MI->allImplicitDefsAreDead())

  unsigned Opc = MI->getOpcode();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);

  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
       Opc == AMDGPU::S_NOT_B32) &&
    MI->getOperand(1).ChangeToImmediate(~*Src0Imm);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);

  if (!Src0Imm && !Src1Imm)

  if (Src0Imm && Src1Imm) {

    bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());

    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
    MI->removeOperand(Src1Idx);

  if (!MI->isCommutable())

  if (Src0Imm && !Src1Imm) {

  int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
  if (Opc == AMDGPU::V_OR_B32_e64 ||
      Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {
      MI->removeOperand(Src1Idx);
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
      Opc == AMDGPU::S_AND_B32) {
      MI->removeOperand(Src0Idx);
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);

  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
      Opc == AMDGPU::S_XOR_B32) {
      MI->removeOperand(Src1Idx);
  unsigned Opc = MI.getOpcode();
  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
      Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)

  std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);

  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
  if (!Src0Imm || *Src0Imm != *Src1Imm)

      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
      (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))

  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  MI.removeOperand(Src2Idx);
  MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
  if (Src1ModIdx != -1)
    MI.removeOperand(Src1ModIdx);
  if (Src0ModIdx != -1)
    MI.removeOperand(Src0ModIdx);
bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
      MI.getOpcode() != AMDGPU::V_AND_B32_e32)

  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
  if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())

  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))

  MRI->replaceRegWith(Dst, Src1);
  if (!MI.getOperand(2).isKill())
    MRI->clearKillFlags(Src1);
  MI.eraseFromParent();
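
// Note (added commentary, not in the upstream source): tryFoldZeroHighBits()
// drops a V_AND_B32 with a 0xffff mask when the other source is produced by
// an instruction the subtarget already guarantees to write zeros into the
// high 16 bits of its destination (ST->zeroesHigh16BitsOfDest). The mask is
// then redundant, so uses of the AND's result are rewired to the unmasked
// source register and the AND is erased.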
                                       const FoldableDef &OpToFold) const {

  bool Changed = false;

  if (OpToFold.isImm()) {
      if (tryConstantFoldOp(&UseMI)) {

  for (auto *U : UsesToProcess) {
    FoldableDef SubOpToFold = OpToFold.getWithSubReg(*TRI, U->getSubReg());

  if (CopiesToReplace.empty() && FoldList.empty())

    Copy->addImplicitDefUseOperands(*MF);

  for (FoldCandidate &Fold : FoldList) {
    assert(!Fold.isReg() || Fold.Def.OpToFold);
    if (Fold.isReg() && Fold.getReg().isVirtual()) {

      assert(Fold.Def.OpToFold && Fold.isReg());
        MRI->clearKillFlags(Fold.getReg());
                 << static_cast<int>(Fold.UseOpNo) << " of "

        ConstantFoldCandidates.insert(Fold.UseMI);

    } else if (Fold.Commuted) {
      TII->commuteInstruction(*Fold.UseMI, false);

    if (tryConstantFoldOp(MI)) {
bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {

  if (!TRI->isAGPRClass(DefRC))

  unsigned NumFoldable = 0;

  for (unsigned I = 1; I != NumRegSeqOperands; I += 2) {
        DefRC, &AMDGPU::AGPR_32RegClass, SubRegIdx);
        TRI->getMatchingSuperRegClass(DefRC, InputRC, SubRegIdx);

  if (NumFoldable == 0)

  CopyMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));

  for (auto [Def, DestSubIdx] : NewDefs) {
    if (!Def->isReg()) {
      Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
      BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)

      Def->setIsKill(false);

      Register &VGPRCopy = VGPRCopies[Src];
          TRI->getSubRegisterClass(UseRC, DestSubIdx);
            TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg);
        VGPRCopy = MRI->createVirtualRegister(VGPRUseSubRC);

    B.addImm(DestSubIdx);
bool SIFoldOperandsImpl::tryFoldFoldableCopy(

  if (DstReg == AMDGPU::M0) {
    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
      MI.eraseFromParent();

  if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
    if (TII->hasAnyModifiersSet(MI))
    OpToFoldPtr = &MI.getOperand(2);
    OpToFoldPtr = &MI.getOperand(1);

  if (!FoldingImm && !OpToFold.isReg())

      MRI->getRegClass(MI.getOperand(0).getReg());

  if (MI.getOpcode() == AMDGPU::COPY && OpToFold.isReg() &&
    if (DstRC == &AMDGPU::SReg_32RegClass &&
        DstRC == MRI->getRegClass(OpToFold.getReg())) {

  if (OpToFold.isReg() && MI.isCopy() && !MI.getOperand(1).getSubReg()) {
    if (foldCopyToAGPRRegSequence(&MI))

  FoldableDef Def(OpToFold, DstRC);
  bool Changed = foldInstOperand(MI, Def);

  auto *InstToErase = &MI;
  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    auto &SrcOp = InstToErase->getOperand(1);
    InstToErase->eraseFromParent();
      InstToErase = nullptr;
    InstToErase = MRI->getVRegDef(SrcReg);
    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))

  if (InstToErase && InstToErase->isRegSequence() &&
      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    InstToErase->eraseFromParent();

  return OpToFold.isReg() &&
         foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.getReg(), MI);
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MAX_F32_e64:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_t16_e64:
  case AMDGPU::V_MAX_F16_fake16_e64:
  case AMDGPU::V_MAX_F64_e64:
  case AMDGPU::V_MAX_NUM_F64_e64:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
  case AMDGPU::V_PK_MAX_NUM_BF16: {
    if (MI.mayRaiseFPException())

    if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())

        Src0->getSubReg() != AMDGPU::NoSubRegister)

    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

        = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
        = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    unsigned UnsetMods =
        (Op == AMDGPU::V_PK_MAX_F16 || Op == AMDGPU::V_PK_MAX_NUM_BF16)
    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)

  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))

  if (TII->getClampMask(*Def) != TII->getClampMask(MI))

  if (Def->mayRaiseFPException())

  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);

  Register MIDstReg = MI.getOperand(0).getReg();
  if (TRI->isSGPRReg(*MRI, DefReg)) {

  MRI->replaceRegWith(MIDstReg, DefReg);

  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64: {
    case 0x3fe0000000000000:
    case 0x4000000000000000:
    case 0x4010000000000000:
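    // Note (added commentary, not in the upstream source): these are the IEEE
    // 754 double bit patterns for 0.5, 2.0 and 4.0, the only multipliers the
    // hardware output modifier can express (div:2, mul:2, mul:4); the f32 and
    // f16 cases below perform the same check against the narrower encodings.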
  case AMDGPU::V_MUL_F32_e64: {
    switch (static_cast<uint32_t>(Val)) {

  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64: {
    switch (static_cast<uint16_t>(Val)) {
std::pair<const MachineOperand *, int>

  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64:
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64:
  case AMDGPU::V_MUL_F16_e64: {

    if ((Op == AMDGPU::V_MUL_F32_e64 &&
        ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
          Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64 ||
          Op == AMDGPU::V_MUL_F16_fake16_e64) &&
         MFI->getMode().FP64FP16Denormals.Output !=
        MI.mayRaiseFPException())

    if (Src0->isImm()) {
    } else if (Src1->isImm()) {

        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))

    return std::pair(RegOp, OMod);

  case AMDGPU::V_ADD_F64_e64:
  case AMDGPU::V_ADD_F64_pseudo_e64:
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_t16_e64:
  case AMDGPU::V_ADD_F16_fake16_e64: {

    if ((Op == AMDGPU::V_ADD_F32_e64 &&
        ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F64_pseudo_e64 ||
          Op == AMDGPU::V_ADD_F16_e64 || Op == AMDGPU::V_ADD_F16_t16_e64 ||
          Op == AMDGPU::V_ADD_F16_fake16_e64) &&

        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
  std::tie(RegOp, OMod) = isOMod(MI);
      RegOp->getSubReg() != AMDGPU::NoSubRegister ||
      !MRI->hasOneNonDBGUser(RegOp->getReg()))

  if (Def->mayRaiseFPException())

  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))

  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());

  MRI->clearKillFlags(Def->getOperand(0).getReg());
  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
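
// Note (added commentary, not in the upstream source): tryFoldOMod() and the
// clamp folding above share one idea: when a V_MUL by 0.5/2.0/4.0 (the omod
// multipliers) or a clamp-only V_MAX is the sole non-debug user of a
// preceding VALU result, the modifier is applied directly to that defining
// instruction's omod/clamp field and the extra instruction is erased. Both
// paths bail out if either instruction may raise FP exceptions or already
// carries conflicting modifiers, since those bits cannot be merged.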
  auto Reg = MI.getOperand(0).getReg();

  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
      !MRI->hasOneNonDBGUse(Reg))

  if (!getRegSeqInit(Defs, Reg))

  for (auto &[Op, SubIdx] : Defs) {
    if (TRI->isAGPR(*MRI, Op->getReg()))

  if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))
  Op = &*MRI->use_nodbg_begin(Reg);

  if (Op->getSubReg())

  if (!OpRC || !TRI->isVectorSuperClass(OpRC))

  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
  auto Dst = MRI->createVirtualRegister(NewDstRC);
                     TII->get(AMDGPU::REG_SEQUENCE), Dst);

  for (auto &[Def, SubIdx] : Defs) {
    Def->setIsKill(false);

  RS->eraseFromParent();

  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
    MI.eraseFromParent();
                       Register &OutReg, unsigned &OutSubReg) {

  if (TRI.isAGPR(MRI, CopySrcReg)) {
    OutReg = CopySrcReg;

  if (!CopySrcDef || !CopySrcDef->isCopy())

      OtherCopySrc.getSubReg() != AMDGPU::NoSubRegister ||
      !TRI.isAGPR(MRI, OtherCopySrcReg))

  OutReg = OtherCopySrcReg;
  if (!TRI->isVGPR(*MRI, PhiOut))

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    if (!Copy || !Copy->isCopy())

    unsigned AGPRRegMask = AMDGPU::NoSubRegister;

      if (const auto *SubRC = TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))

  bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {

    unsigned CopyOpc = AMDGPU::COPY;

    if (Def->isCopy()) {
      unsigned AGPRSubReg = AMDGPU::NoSubRegister;

    if (IsAGPR32 && !ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
      CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      InsertMBB = Def->getParent();

    Register NewReg = MRI->createVirtualRegister(ARC);
                           TII->get(CopyOpc), NewReg)

  Register NewReg = MRI->createVirtualRegister(ARC);
  PHI.getOperand(0).setReg(NewReg);
          TII->get(AMDGPU::COPY), PhiOut)
  if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)

  while (!Users.empty()) {
    if (!I->isCopy() && !I->isRegSequence())
    Register DstReg = I->getOperand(0).getReg();
    if (TRI->isAGPR(*MRI, DstReg))
      Users.push_back(&U);

  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
  if (!TII->isOperandLegal(MI, 0, &Def)) {
    MRI->setRegClass(DefReg, RC);

  while (!MoveRegs.empty()) {
    MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
  if (ST->hasGFX90AInsts())

  for (auto &MI : MBB) {
    if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))

    for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {

  bool Changed = false;
  for (const auto &[Entry, MOs] : RegToMO) {
    if (MOs.size() == 1)

        MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
            TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)

    Register TempAGPR = MRI->createVirtualRegister(ARC);
            TII->get(AMDGPU::COPY), TempAGPR)
  TII = ST->getInstrInfo();
  TRI = &TII->getRegisterInfo();

  bool HasNSZ = MFI->hasNoSignedZerosFPMath();

  bool Changed = false;

      Changed |= tryFoldCndMask(MI);

      if (tryFoldZeroHighBits(MI)) {

      if (MI.isRegSequence() && tryFoldRegSequence(MI)) {

      if (MI.isPHI() && tryFoldPhiAGPR(MI)) {

      if (MI.mayLoad() && tryFoldLoad(MI)) {

      if (TII->isFoldableCopy(MI)) {
        Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);

      if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
        CurrentKnownM0Val = nullptr;

        Changed |= tryFoldClamp(MI);

    Changed |= tryOptimizeAGPRPhis(*MBB);

  bool Changed = SIFoldOperandsImpl().run(MF);
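
// Note (added commentary, not in the upstream source): run() is the driver
// that walks every basic block and dispatches to the individual folds above:
// cndmask, high-bit-mask, REG_SEQUENCE, PHI and load folds run per
// instruction, foldable copies feed the main operand-folding machinery, and
// the clamp/omod folds handle the remaining VALU instructions. When
// debugging, something like the following should exercise just this pass on
// a MIR test case (the exact pass name is assumed to match DEBUG_TYPE):
//
//   llc -mtriple=amdgcn -run-pass=si-fold-operands -verify-machineinstrs \
//       -o - test.mir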