#define DEBUG_TYPE "si-peephole-sdwa"
STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
STATISTIC(NumSDWAInstructionsPeepholed,
          "Number of instructions converted to SDWA.");
  SDWAOperandsMap PotentialMatches;

  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
using namespace AMDGPU::SDWA;
  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
      : Target(TargetOp), Replaced(ReplacedOp) {
    assert(Target->isReg());
    assert(Replaced->isReg());
  }

  virtual ~SDWAOperand() = default;

  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII,
                                           const GCNSubtarget &ST,
                                           SDWAOperandsMap *PotentialMatches = nullptr) = 0;
  MachineRegisterInfo *getMRI() const {
    return &getParentInst()->getParent()->getParent()->getRegInfo();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
class SDWASrcOperand : public SDWAOperand {
public:
  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
                 bool Sext_ = false)
      : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
        Neg(Neg_), Sext(Sext_) {}

  MachineInstr *potentialToConvert(const SIInstrInfo *TII, const GCNSubtarget &ST,
                                   SDWAOperandsMap *PotentialMatches = nullptr) override;
  SdwaSel getSrcSel() const { return SrcSel; }
  bool getAbs() const { return Abs; }
  bool getNeg() const { return Neg; }
  bool getSext() const { return Sext; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
class SDWADstOperand : public SDWAOperand {
public:
  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
      : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}

  MachineInstr *potentialToConvert(const SIInstrInfo *TII, const GCNSubtarget &ST,
                                   SDWAOperandsMap *PotentialMatches = nullptr) override;
  SdwaSel getDstSel() const { return DstSel; }
  DstUnused getDstUnused() const { return DstUn; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
class SDWADstPreserveOperand : public SDWADstOperand {
public:
  SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                         MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
      : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
        Preserve(PreserveOp) {}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
char SIPeepholeSDWALegacy::ID = 0;

FunctionPass *llvm::createSIPeepholeSDWALegacyPass() {
  return new SIPeepholeSDWALegacy();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  case DWORD:
    OS << "DWORD";
    break;
  OS << "SDWA src: " << *getTargetOperand()
     << " src_sel:" << getSrcSel()
     << " abs:" << getAbs() << " neg:" << getNeg()
     << " sext:" << getSext() << '\n';
  OS << "SDWA dst: " << *getTargetOperand()
     << " dst_sel:" << getDstSel()
     << " dst_unused:" << getDstUnused() << '\n';
  OS << "SDWA preserve dst: " << *getTargetOperand()
     << " dst_sel:" << getDstSel()
     << " preserve:" << *getPreservedOperand() << '\n';
  return LHS.isReg() && RHS.isReg() &&
         LHS.getReg() == RHS.getReg() &&
         LHS.getSubReg() == RHS.getSubReg();
  if (!Reg->isReg() || !Reg->isDef())
    return nullptr;
  // ... (in findSingleRegDef)
  for (auto &DefMO : DefInstr->defs()) {
    if (DefMO.isReg() && DefMO.getReg() == Reg->getReg())
      return &DefMO;
  }
  if (Sel == SdwaSel::DWORD)
    return OperandSel;

  if (Sel == OperandSel || OperandSel == SdwaSel::DWORD)
    return Sel;

  if (Sel == SdwaSel::WORD_1 || Sel == SdwaSel::BYTE_2 ||
      Sel == SdwaSel::BYTE_3)
    return {};

  if (OperandSel == SdwaSel::WORD_0)
    return Sel;

  if (OperandSel == SdwaSel::WORD_1) {
    if (Sel == SdwaSel::BYTE_0)
      return SdwaSel::BYTE_2;
    if (Sel == SdwaSel::BYTE_1)
      return SdwaSel::BYTE_3;
    if (Sel == SdwaSel::WORD_0)
      return SdwaSel::WORD_1;
  }
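  // Worked example (derived from the cases above, not a comment from the
  // original source): combining Sel = BYTE_0 with OperandSel = WORD_1
  // reads byte 0 of the operand's upper word, i.e. BYTE_2 of the full
  // dword, while Sel = WORD_1 over OperandSel = WORD_0 has no single
  // selection equivalent and therefore yields std::nullopt.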
  uint64_t Mods = 0;
  const auto *MI = SrcOp->getParent();
  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      Mods = Mod->getImm();
    }
  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
      Mods = Mod->getImm();
    }
  }
  if (Abs || Neg) {
    assert(!Sext &&
           "Float and integer src modifiers can't be set simultaneously");
MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
                                                 const GCNSubtarget &ST,
                                                 SDWAOperandsMap *PotentialMatches) {
  if (PotentialMatches != nullptr) {
    // Fill out the map for all uses if all of them can be converted.
    MachineOperand *Reg = getReplacedOperand();
    if (!Reg->isReg() || !Reg->isDef())
      return nullptr;

    for (MachineInstr &UseMI : getMRI()->use_nodbg_instructions(Reg->getReg()))
      if (!isConvertibleToSDWA(UseMI, ST, TII) ||
          !canCombineSelections(UseMI, TII))
        return nullptr;

    // All uses are known legal; record them for later conversion.
    for (MachineOperand &UseMO : getMRI()->use_nodbg_operands(Reg->getReg())) {
      SDWAOperandsMap &potentialMatchesMap = *PotentialMatches;
      MachineInstr *UseMI = UseMO.getParent();
      potentialMatchesMap[UseMI].push_back(this);
    }
    return nullptr;
  }

  // Otherwise the potential instruction is the single user of the register
  // defined by the parent instruction.
  MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
  if (!PotentialMO)
    return nullptr;

  MachineInstr *Parent = PotentialMO->getParent();
  return canCombineSelections(*Parent, TII) ? Parent : nullptr;
}
  switch (MI.getOpcode()) {
  case AMDGPU::V_CVT_F32_FP8_sdwa:
  case AMDGPU::V_CVT_F32_BF8_sdwa:
  case AMDGPU::V_CVT_PK_F32_FP8_sdwa:
  case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
    // These do not support input modifiers: noabs, noneg, nosext.
    return false;
  case AMDGPU::V_CNDMASK_B32_sdwa:
    // ...
    break;
  default:
    break;
  }
  bool IsPreserveSrc = false;
  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
  MachineOperand *SrcMods =
      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  assert(Src && (Src->isReg() || Src->isImm()));
  if (!isSameReg(*Src, *getReplacedOperand())) {
    // If this is not src0 then it could be src1.
    Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
    SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

    if (!Src ||
        !isSameReg(*Src, *getReplacedOperand())) {
      // This Src may be a tied operand of an instruction already converted
      // with dst_unused:UNUSED_PRESERVE.
      MachineOperand *DstUnused =
          TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
      if (DstUnused &&
          DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
        auto DstSel = static_cast<SdwaSel>(
            TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel));
        if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 &&
            getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) {
          IsPreserveSrc = true;
          auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                   AMDGPU::OpName::vdst);
          auto TiedIdx = MI.findTiedOperandIdx(DstIdx);
          Src = &MI.getOperand(TiedIdx);
        }
      }
    }
  }

  assert(Src && Src->isReg());

  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      !isSameReg(*Src, *getReplacedOperand())) {
    // Here Src is src2, which is tied to vdst and cannot take the operand.
    return false;
  }

  assert(isSameReg(*Src, *getReplacedOperand()) &&
         (IsPreserveSrc || (SrcSel && SrcMods)));

  if (!IsPreserveSrc) {
    // ...
  }
  getTargetOperand()->setIsKill(false);
  return true;
}
static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII,
                            AMDGPU::OpName SrcSelOpName, SdwaSel OpSel) {
  // ...
}

static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII,
                            AMDGPU::OpName SrcOpName,
                            const MachineOperand *SrcOp, SdwaSel OpSel) {
  // ...
}
bool SDWASrcOperand::canCombineSelections(const MachineInstr &MI,
                                          const SIInstrInfo *TII) {
  if (!TII->isSDWA(MI.getOpcode()))
    return true;

  using namespace AMDGPU;

  return canCombineOpSel(MI, TII, OpName::src0,
                         getReplacedOperand(), getSrcSel()) &&
         canCombineOpSel(MI, TII, OpName::src1,
                         getReplacedOperand(), getSrcSel());
}
MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,
                                                 const GCNSubtarget &ST,
                                                 SDWAOperandsMap *PotentialMatches) {
  // ...
  if (&UseInst != ParentMI)
    return nullptr;
  // ...
  return canCombineSelections(*Parent, TII) ? Parent : nullptr;
}
bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      getDstSel() != AMDGPU::SDWA::DWORD) {
    // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD.
    return false;
  }
  // ...
  assert(Operand && Operand->isReg() &&
         isSameReg(*Operand, *getReplacedOperand()));
  // ...
  // Remove the original instruction; it would conflict with the new
  // instruction by register definition.
  getParentInst()->eraseFromParent();
  return true;
}
bool SDWADstOperand::canCombineSelections(const MachineInstr &MI,
                                          const SIInstrInfo *TII) {
  if (!TII->isSDWA(MI.getOpcode()))
    return true;
  // ...
}

bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
                                           const SIInstrInfo *TII) {
  // MI should be moved right before the v_or_b32; clear kill flags on the
  // uses of its source operands first.
  for (MachineOperand &MO : MI.uses()) {
    if (!MO.isReg())
      continue;
    getMRI()->clearKillFlags(MO.getReg());
  }

  // Move MI before v_or_b32.
  MI.getParent()->remove(&MI);
  getParentInst()->getParent()->insert(getParentInst(), &MI);

  // Add an implicit use of the preserved register.
  MachineInstrBuilder MIB(*MI.getMF(), MI);
  MIB.addReg(getPreservedOperand()->getReg(),
             RegState::Implicit,
             getPreservedOperand()->getSubReg());

  // Tie dst to the implicit use.
  MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
                 MI.getNumOperands() - 1);

  // Convert MI as any other SDWADstOperand and remove v_or_b32.
  return SDWADstOperand::convertToSDWA(MI, TII);
}
bool SDWADstPreserveOperand::canCombineSelections(const MachineInstr &MI,
                                                  const SIInstrInfo *TII) {
  return SDWADstOperand::canCombineSelections(MI, TII);
}
std::optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
  // ...
  if (!TII->isFoldableCopy(*DefInst))
    return std::nullopt;
std::unique_ptr<SDWAOperand>
SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_LSHLREV_B32_e64: {
    MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    auto Imm = foldToImm(*Src0);
    if (!Imm || (*Imm != 16 && *Imm != 24))
      break;
    MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    if (!Src1->isReg() || Src1->getReg().isPhysical() ||
        Dst->getReg().isPhysical())
      break;
    if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
        Opcode == AMDGPU::V_LSHLREV_B32_e64)
      return std::make_unique<SDWADstOperand>(
          Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
    return std::make_unique<SDWASrcOperand>(
        Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
        Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
        Opcode != AMDGPU::V_LSHRREV_B32_e64);
  }
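  // Illustrative MIR (hypothetical registers, not from this excerpt):
  //   %1:vgpr_32 = V_LSHRREV_B32_e32 16, %0:vgpr_32
  // is recorded as an SDWASrcOperand so a user of %1 can instead read %0
  // with src_sel:WORD_1; the V_ASHRREV forms additionally request sext,
  // and the V_LSHLREV forms become an SDWADstOperand (dst_sel:WORD_1,
  // dst_unused:UNUSED_PAD) on the shift's own result.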
  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_opsel_e64:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_LSHLREV_B16_opsel_e64:
  case AMDGPU::V_LSHLREV_B16_e64: {
    MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    auto Imm = foldToImm(*Src0);
    if (!Imm || *Imm != 8)
      break;
    MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    if (!Src1->isReg() || Src1->getReg().isPhysical() ||
        Dst->getReg().isPhysical())
      break;
    if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
        Opcode == AMDGPU::V_LSHLREV_B16_opsel_e64 ||
        Opcode == AMDGPU::V_LSHLREV_B16_e64)
      return std::make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
    return std::make_unique<SDWASrcOperand>(
        Src1, Dst, BYTE_1, false, false,
        Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
        Opcode != AMDGPU::V_LSHRREV_B16_opsel_e64 &&
        Opcode != AMDGPU::V_LSHRREV_B16_e64);
  }
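  // Same idea for the 16-bit shifts (hypothetical MIR): with a shift
  // amount of 8,
  //   %1:vgpr_32 = V_LSHRREV_B16_e32 8, %0:vgpr_32
  // exposes the high byte of the low word, i.e. src_sel:BYTE_1 on %0.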
  case AMDGPU::V_BFE_I32_e64:
  case AMDGPU::V_BFE_U32_e64: {
    MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    auto Offset = foldToImm(*Src1);
    if (!Offset)
      break;

    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
    auto Width = foldToImm(*Src2);
    if (!Width)
      break;

    SdwaSel SrcSel = DWORD;
    if (*Offset == 0 && *Width == 8)
      SrcSel = BYTE_0;
    else if (*Offset == 0 && *Width == 16)
      SrcSel = WORD_0;
    else if (*Offset == 0 && *Width == 32)
      SrcSel = DWORD;
    else if (*Offset == 8 && *Width == 8)
      SrcSel = BYTE_1;
    else if (*Offset == 16 && *Width == 8)
      SrcSel = BYTE_2;
    else if (*Offset == 16 && *Width == 16)
      SrcSel = WORD_1;
    else if (*Offset == 24 && *Width == 8)
      SrcSel = BYTE_3;
    else
      break;

    MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    if (!Src0->isReg() || Src0->getReg().isPhysical() ||
        Dst->getReg().isPhysical())
      break;

    return std::make_unique<SDWASrcOperand>(
        Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32_e64);
  }
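  // Worked mapping (hypothetical MIR): for
  //   %1:vgpr_32 = V_BFE_U32_e64 %0:vgpr_32, 8, 8
  // Offset = 8 and Width = 8 select SrcSel = BYTE_1; the V_BFE_I32 form
  // requests a sign-extending read instead, which is why the final
  // constructor argument is Opcode != AMDGPU::V_BFE_U32_e64.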
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64: {
    MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    auto Imm = foldToImm(*Src0);
    MachineOperand *ValSrc = Src1;
    if (!Imm) {
      Imm = foldToImm(*Src1);
      ValSrc = Src0;
    }

    if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
      break;

    MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    if (!ValSrc->isReg() || ValSrc->getReg().isPhysical() ||
        Dst->getReg().isPhysical())
      break;

    return std::make_unique<SDWASrcOperand>(
        ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
  }
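  // Illustrative MIR (hypothetical): masking with a low-part constant
  //   %1:vgpr_32 = V_AND_B32_e32 0xffff, %0:vgpr_32
  // is recorded as src_sel:WORD_0 on %0 (a 0xff mask would give BYTE_0).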
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64: {
    // Patterns for a dst_unused:UNUSED_PRESERVE operand.
    using CheckRetType =
        std::optional<std::pair<MachineOperand *, MachineOperand *>>;
    auto CheckOROperandsForSDWA =
        [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
      if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
        return CheckRetType(std::nullopt);

      MachineOperand *Op1Def = findSingleRegDef(Op1, getMRI());
      if (!Op1Def)
        return CheckRetType(std::nullopt);

      MachineInstr *Op1Inst = Op1Def->getParent();
      if (!TII->isSDWA(*Op1Inst))
        return CheckRetType(std::nullopt);

      MachineOperand *Op2Def = findSingleRegDef(Op2, getMRI());
      if (!Op2Def)
        return CheckRetType(std::nullopt);

      return CheckRetType(std::pair(Op1Def, Op2Def));
    };
    MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    assert(OrSDWA && OrOther);
    auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
    if (!Res) {
      OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
      assert(OrSDWA && OrOther);
      Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
      if (!Res)
        break;
    }

    MachineOperand *OrSDWADef = Res->first;
    MachineOperand *OrOtherDef = Res->second;
    assert(OrSDWADef && OrOtherDef);
    MachineInstr *SDWAInst = OrSDWADef->getParent();
    MachineInstr *OtherInst = OrOtherDef->getParent();
    if (!TII->isSDWA(*OtherInst))
      break;

    SdwaSel DstSel = static_cast<SdwaSel>(
        TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));
    SdwaSel OtherDstSel = static_cast<SdwaSel>(
        TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));

    bool DstSelAgree = false;
    switch (DstSel) {
    case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == WORD_0));
      break;
    case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_0));
      break;
    case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == WORD_0));
      break;
    default: DstSelAgree = false;
    }

    if (!DstSelAgree)
      break;

    // Also OtherInst dst_unused should be UNUSED_PAD.
    DstUnused OtherDstUnused = static_cast<DstUnused>(
        TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
    if (OtherDstUnused != DstUnused::UNUSED_PAD)
      break;

    // Create the DstPreserveOperand.
    MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    assert(OrDst && OrDst->isReg());

    return std::make_unique<SDWADstPreserveOperand>(
        OrDst, OrSDWADef, OrOtherDef, DstSel);
  }
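  // Illustrative pattern (assumed MIR shapes): two SDWA instructions that
  // write complementary halves and are joined by an OR, e.g.
  //   %1 = V_..._sdwa ... dst_sel:WORD_0 dst_unused:UNUSED_PAD ...
  //   %2 = V_..._sdwa ... dst_sel:WORD_1 dst_unused:UNUSED_PAD ...
  //   %3 = V_OR_B32_e32 %1, %2
  // let the OR be folded away by re-emitting one instruction with
  // dst_unused:UNUSED_PRESERVE and tying in the other result.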
  return std::unique_ptr<SDWAOperand>(nullptr);
}
void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
  for (MachineInstr &MI : MBB) {
    if (auto Operand = matchSDWAOperand(MI)) {
      SDWAOperands[&MI] = std::move(Operand);
      ++NumSDWAPatternsFound;
    }
  }
}
void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
                                           const GCNSubtarget &ST) {
  int Opc = MI.getOpcode();
  assert((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64) &&
         "Currently only handles V_ADD_CO_U32_e64 or V_SUB_CO_U32_e64");
  for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
       I != E; ++I) {
    if (I->modifiesRegister(AMDGPU::VCC, TRI))
      return;
  }
  // Replace MI with the VOP2 (e32) variant.
  BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::getVOPe32(Opc)))
      .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
      .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
      .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
      .setMIFlags(MI.getFlags());

  MI.eraseFromParent();
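  // Sketch of the intent (assumed MIR shapes): a carry pair such as
  //   %0:vgpr_32, %c:sreg_64 = V_ADD_CO_U32_e64 %a, %b
  //   %1:vgpr_32 = V_ADDC_U32_e64 ... %c ...
  // is rewritten to the e32 forms that implicitly define/use VCC, because
  // only those VOP2 variants have SDWA counterparts; the scan above bails
  // out if any instruction between the pair clobbers VCC.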
void SIPeepholeSDWA::convertVcndmaskToVOP2(MachineInstr &MI,
                                           const GCNSubtarget &ST) {
  assert(MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64);
  // ...
  const MachineOperand &CarryIn =
      *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
    LLVM_DEBUG(dbgs() << "VCC not known to be dead before instruction\n");
  MachineInstr *Converted =
      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e32))
          .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
          .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
          .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
          .getInstr();
  TII->fixImplicitOperands(*Converted);
  // ...
  MI.eraseFromParent();
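  // Sketch (assumed MIR): once its mask operand can be placed in VCC,
  //   %0:vgpr_32 = V_CNDMASK_B32_e64 0, %a, 0, %b, %mask
  // drops to the VOP2 form
  //   %0:vgpr_32 = V_CNDMASK_B32_e32 %a, %b, implicit $vcc
  // which is the only variant with an SDWA encoding; the liveness check
  // above guards the implicit VCC use this rewrite introduces.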
bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST,
                         const SIInstrInfo *TII) {
  unsigned Opc = MI.getOpcode();
  // ...
  // Can only be handled after the earlier conversion to
  // AMDGPU::V_CNDMASK_B32_e32, which is not always possible.
  if (Opc == AMDGPU::V_CNDMASK_B32_e64)
    return false;
  if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
    return false;

  if (TII->isVOPC(Opc)) {
    if (!ST.hasSDWASdst()) {
      const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
      if (SDst && (SDst->getReg() != AMDGPU::VCC &&
                   SDst->getReg() != AMDGPU::VCC_LO))
        return false;
    }

    if (!ST.hasSDWAOutModsVOPC() &&
        (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
         TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
      return false;

  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
             !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
    return false;
  }
  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
                           Opc == AMDGPU::V_FMAC_F32_e32 ||
                           Opc == AMDGPU::V_MAC_F16_e32 ||
                           Opc == AMDGPU::V_MAC_F32_e32))
    return false;

  // Check if the target supports this SDWA opcode.
  if (TII->pseudoToMCOpcode(Opc) == -1)
    return false;

  return true;
}
MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  int SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
  if (SDWAOpcode == -1)
    SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
  assert(SDWAOpcode != -1);
  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
    SDWAInst.add(*Dst);
  }
  // ...

  // Copy src0 and its modifiers, initializing the modifiers if absent.
  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
    SDWAInst.addImm(Mod->getImm());
  else
    SDWAInst.addImm(0);
  SDWAInst.add(*Src0);

  // Same for src1.
  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
    SDWAInst.addImm(Mod->getImm());
  else
    SDWAInst.addImm(0);
  SDWAInst.add(*Src1);
  if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
      SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
      SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
      SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
    // v_mac_f16/32 has an additional src2 operand tied to vdst.
    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
    assert(Src2);
    SDWAInst.add(*Src2);
  }

  // Copy clamp if present, initialize otherwise (operand-presence guards
  // elided in this excerpt).
  MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
  if (Clamp)
    SDWAInst.add(*Clamp);
  else
    SDWAInst.addImm(0);

  // Copy omod if present, initialize otherwise.
  MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
  if (OMod)
    SDWAInst.add(*OMod);
  else
    SDWAInst.addImm(0);

  // Initialize the SDWA-specific operands with neutral values.
  SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);        // dst_sel
  SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); // dst_unused
  SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);        // src0_sel
  SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);        // src1_sel
  TII->fixImplicitOperands(*Ret);
  return Ret;
}
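// Illustrative conversion (hypothetical operands): createSDWAVersion turns
//   %2:vgpr_32 = V_ADD_F32_e32 %0, %1
// into something like
//   %2:vgpr_32 = V_ADD_F32_sdwa 0, %0, 0, %1, 0, 0, 6, 0, 6, 6
// i.e. cleared src modifiers, clamp and omod, followed by the neutral
// dst_sel:DWORD, dst_unused:UNUSED_PAD and src_sel:DWORD fields appended
// above; the matched SDWAOperands subsequently overwrite the selections.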
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                   const SDWAOperandsVector &SDWAOperands) {
  MachineInstr *SDWAInst;
  if (TII->isSDWA(MI.getOpcode())) {
    // Clone the instruction so that changes made while processing the
    // operands can be revoked if the conversion fails.
    SDWAInst = MI.getParent()->getParent()->CloneMachineInstr(&MI);
    MI.getParent()->insert(MI.getIterator(), SDWAInst);
  } else {
    SDWAInst = createSDWAVersion(MI);
  }

  // Apply all SDWA operand patterns.
  bool Converted = false;
  for (auto &Operand : SDWAOperands) {
    // There should be no intersection between SDWA operands and potential
    // MIs; operands whose parent is itself a potential match are skipped.
    if (PotentialMatches.count(Operand->getParentInst()) == 0)
      Converted |= Operand->convertToSDWA(*SDWAInst, TII);
  }

  if (Converted) {
    ConvertedInstructions.push_back(SDWAInst);
    for (MachineOperand &MO : SDWAInst->uses()) {
      if (!MO.isReg())
        continue;
      MRI->clearKillFlags(MO.getReg());
    }
  }
  // ...
  ++NumSDWAInstructionsPeepholed;

  MI.eraseFromParent();
  return true;
}
void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
                                            const GCNSubtarget &ST) {
  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
  unsigned ConstantBusCount = 0;
  for (MachineOperand &Op : MI.explicit_uses()) {
    if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
      continue;

    unsigned I = Op.getOperandNo();
    if (Desc.operands()[I].RegClass == -1 ||
        !TRI->isVSSuperClass(TRI->getRegClass(Desc.operands()[I].RegClass)))
      continue;

    if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
        TRI->isSGPRReg(*MRI, Op.getReg())) {
      ++ConstantBusCount;
      continue;
    }

    Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                        TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
    if (Op.isImm())
      Copy.addImm(Op.getImm());
    else if (Op.isReg())
      Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
                  Op.getSubReg());
    Op.ChangeToRegister(VGPR, false);
  }
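  // Sketch of the constraint being fixed (assumed operands): most
  // subtargets do not allow SDWA instructions to read SGPRs or literals
  // directly, so
  //   %2 = V_ADD_F32_sdwa ... $sgpr0, %1 ...
  // is legalized by materializing the scalar in a VGPR first:
  //   %3:vgpr_32 = V_MOV_B32_e32 $sgpr0
  //   %2 = V_ADD_F32_sdwa ... %3, %1 ...
  // With hasSDWAScalar(), a single SGPR use is tolerated and counted
  // against the constant bus before any copies are introduced.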
  return SIPeepholeSDWA().run(MF);
bool SIPeepholeSDWA::run(MachineFunction &MF) {
  // ...
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();
  bool Changed = false;
  do {
    // First pass: detect SDWA candidate operands and preprocess the pseudo
    // instructions that block conversion.
    matchSDWAOperands(MBB);
    for (const auto &OperandPair : SDWAOperands) {
      const auto &Operand = OperandPair.second;
      MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST);
      if (!PotentialMI)
        continue;

      switch (PotentialMI->getOpcode()) {
      case AMDGPU::V_ADD_CO_U32_e64:
      case AMDGPU::V_SUB_CO_U32_e64:
        pseudoOpConvertToVOP2(*PotentialMI, ST);
        break;
      case AMDGPU::V_CNDMASK_B32_e64:
        convertVcndmaskToVOP2(*PotentialMI, ST);
        break;
      }
    }
    SDWAOperands.clear();

    // Second pass: generate the potential-match list.
    matchSDWAOperands(MBB);

    for (const auto &OperandPair : SDWAOperands) {
      const auto &Operand = OperandPair.second;
      MachineInstr *PotentialMI =
          Operand->potentialToConvert(TII, ST, &PotentialMatches);
      if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST, TII))
        PotentialMatches[PotentialMI].push_back(Operand.get());
    }

    for (auto &PotentialPair : PotentialMatches) {
      MachineInstr &PotentialMI = *PotentialPair.first;
      convertToSDWA(PotentialMI, PotentialPair.second);
    }

    PotentialMatches.clear();
    SDWAOperands.clear();

    Changed = !ConvertedInstructions.empty();

    while (!ConvertedInstructions.empty())
      legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
  } while (Changed);
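  // End-to-end flavor of the pass on a hypothetical input: each block is
  // reprocessed until no more conversions happen, so
  //   %1 = V_LSHRREV_B32_e32 16, %0
  //   %2 = V_ADD_F16_e32 %1, %3
  // first matches the shift as an SDWA src operand, then rewrites the add
  // as
  //   %2 = V_ADD_F16_sdwa ... %0 ... src0_sel:WORD_1 ...
  // and finally legalizeScalarOperands repairs any constant-bus violations
  // that the conversion introduced.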