77#define DEBUG_TYPE "si-fix-sgpr-copies"
80 "amdgpu-enable-merge-m0",
81 cl::desc(
"Merge and hoist M0 initializations"),
94 unsigned NumSVCopies = 0;
99 unsigned NumReadfirstlanes = 0;
101 bool NeedToBeConvertedToVALU =
false;
107 unsigned SiblingPenalty = 0;
109 V2SCopyInfo() : Copy(nullptr),
ID(0){};
110 V2SCopyInfo(
unsigned Id, MachineInstr *
C,
unsigned Width)
111 : Copy(
C), NumReadfirstlanes(Width / 32), ID(
Id){};
112#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
114 dbgs() << ID <<
" : " << *Copy <<
"\n\tS:" << SChain.size()
115 <<
"\n\tSV:" << NumSVCopies <<
"\n\tSP: " << SiblingPenalty
116 <<
"\nScore: " << Score <<
"\n";
121class SIFixSGPRCopies {
122 MachineDominatorTree *MDT;
123 SmallVector<MachineInstr*, 4> SCCCopies;
124 SmallVector<MachineInstr*, 4> RegSequences;
125 SmallVector<MachineInstr*, 4> PHINodes;
126 SmallVector<MachineInstr*, 4> S2VCopies;
127 unsigned NextVGPRToSGPRCopyID = 0;
128 MapVector<unsigned, V2SCopyInfo> V2SCopies;
129 DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;
130 DenseSet<MachineInstr *> PHISources;
133 MachineRegisterInfo *MRI;
134 const SIRegisterInfo *TRI;
135 const SIInstrInfo *TII;
137 SIFixSGPRCopies(MachineDominatorTree *MDT) : MDT(MDT) {}
139 bool run(MachineFunction &MF);
140 void fixSCCCopies(MachineFunction &MF);
141 void prepareRegSequenceAndPHIs(MachineFunction &MF);
142 unsigned getNextVGPRToSGPRCopyId() {
return ++NextVGPRToSGPRCopyID; }
143 bool needToBeConvertedToVALU(V2SCopyInfo *
I);
144 void analyzeVGPRToSGPRCopy(MachineInstr *
MI);
145 void lowerVGPR2SGPRCopies(MachineFunction &MF);
152 void processPHINode(MachineInstr &
MI);
157 bool tryMoveVGPRConstToSGPR(MachineOperand &MO,
Register NewDst,
158 MachineBasicBlock *BlockToInsertTo,
167 SIFixSGPRCopiesLegacy() : MachineFunctionPass(ID) {}
169 bool runOnMachineFunction(MachineFunction &MF)
override {
170 MachineDominatorTree *MDT =
171 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
172 SIFixSGPRCopies Impl(MDT);
176 StringRef getPassName()
const override {
return "SI Fix SGPR copies"; }
178 void getAnalysisUsage(AnalysisUsage &AU)
const override {
194char SIFixSGPRCopiesLegacy::
ID = 0;
199 return new SIFixSGPRCopiesLegacy();
202static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
206 Register DstReg = Copy.getOperand(0).getReg();
207 Register SrcReg = Copy.getOperand(1).getReg();
210 ?
MRI.getRegClass(SrcReg)
211 :
TRI.getPhysRegBaseClass(SrcReg);
217 ?
MRI.getRegClass(DstReg)
218 :
TRI.getPhysRegBaseClass(DstReg);
220 return std::pair(SrcRC, DstRC);
226 return SrcRC != &AMDGPU::VReg_1RegClass &&
TRI.isSGPRClass(DstRC) &&
227 TRI.hasVectorRegisters(SrcRC);
233 return DstRC != &AMDGPU::VReg_1RegClass &&
TRI.isSGPRClass(SrcRC) &&
234 TRI.hasVectorRegisters(DstRC);
241 auto &Src =
MI.getOperand(1);
247 for (
const auto &MO :
MRI.reg_nodbg_operands(DstReg)) {
248 const auto *
UseMI = MO.getParent();
251 if (MO.isDef() ||
UseMI->getParent() !=
MI.getParent() ||
252 UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
255 unsigned OpIdx = MO.getOperandNo();
256 if (
OpIdx >=
UseMI->getDesc().getNumOperands() ||
261 MRI.setRegClass(DstReg,
TRI->getEquivalentSGPRClass(
MRI.getRegClass(DstReg)));
285 if (!
TRI->isSGPRClass(
MRI.getRegClass(DstReg)))
288 if (!
MRI.hasOneUse(DstReg))
310 if (
SubReg != AMDGPU::NoSubRegister)
313 MRI.setRegClass(DstReg, DstRC);
324 bool IsAGPR =
TRI->isAGPRClass(DstRC);
326 for (
unsigned I = 1,
N =
MI.getNumOperands();
I !=
N;
I += 2) {
328 TRI->getRegClassForOperandReg(
MRI,
MI.getOperand(
I));
330 "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
333 Register TmpReg =
MRI.createVirtualRegister(NewSrcRC);
341 Register TmpAReg =
MRI.createVirtualRegister(NewSrcRC);
342 unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
343 AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
350 MI.getOperand(
I).setReg(TmpReg);
362 if (Copy->getOpcode() != AMDGPU::COPY)
365 if (!MoveImm->isMoveImmediate())
369 TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
374 if (Copy->getOperand(1).getSubReg())
377 switch (MoveImm->getOpcode()) {
380 case AMDGPU::V_MOV_B32_e32:
381 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
382 SMovOp = AMDGPU::S_MOV_B32;
384 case AMDGPU::V_MOV_B64_PSEUDO:
385 SMovOp = AMDGPU::S_MOV_B64_IMM_PSEUDO;
392template <
class UnaryPredicate>
402 while (!Worklist.
empty()) {
442 while (
I !=
MBB->end() &&
TII->isBasicBlockPrologue(*
I))
458 using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
467 for (
auto &
MI :
MRI.def_instructions(
Reg)) {
469 for (
auto &MO :
MI.operands()) {
470 if ((MO.isReg() && ((MO.isDef() && MO.getReg() !=
Reg) || !MO.isDef())) ||
471 (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
479 Inits[Imm->getImm()].push_front(&
MI);
484 for (
auto &
Init : Inits) {
485 auto &Defs =
Init.second;
487 for (
auto I1 = Defs.begin(),
E = Defs.end(); I1 !=
E; ) {
490 for (
auto I2 = std::next(I1); I2 !=
E; ) {
499 auto interferes = [&MDT, From, To](
MachineInstr* &Clobber) ->
bool {
502 bool MayClobberFrom =
isReachable(Clobber, &*From, MBBTo, MDT);
503 bool MayClobberTo =
isReachable(Clobber, &*To, MBBTo, MDT);
504 if (!MayClobberFrom && !MayClobberTo)
506 if ((MayClobberFrom && !MayClobberTo) ||
507 (!MayClobberFrom && MayClobberTo))
513 return !((MBBFrom == MBBTo &&
521 return C.first !=
Init.first &&
527 if (!interferes(MI2, MI1)) {
537 if (!interferes(MI1, MI2)) {
555 if (!interferes(MI1,
I) && !interferes(MI2,
I)) {
559 <<
"and moving from "
576 for (
auto &
Init : Inits) {
577 auto &Defs =
Init.second;
578 auto I = Defs.begin();
579 while (
I != Defs.end()) {
580 if (MergedInstrs.
count(*
I)) {
581 (*I)->eraseFromParent();
589 for (
auto &
Init : Inits) {
590 auto &Defs =
Init.second;
591 for (
auto *
MI : Defs) {
592 auto *
MBB =
MI->getParent();
597 if (!
TII->isBasicBlockPrologue(*
B))
600 auto R = std::next(
MI->getReverseIterator());
601 const unsigned Threshold = 50;
603 for (
unsigned I = 0; R !=
B &&
I < Threshold; ++R, ++
I)
604 if (R->readsRegister(
Reg,
TRI) || R->definesRegister(
Reg,
TRI) ||
605 TII->isSchedulingBoundary(*R,
MBB, *
MBB->getParent()))
627 TRI =
ST.getRegisterInfo();
628 TII =
ST.getInstrInfo();
631 SmallVector<MachineInstr *, 8> Relegalize;
633 for (MachineBasicBlock &
MBB : MF) {
636 MachineInstr &
MI = *
I;
638 switch (
MI.getOpcode()) {
642 if (
TII->isWMMA(
MI) &&
647 const TargetRegisterClass *SrcRC, *DstRC;
663 if (lowerSpecialCase(
MI,
I))
666 analyzeVGPRToSGPRCopy(&
MI);
671 case AMDGPU::STRICT_WQM:
672 case AMDGPU::SOFT_WQM:
673 case AMDGPU::STRICT_WWM:
674 case AMDGPU::INSERT_SUBREG:
676 case AMDGPU::REG_SEQUENCE: {
677 if (
TRI->isSGPRClass(
TII->getOpRegClass(
MI, 0))) {
678 for (MachineOperand &MO :
MI.operands()) {
679 if (!MO.isReg() || !MO.getReg().isVirtual())
681 const TargetRegisterClass *SrcRC =
MRI->getRegClass(MO.getReg());
682 if (SrcRC == &AMDGPU::VReg_1RegClass)
685 if (
TRI->hasVectorRegisters(SrcRC)) {
686 const TargetRegisterClass *DestRC =
687 TRI->getEquivalentSGPRClass(SrcRC);
688 Register NewDst =
MRI->createVirtualRegister(DestRC);
689 MachineBasicBlock *BlockToInsertCopy =
690 MI.isPHI() ?
MI.getOperand(MO.getOperandNo() + 1).getMBB()
696 if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
697 PointToInsertCopy,
DL)) {
698 MachineInstr *NewCopy =
699 BuildMI(*BlockToInsertCopy, PointToInsertCopy,
DL,
700 TII->get(AMDGPU::COPY), NewDst)
703 analyzeVGPRToSGPRCopy(NewCopy);
704 PHISources.
insert(NewCopy);
712 else if (
MI.isRegSequence())
717 case AMDGPU::V_WRITELANE_B32: {
720 if (
ST.getConstantBusLimit(
MI.getOpcode()) != 1)
730 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
732 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src1);
733 MachineOperand &Src0 =
MI.getOperand(Src0Idx);
734 MachineOperand &Src1 =
MI.getOperand(Src1Idx);
738 Src0.
getReg() != AMDGPU::M0) &&
740 Src1.
getReg() != AMDGPU::M0)) {
747 for (MachineOperand *MO : {&Src0, &Src1}) {
748 if (MO->getReg().isVirtual()) {
749 MachineInstr *
DefMI =
MRI->getVRegDef(MO->getReg());
753 MO->getReg() ==
Def.getReg() &&
754 MO->getSubReg() ==
Def.getSubReg()) {
756 if (Copied.
isImm() &&
757 TII->isInlineConstant(APInt(64, Copied.
getImm(),
true))) {
758 MO->ChangeToImmediate(Copied.
getImm());
771 TII->get(AMDGPU::COPY), AMDGPU::M0)
782 lowerVGPR2SGPRCopies(MF);
785 for (
auto *
MI : S2VCopies) {
788 const TargetRegisterClass *SrcRC, *DstRC;
794 for (
auto *
MI : RegSequences) {
796 if (
MI->isRegSequence())
799 for (
auto *
MI : PHINodes) {
802 while (!Relegalize.
empty())
805 if (MF.getTarget().getOptLevel() > CodeGenOptLevel::None &&
EnableM0Merge)
808 SiblingPenalty.clear();
811 RegSequences.clear();
819void SIFixSGPRCopies::processPHINode(MachineInstr &
MI) {
820 bool AllAGPRUses =
true;
821 SetVector<const MachineInstr *> worklist;
822 SmallPtrSet<const MachineInstr *, 4> Visited;
823 SetVector<MachineInstr *> PHIOperands;
827 bool HasUses =
false;
828 while (!worklist.
empty()) {
831 for (
const auto &Use :
MRI->use_operands(
Reg)) {
833 const MachineInstr *
UseMI =
Use.getParent();
847 const TargetRegisterClass *RC0 =
MRI->getRegClass(PHIRes);
848 if (HasUses && AllAGPRUses && !
TRI->isAGPRClass(RC0)) {
850 MRI->setRegClass(PHIRes,
TRI->getEquivalentAGPRClass(RC0));
851 for (
unsigned I = 1,
N =
MI.getNumOperands();
I !=
N;
I += 2) {
852 MachineInstr *
DefMI =
MRI->getVRegDef(
MI.getOperand(
I).getReg());
858 if (
TRI->isVectorRegister(*
MRI, PHIRes) ||
859 RC0 == &AMDGPU::VReg_1RegClass) {
861 TII->legalizeOperands(
MI, MDT);
865 while (!PHIOperands.
empty()) {
870bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
871 MachineOperand &MaybeVGPRConstMO,
Register DstReg,
872 MachineBasicBlock *BlockToInsertTo,
879 MachineOperand *SrcConst =
TII->getNamedOperand(*
DefMI, AMDGPU::OpName::src0);
880 if (SrcConst->
isReg())
883 const TargetRegisterClass *SrcRC =
884 MRI->getRegClass(MaybeVGPRConstMO.
getReg());
885 unsigned MoveSize =
TRI->getRegSizeInBits(*SrcRC);
886 unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
887 BuildMI(*BlockToInsertTo, PointToInsertTo,
DL,
TII->get(MoveOp), DstReg)
889 if (
MRI->hasOneUse(MaybeVGPRConstMO.
getReg()))
891 MaybeVGPRConstMO.
setReg(DstReg);
895bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &
MI,
904 if (DstReg == AMDGPU::M0 &&
905 TRI->hasVectorRegisters(
MRI->getRegClass(SrcReg))) {
907 MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
909 TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
910 .
add(
MI.getOperand(1));
911 MI.getOperand(1).setReg(TmpReg);
912 }
else if (tryMoveVGPRConstToSGPR(
MI.getOperand(1), DstReg,
MI.getParent(),
913 MI,
MI.getDebugLoc())) {
915 MI.eraseFromParent();
920 SIInstrWorklist worklist;
922 TII->moveToVALU(worklist, MDT);
931 MI.getOperand(1).ChangeToImmediate(Imm);
932 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
933 MI.setDesc(
TII->get(SMovOp));
939void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr*
MI) {
943 const TargetRegisterClass *DstRC =
MRI->getRegClass(DstReg);
945 V2SCopyInfo
Info(getNextVGPRToSGPRCopyId(),
MI,
946 TRI->getRegSizeInBits(*DstRC));
947 SmallVector<MachineInstr *, 8> AnalysisWorklist;
950 DenseSet<MachineInstr *> Visited;
952 while (!AnalysisWorklist.
empty()) {
956 if (!Visited.
insert(Inst).second)
967 const TargetRegisterClass *SrcRC, *DstRC;
976 SiblingPenalty[Inst].insert(
Info.ID);
978 SmallVector<MachineInstr *, 4>
Users;
984 !
I->findRegisterDefOperand(AMDGPU::SCC,
nullptr)) {
985 if (
I->readsRegister(AMDGPU::SCC,
nullptr))
991 for (
auto &U :
MRI->use_instructions(
Reg))
995 for (
auto *U :
Users) {
997 Info.SChain.insert(U);
1006bool SIFixSGPRCopies::needToBeConvertedToVALU(V2SCopyInfo *
Info) {
1007 if (
Info->SChain.empty()) {
1012 Info->SChain, [&](MachineInstr *
A, MachineInstr *
B) ->
bool {
1013 return SiblingPenalty[A].size() < SiblingPenalty[B].size();
1015 Info->Siblings.remove_if([&](
unsigned ID) {
return ID ==
Info->ID; });
1021 SmallSet<std::pair<Register, unsigned>, 4> SrcRegs;
1022 for (
auto J :
Info->Siblings) {
1023 auto *InfoIt = V2SCopies.find(J);
1024 if (InfoIt != V2SCopies.end()) {
1025 MachineInstr *SiblingCopy = InfoIt->second.Copy;
1034 Info->SiblingPenalty = SrcRegs.
size();
1037 Info->NumSVCopies +
Info->SiblingPenalty +
Info->NumReadfirstlanes;
1038 unsigned Profit =
Info->SChain.size();
1039 Info->Score = Penalty > Profit ? 0 : Profit - Penalty;
1040 Info->NeedToBeConvertedToVALU =
Info->Score < 3;
1041 return Info->NeedToBeConvertedToVALU;
1044void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
1046 SmallVector<unsigned, 8> LoweringWorklist;
1047 for (
auto &
C : V2SCopies) {
1048 if (needToBeConvertedToVALU(&
C.second))
1056 while (!LoweringWorklist.
empty()) {
1058 auto *CurInfoIt = V2SCopies.find(CurID);
1059 if (CurInfoIt != V2SCopies.end()) {
1060 V2SCopyInfo
C = CurInfoIt->second;
1062 for (
auto S :
C.Siblings) {
1063 auto *SibInfoIt = V2SCopies.find(S);
1064 if (SibInfoIt != V2SCopies.end()) {
1065 V2SCopyInfo &
SI = SibInfoIt->second;
1067 if (!
SI.NeedToBeConvertedToVALU) {
1068 SI.SChain.set_subtract(
C.SChain);
1069 if (needToBeConvertedToVALU(&SI))
1072 SI.Siblings.remove_if([&](
unsigned ID) {
return ID ==
C.ID; });
1076 <<
" is being turned to VALU\n");
1079 V2SCopies.erase(
C.ID);
1088 for (
auto C : V2SCopies) {
1089 MachineInstr *
MI =
C.second.Copy;
1090 MachineBasicBlock *
MBB =
MI->getParent();
1094 <<
" is being turned to v_readfirstlane_b32"
1095 <<
" Score: " <<
C.second.Score <<
"\n");
1096 Register DstReg =
MI->getOperand(0).getReg();
1097 MRI->constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
1099 Register SrcReg =
MI->getOperand(1).getReg();
1100 unsigned SubReg =
MI->getOperand(1).getSubReg();
1101 const TargetRegisterClass *SrcRC =
1102 TRI->getRegClassForOperandReg(*
MRI,
MI->getOperand(1));
1103 size_t SrcSize =
TRI->getRegSizeInBits(*SrcRC);
1104 if (SrcSize == 16) {
1106 "We do not expect to see 16-bit copies from VGPR to SGPR unless "
1107 "we have 16-bit VGPRs");
1108 assert(
MRI->getRegClass(DstReg) == &AMDGPU::SReg_32RegClass ||
1109 MRI->getRegClass(DstReg) == &AMDGPU::SReg_32_XM0RegClass);
1111 MRI->setRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
1112 Register VReg32 =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1123 }
else if (SrcSize == 32) {
1125 TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg);
1126 MIB.addReg(SrcReg, 0,
SubReg);
1129 TII->get(AMDGPU::REG_SEQUENCE), DstReg);
1130 int N =
TRI->getRegSizeInBits(*SrcRC) / 32;
1131 for (
int i = 0; i <
N; i++) {
1133 Result, *
MRI,
MI->getOperand(1), SrcRC,
1134 TRI->getSubRegFromChannel(i), &AMDGPU::VGPR_32RegClass);
1136 MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1138 TII->get(AMDGPU::V_READFIRSTLANE_B32), PartialDst)
1140 Result.addReg(PartialDst).addImm(
TRI->getSubRegFromChannel(i));
1143 MI->eraseFromParent();
1147void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
1148 bool IsWave32 = MF.
getSubtarget<GCNSubtarget>().isWave32();
1149 for (MachineBasicBlock &
MBB : MF) {
1152 MachineInstr &
MI = *
I;
1158 if (SrcReg == AMDGPU::SCC) {
1160 MRI->createVirtualRegister(
TRI->getWaveMaskRegClass());
1163 TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32
1164 : AMDGPU::S_CSELECT_B64),
1168 I =
BuildMI(*
MI.getParent(), std::next(
I),
I->getDebugLoc(),
1169 TII->get(AMDGPU::COPY), DstReg)
1171 MI.eraseFromParent();
1174 if (DstReg == AMDGPU::SCC) {
1175 unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
1176 Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1179 MI.getDebugLoc(),
TII->get(Opcode))
1183 MI.eraseFromParent();
1193 SIFixSGPRCopies Impl(&MDT);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
iv Induction Variable Users
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getCopyRegClasses(const MachineInstr &Copy, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
static cl::opt< bool > EnableM0Merge("amdgpu-enable-merge-m0", cl::desc("Merge and hoist M0 initializations"), cl::init(true))
static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo *TRI, MachineDominatorTree &MDT, const TargetInstrInfo *TII)
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII, MachineRegisterInfo &MRI)
bool searchPredecessors(const MachineBasicBlock *MBB, const MachineBasicBlock *CutOff, UnaryPredicate Predicate)
static bool isReachable(const MachineInstr *From, const MachineInstr *To, const MachineBasicBlock *CutOff, MachineDominatorTree &MDT)
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII)
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, unsigned &SMovOp, int64_t &Imm)
static MachineBasicBlock::iterator getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Implements a dense probed hash-table based set.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
bool isRegSequence() const
LLVM_ABI unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A vector that has set insertion semantics.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
self_iterator getIterator()
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Resolved
Queried, materialization begun.
NodeAddr< DefNode * > Def
NodeAddr< InstrNode * > Instr
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned getDefRegState(bool B)
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
char & SIFixSGPRCopiesLegacyID
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createSIFixSGPRCopiesLegacyPass()
void insert(MachineInstr *MI)