23#define DEBUG_TYPE "si-pre-emit-peephole"
27class SIPreEmitPeephole {
56 return SIPreEmitPeephole().run(MF);
63 "SI peephole optimizations",
false,
false)
65char SIPreEmitPeepholeLegacy::
ID = 0;
91 const bool IsWave32 =
ST.isWave32();
92 const unsigned CondReg =
TRI->getVCC();
93 const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
94 const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
95 const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
96 const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
100 bool ReadsCond =
false;
101 unsigned Threshold = 5;
102 for (++
A;
A != E; ++
A) {
105 if (
A->modifiesRegister(ExecReg,
TRI))
107 if (
A->modifiesRegister(CondReg,
TRI)) {
108 if (!
A->definesRegister(CondReg,
TRI) ||
109 (
A->getOpcode() !=
And &&
A->getOpcode() != AndN2))
113 ReadsCond |=
A->readsRegister(CondReg,
TRI);
121 TII->commuteInstruction(*
A);
124 if (Op1.
getReg() != ExecReg)
129 int64_t MaskValue = 0;
133 auto M = std::next(
A);
134 bool ReadsSreg =
false;
135 bool ModifiesExec =
false;
136 for (;
M != E; ++
M) {
137 if (
M->definesRegister(SReg,
TRI))
139 if (
M->modifiesRegister(SReg,
TRI))
141 ReadsSreg |=
M->readsRegister(SReg,
TRI);
142 ModifiesExec |=
M->modifiesRegister(ExecReg,
TRI);
150 if (
A->getOpcode() ==
And && SReg == CondReg && !ModifiesExec &&
152 A->eraseFromParent();
155 if (!
M->isMoveImmediate() || !
M->getOperand(1).isImm() ||
156 (
M->getOperand(1).getImm() != -1 &&
M->getOperand(1).getImm() != 0))
158 MaskValue =
M->getOperand(1).getImm();
161 if (!ReadsSreg && Op2.
isKill()) {
162 A->getOperand(2).ChangeToImmediate(MaskValue);
163 M->eraseFromParent();
165 }
else if (Op2.
isImm()) {
172 assert(MaskValue == 0 || MaskValue == -1);
173 if (
A->getOpcode() == AndN2)
174 MaskValue = ~MaskValue;
176 if (!ReadsCond &&
A->registerDefIsDead(AMDGPU::SCC,
nullptr)) {
177 if (!
MI.killsRegister(CondReg,
TRI)) {
179 if (MaskValue == 0) {
180 BuildMI(*
A->getParent(), *
A,
A->getDebugLoc(),
TII->get(Mov), CondReg)
183 BuildMI(*
A->getParent(), *
A,
A->getDebugLoc(),
TII->get(Mov), CondReg)
188 A->eraseFromParent();
191 bool IsVCCZ =
MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
192 if (SReg == ExecReg) {
195 MI.eraseFromParent();
198 MI.setDesc(
TII->get(AMDGPU::S_BRANCH));
199 }
else if (IsVCCZ && MaskValue == 0) {
210 Found =
Term.isIdenticalTo(
MI);
213 assert(Found &&
"conditional branch is not terminator");
216 assert(Dst.isMBB() &&
"destination is not basic block");
218 BranchMI->eraseFromParent();
226 MI.setDesc(
TII->get(AMDGPU::S_BRANCH));
227 }
else if (!IsVCCZ && MaskValue == 0) {
230 assert(Dst.isMBB() &&
"destination is not basic block");
231 MI.getParent()->removeSuccessor(Dst.getMBB());
232 MI.eraseFromParent();
234 }
else if (MaskValue == -1) {
237 TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));
240 MI.removeOperand(
MI.findRegisterUseOperandIdx(CondReg,
TRI,
false ));
261 E =
MI.getIterator();
265 switch (
I->getOpcode()) {
266 case AMDGPU::S_SET_GPR_IDX_MODE:
268 case AMDGPU::S_SET_GPR_IDX_OFF:
273 if (
I->modifiesRegister(AMDGPU::M0,
TRI))
275 if (IdxReg &&
I->modifiesRegister(IdxReg,
TRI))
280 TRI->isVectorRegister(MRI, MO.getReg());
284 if (!IdxOn || !(
I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||
285 I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))
291 MI.eraseFromBundle();
293 RI->eraseFromBundle();
297bool SIPreEmitPeephole::getBlockDestinations(
310class BranchWeightCostModel {
314 static constexpr uint64_t BranchNotTakenCost = 1;
321 :
TII(
TII), SchedModel(
TII.getSchedModel()) {
329 BranchTakenCost = SchedModel.computeInstrLatency(&Branch);
333 if (
TII.isWaitcnt(
MI.getOpcode()))
336 ThenCyclesCost += SchedModel.computeInstrLatency(&
MI);
348 return (Denominator - Numerator) * ThenCyclesCost <=
349 ((Denominator - Numerator) * BranchTakenCost +
350 Numerator * BranchNotTakenCost);
354bool SIPreEmitPeephole::mustRetainExeczBranch(
369 if (
MI.isConditionalBranch())
372 if (
MI.isUnconditionalBranch() &&
376 if (
MI.isMetaInstruction())
379 if (
TII->hasUnwantedEffectsWhenEXECEmpty(
MI))
382 if (!CostModel.isProfitable(
MI))
395 if (!
TII->getSchedModel().hasInstrSchedModel())
402 if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB,
Cond))
410 if (mustRetainExeczBranch(
MI, *FalseMBB, *TrueMBB))
414 MI.eraseFromParent();
423 if (!SIPreEmitPeephole().
run(MF))
431 TII = ST.getInstrInfo();
432 TRI = &
TII->getRegisterInfo();
433 bool Changed =
false;
442 switch (
MI.getOpcode()) {
443 case AMDGPU::S_CBRANCH_VCCZ:
444 case AMDGPU::S_CBRANCH_VCCNZ:
445 Changed |= optimizeVccBranch(
MI);
447 case AMDGPU::S_CBRANCH_EXECZ:
448 Changed |= removeExeczBranch(
MI,
MBB);
453 if (!
ST.hasVGPRIndexMode())
457 const unsigned Threshold = 20;
465 if (Count == Threshold)
470 if (
MI.getOpcode() != AMDGPU::S_SET_GPR_IDX_ON)
479 if (optimizeSetGPR(*SetGPRMI,
MI))
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
A container for analyses that lazily runs them and caches their results.
static uint32_t getDenominator()
uint32_t getNumerator() const
static BranchProbability getZero()
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
LLVM_ABI BranchProbability getSuccProbability(const_succ_iterator Succ) const
Return probability of the edge from this block to MBB.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
BasicBlockListType::const_iterator const_iterator
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provide an instruction scheduling machine model to CodeGen passes.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
char & SIPreEmitPeepholeID
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &)