#define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"

class SIOptimizeExecMaskingPreRA {
  // ...
  unsigned OrSaveExecOpc;
  // ...
    return "SI optimize exec mask operations pre-RA";
76 "SI optimize exec mask operations pre-RA",
false,
false)

char SIOptimizeExecMaskingPreRALegacy::ID = 0;

// ...
  return new SIOptimizeExecMaskingPreRALegacy();
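
// A sketch of the fold performed below, paraphrased from the pass's own
// description (operand order and register names are illustrative, not
// authoritative):
//
//    %sel = V_CNDMASK_B32_e64 0, 1, %cc
//    %cmp = V_CMP_NE_U32 1, %sel
//    $vcc = S_AND_B64 $exec, %cmp
//    S_CBRANCH_VCC[N]Z
// =>
//    $vcc = S_ANDN2_B64 $exec, %cc
//    S_CBRANCH_VCC[N]Z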
// ...
    unsigned Opc = MI.getOpcode();
    return Opc == AMDGPU::S_CBRANCH_VCCZ ||
           Opc == AMDGPU::S_CBRANCH_VCCNZ; });
// ...
  auto *And =
      TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS);
  if (!And || And->getOpcode() != AndOpc ||
      !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
    return false;
// ...
    AndCC = &And->getOperand(2);
    // ...
  } else if (And->getOperand(2).getReg() != Register(ExecReg)) {
    return false;
  }

  auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS);
  if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
                Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
      Cmp->getParent() != And->getParent())
    return false;
// ...
  auto *Sel =
      TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
  if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
    return false;

  if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
      TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
    return false;

  Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
  Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
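  // Elided checks (an assumption from the surrounding logic, not shown in
  // this excerpt): Op1 and Op2 must be the immediates 0 and 1, i.e. the
  // V_CNDMASK materializes a canonical boolean from %cc, which is what makes
  // folding the V_CMP away sound.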
// ...
  SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
// ...
        return VNI->isPHIDef();
// ...
  LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
                    << *And);

// ...
  MachineInstr *Andn2 =
      BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
              And->getOperand(0).getReg())
          .addReg(ExecReg)
          .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg());

  SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*And, *Andn2);
  And->eraseFromParent();
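  // ReplaceMachineInstrInMaps hands the erased S_AND's slot index to the new
  // S_ANDN2, so SlotIndex-based liveness queries stay valid without
  // renumbering the rest of the block.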
// ...
  SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);

  // Recompute liveness for the condition register now read by the S_ANDN2.
  if (CCReg.isVirtual()) {
    LIS->removeInterval(CCReg);
    LIS->createAndComputeVirtRegInterval(CCReg);
  } else
    LIS->removeAllRegUnitsForPhysReg(CCReg);

// ...
                      return MI.readsRegister(CondReg, TRI);
// ...
    LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
    LIS->RemoveMachineInstrFromMaps(*Cmp);
    Cmp->eraseFromParent();
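  // With %cc consumed directly by the S_ANDN2, the V_CMP def above is dead.
  // What follows tries to erase the V_CNDMASK too, once its result has no
  // remaining non-debug uses.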
// ...
  LIS->shrinkToUses(SelLI);
// ...
  if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
// ...
    LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
    LIS->RemoveMachineInstrFromMaps(*Sel);
    bool ShrinkSel = Sel->getOperand(0).readsReg();
    Sel->eraseFromParent();
// ...
      LIS->shrinkToUses(SelLI);
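
// A sketch of the else-branch cleanup performed below, paraphrased from the
// pass's own description (register names are illustrative):
//
//    %dst = S_OR_SAVEEXEC %src
//    ... instructions not modifying exec ...
//    %tmp = S_AND $exec, %dst
//    $exec = S_XOR_term $exec, %tmp
// =>
//    %dst = S_OR_SAVEEXEC %src
//    ... instructions not modifying exec ...
//    $exec = S_XOR_term $exec, %dst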
// ...
  if (SaveExecMI.getOpcode() != OrSaveExecOpc)
    return false;
// ...
    return MI.getOpcode() == XorTermrOpc;
// ...
  while (I != First && !AndExecMI) {
    if (I->getOpcode() == AndOpc && I->getOperand(0).getReg() == DstReg &&
        I->getOperand(1).getReg() == Register(ExecReg))
      AndExecMI = &*I;
    --I;
  }
// ...
  SlotIndex StartIdx = LIS->getInstructionIndex(SaveExecMI);
  SlotIndex EndIdx = LIS->getInstructionIndex(*AndExecMI);
// ...
    LiveRange &RegUnit = LIS->getRegUnit(Unit);
    if (RegUnit.find(StartIdx) != std::prev(RegUnit.find(EndIdx)))
      return false;
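  // The segment test above enforces that, for each register unit of exec,
  // the live segment at the S_OR_SAVEEXEC immediately precedes the one at
  // the S_AND, i.e. nothing else redefines exec in between; otherwise the
  // S_AND cannot be folded away.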

// ...
  LIS->removeInterval(SavedExecReg);
  LIS->removeInterval(DstReg);
// ...
  LIS->RemoveMachineInstrFromMaps(*AndExecMI);
  AndExecMI->eraseFromParent();

  LIS->createAndComputeVirtRegInterval(DstReg);
// ...
  SIOptimizeExecMaskingPreRA(&LIS).run(MF);
// ...

bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(
    MachineFunction &MF) {
// ...
  auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  return SIOptimizeExecMaskingPreRA(LIS).run(MF);
}

// ...
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();
// ...
  const bool Wave32 = ST.isWave32();
  AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
  Andn2Opc = Wave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
  OrSaveExecOpc =
      Wave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  XorTermrOpc = Wave32 ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
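  // On wave32 subtargets the exec mask is the 32-bit $exec_lo, so the pass
  // works with the B32 forms of these scalar opcodes; wave64 needs the full
  // 64-bit B64 forms.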

  bool Changed = false;
// ...
    if (optimizeElseBranch(MBB)) {
      RecalcRegs.insert(AMDGPU::SCC);
      Changed = true;
    }

    if (optimizeVcndVcmpPair(MBB)) {
      RecalcRegs.insert(AMDGPU::VCC_LO);
      RecalcRegs.insert(AMDGPU::VCC_HI);
      RecalcRegs.insert(AMDGPU::SCC);
      Changed = true;
    }
// ...
      if (Term.getOpcode() != AMDGPU::S_ENDPGM || Term.getNumOperands() != 1)
        continue;
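      // What follows walks backwards from a bare S_ENDPGM and deletes
      // instructions that cannot affect the program's results; registers
      // they touch are queued in RecalcRegs so liveness can be recomputed.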
// ...
      while (!Blocks.empty()) {
        auto *CurBB = Blocks.pop_back_val();
        auto I = CurBB->rbegin(), E = CurBB->rend();
// ...
        if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
          ++I;
        else if (I->isBranch())
          continue;
// ...
          if (I->isDebugInstr()) {
// ...
          if (I->mayStore() || I->isBarrier() || I->isCall() ||
              I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
            break;

          LLVM_DEBUG(dbgs() << "Removing no effect instruction: " << *I
                            << '\n');

          for (auto &Op : I->operands()) {
// ...
              RecalcRegs.insert(Op.getReg());
// ...
          auto Next = std::next(I);
          LIS->RemoveMachineInstrFromMaps(*I);
          I->eraseFromParent();
// ...
        for (auto *Pred : CurBB->predecessors()) {
          if (Pred->succ_size() == 1)
// ...
    unsigned ScanThreshold = 10;
    for (auto I = MBB.rbegin(), E = MBB.rend();
         I != E && ScanThreshold--; ++I) {
      // Only a full copy of exec is interesting here.
      if (!(I->isFullCopy() && I->getOperand(1).getReg() == Register(ExecReg)))
        continue;

      Register SavedExec = I->getOperand(0).getReg();
      if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec)) {
        MachineInstr *SingleExecUser =
            &*MRI->use_instr_nodbg_begin(SavedExec);
// ...
        if (SingleExecUser->getParent() == I->getParent() &&
            // ...
            TII->isOperandLegal(*SingleExecUser, Idx, &I->getOperand(1))) {
          LIS->RemoveMachineInstrFromMaps(*I);
          I->eraseFromParent();
          MRI->replaceRegWith(SavedExec, ExecReg);
          LIS->removeInterval(SavedExec);
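          // A full COPY of exec with a single same-block user, where exec
          // itself is a legal operand, is redundant: rewrite the use to read
          // exec directly, then drop the copy and its live interval.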
// ...
    for (auto Reg : RecalcRegs) {
      if (Reg.isVirtual()) {
        LIS->removeInterval(Reg);
        if (!MRI->reg_empty(Reg))
          LIS->createAndComputeVirtRegInterval(Reg);
      } else {
        LIS->removeAllRegUnitsForPhysReg(Reg);
      }
    }
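    // Rather than patching intervals incrementally, every touched register
    // is recomputed wholesale: virtual registers get a fresh interval while
    // any uses remain, and physical registers simply drop their
    // register-unit liveness for lazy recomputation.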