65#define DEBUG_TYPE "si-lower-control-flow"
73class SILowerControlFlow {
92 unsigned Andn2TermOpc;
95 unsigned OrSaveExecOpc;
98 bool EnableOptimizeEndCf =
false;
133 while (
I !=
End && !
I->isUnconditionalBranch())
139 void optimizeEndCf();
144 : LIS(LIS), LV(LV), MDT(MDT), PDT(PDT) {}
157 return "SI Lower control flow pseudo instructions";
174char SILowerControlFlowLegacy::ID = 0;
193 while (!Worklist.
empty()) {
208 Register SaveExecReg =
MI.getOperand(0).getReg();
209 auto U =
MRI->use_instr_nodbg_begin(SaveExecReg);
211 if (U ==
MRI->use_instr_nodbg_end() ||
212 std::next(U) !=
MRI->use_instr_nodbg_end() ||
213 U->getOpcode() != AMDGPU::SI_END_CF)
223 Register SaveExecReg =
MI.getOperand(0).getReg();
225 assert(
Cond.getSubReg() == AMDGPU::NoSubRegister);
238 auto UseMI =
MRI->use_instr_nodbg_begin(SaveExecReg);
244 Register CopyReg = SimpleIf ? SaveExecReg
245 :
MRI->createVirtualRegister(BoolRC);
250 LoweredIf.
insert(CopyReg);
261 setImpSCCDefDead(*
And,
true);
269 setImpSCCDefDead(*
Xor, ImpDefSCC.
isDead());
282 I = skipToUncondBrOrEnd(
MBB,
I);
287 .
add(
MI.getOperand(2));
290 MI.eraseFromParent();
306 MI.eraseFromParent();
311 RecomputeRegs.
insert(SaveExecReg);
328 Register SaveReg =
MRI->createVirtualRegister(BoolRC);
331 .
add(
MI.getOperand(1));
352 ElsePt = skipToUncondBrOrEnd(
MBB, ElsePt);
359 MI.eraseFromParent();
364 MI.eraseFromParent();
372 RecomputeRegs.
insert(SrcReg);
373 RecomputeRegs.
insert(DstReg);
383 auto Dst =
MI.getOperand(0).getReg();
389 bool SkipAnding =
false;
390 if (
MI.getOperand(1).isReg()) {
392 SkipAnding =
Def->getParent() ==
MI.getParent()
402 AndReg =
MRI->createVirtualRegister(BoolRC);
405 .
add(
MI.getOperand(1));
410 .
add(
MI.getOperand(2));
413 .
add(
MI.getOperand(1))
414 .
add(
MI.getOperand(2));
425 RecomputeRegs.
insert(
And->getOperand(2).getReg());
431 MI.eraseFromParent();
441 .
add(
MI.getOperand(0));
445 auto BranchPt = skipToUncondBrOrEnd(
MBB,
MI.getIterator());
448 .
add(
MI.getOperand(1));
451 RecomputeRegs.
insert(
MI.getOperand(0).getReg());
456 MI.eraseFromParent();
460SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
470 for ( ; It != E; ++It) {
471 if (
TII->mayReadEXEC(*
MRI, *It))
478 if (
B->succ_size() != 1)
498 bool NeedBlockSplit =
false;
502 if (
I->modifiesRegister(DataReg,
TRI)) {
503 NeedBlockSplit =
true;
508 unsigned Opcode = OrOpc;
510 if (NeedBlockSplit) {
512 if (SplitBB != &
MBB && (MDT || PDT)) {
516 DTUpdates.
push_back({DomTreeT::Insert, SplitBB, Succ});
532 .
add(
MI.getOperand(0));
536 if (SplitBB != &
MBB) {
546 if (
Op.getReg().isVirtual())
552 for (
unsigned i = 0, e =
MRI->getNumVirtRegs(); i != e; ++i) {
560 if (
Kill->getParent() == SplitBB && !DefInOrigBlock.
contains(Reg))
568 LoweredEndCf.
insert(NewMI);
573 MI.eraseFromParent();
582void SILowerControlFlow::findMaskOperands(
MachineInstr &
MI,
unsigned OpNo,
585 if (!
Op.isReg() || !
Op.getReg().isVirtual()) {
591 if (!Def ||
Def->getParent() !=
MI.getParent() ||
592 !(
Def->isFullCopy() || (
Def->getOpcode() ==
MI.getOpcode())))
598 for (
auto I =
Def->getIterator();
I !=
MI.getIterator(); ++
I)
599 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI) &&
600 !(
I->isCopy() &&
I->getOperand(0).getReg() != Exec))
603 for (
const auto &
SrcOp :
Def->explicit_operands())
606 Src.push_back(
SrcOp);
614 assert(
MI.getNumExplicitOperands() == 3);
616 unsigned OpToReplace = 1;
617 findMaskOperands(
MI, 1, Ops);
618 if (Ops.
size() == 1) OpToReplace = 2;
619 findMaskOperands(
MI, 2, Ops);
620 if (Ops.
size() != 3)
return;
622 unsigned UniqueOpndIdx;
623 if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
624 else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
625 else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
629 MI.removeOperand(OpToReplace);
630 MI.addOperand(Ops[UniqueOpndIdx]);
631 if (
MRI->use_empty(Reg))
632 MRI->getUniqueVRegDef(Reg)->eraseFromParent();
635void SILowerControlFlow::optimizeEndCf() {
638 if (!EnableOptimizeEndCf)
644 skipIgnoreExecInstsTrivialSucc(
MBB, std::next(
MI->getIterator()));
645 if (Next ==
MBB.
end() || !LoweredEndCf.
count(&*Next))
650 =
TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
654 if (Def && LoweredIf.
count(SavedExec)) {
660 Reg =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src1)->getReg();
661 MI->eraseFromParent();
664 removeMBBifRedundant(
MBB);
676 switch (
MI.getOpcode()) {
681 case AMDGPU::SI_ELSE:
685 case AMDGPU::SI_IF_BREAK:
689 case AMDGPU::SI_LOOP:
693 case AMDGPU::SI_WATERFALL_LOOP:
694 MI.setDesc(
TII->get(AMDGPU::S_CBRANCH_EXECNZ));
697 case AMDGPU::SI_END_CF:
698 SplitBB = emitEndCf(
MI);
702 assert(
false &&
"Attempt to process unsupported instruction");
711 case AMDGPU::S_AND_B64:
712 case AMDGPU::S_OR_B64:
713 case AMDGPU::S_AND_B32:
714 case AMDGPU::S_OR_B32:
716 combineMasks(MaskMI);
729 if (!
I.isDebugInstr() && !
I.isUnconditionalBranch())
743 if (
P->getFallThrough(
false) == &
MBB)
745 P->ReplaceUsesOfBlockWith(&
MBB, Succ);
746 DTUpdates.
push_back({DomTreeT::Insert,
P, Succ});
776 TII =
ST.getInstrInfo();
777 TRI = &
TII->getRegisterInfo();
782 BoolRC =
TRI->getBoolRC();
785 AndOpc = AMDGPU::S_AND_B32;
786 OrOpc = AMDGPU::S_OR_B32;
787 XorOpc = AMDGPU::S_XOR_B32;
788 MovTermOpc = AMDGPU::S_MOV_B32_term;
789 Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
790 XorTermrOpc = AMDGPU::S_XOR_B32_term;
791 OrTermrOpc = AMDGPU::S_OR_B32_term;
792 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
793 Exec = AMDGPU::EXEC_LO;
795 AndOpc = AMDGPU::S_AND_B64;
796 OrOpc = AMDGPU::S_OR_B64;
797 XorOpc = AMDGPU::S_XOR_B64;
798 MovTermOpc = AMDGPU::S_MOV_B64_term;
799 Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
800 XorTermrOpc = AMDGPU::S_XOR_B64_term;
801 OrTermrOpc = AMDGPU::S_OR_B64_term;
802 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
807 const bool CanDemote =
809 for (
auto &
MBB : MF) {
810 bool IsKillBlock =
false;
812 if (
TII->isKillTerminator(
Term.getOpcode())) {
818 if (CanDemote && !IsKillBlock) {
819 for (
auto &
MI :
MBB) {
820 if (
MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
828 bool Changed =
false;
831 BI != MF.end(); BI = NextBB) {
832 NextBB = std::next(BI);
842 switch (
MI.getOpcode()) {
844 case AMDGPU::SI_ELSE:
845 case AMDGPU::SI_IF_BREAK:
846 case AMDGPU::SI_WATERFALL_LOOP:
847 case AMDGPU::SI_LOOP:
848 case AMDGPU::SI_END_CF:
849 SplitMBB = process(
MI);
854 if (SplitMBB !=
MBB) {
855 MBB = Next->getParent();
864 for (
Register Reg : RecomputeRegs) {
870 RecomputeRegs.
clear();
871 LoweredEndCf.
clear();
878bool SILowerControlFlowLegacy::runOnMachineFunction(
MachineFunction &MF) {
880 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
881 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() :
nullptr;
883 auto *LVWrapper = getAnalysisIfAvailable<LiveVariablesWrapperPass>();
884 LiveVariables *LV = LVWrapper ? &LVWrapper->getLV() :
nullptr;
885 auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
888 getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
890 PDTWrapper ? &PDTWrapper->getPostDomTree() :
nullptr;
891 return SILowerControlFlow(LIS, LV, MDT, PDT).run(MF);
904 bool Changed = SILowerControlFlow(LIS, LV, MDT, PDT).run(MF);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< bool > RemoveRedundantEndcf("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI)
This file defines the SmallSet class.
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an Operation in the Expression.
Implements a dense probed hash-table based set.
Core dominator tree base class.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
LLVM_ABI void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
pred_iterator pred_begin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
iterator_range< iterator > terminators()
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
void setIsDead(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static bool isVALU(const MachineInstr &MI)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A vector that has set insertion semantics.
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
char & SILowerControlFlowLegacyID
@ Xor
Bitwise or logical XOR of integers.
VarInfo - This represents the regions where a virtual register is live in the program.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...