78#define DEBUG_TYPE "x86-cmov-conversion"
80STATISTIC(NumOfSkippedCmovGroups,
"Number of unsupported CMOV-groups");
81STATISTIC(NumOfCmovGroupCandidate,
"Number of CMOV-group candidates");
82STATISTIC(NumOfLoopCandidate,
"Number of CMOV-conversion profitable loops");
83STATISTIC(NumOfOptimizedCmovGroups,
"Number of optimized CMOV-groups");
88 cl::desc(
"Enable the X86 cmov-to-branch optimization."),
93 cl::desc(
"Minimum gain per loop (in cycles) threshold."),
97 "x86-cmov-converter-force-mem-operand",
98 cl::desc(
"Convert cmovs to branches whenever they have memory operands."),
102 "x86-cmov-converter-force-all",
103 cl::desc(
"Convert all cmovs to branches."),
138 CmovGroups &CmovInstGroups,
139 bool IncludeLoads =
false);
148 CmovGroups &CmovInstGroups);
158char X86CmovConverterPass::ID = 0;
160void X86CmovConverterPass::getAnalysisUsage(
AnalysisUsage &AU)
const {
178 bool Changed =
false;
179 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
184 TSchedModel.init(&STI);
192 CmovGroups AllCmovGroups;
194 if (collectCmovCandidates(
Blocks, AllCmovGroups,
true)) {
195 for (
auto &Group : AllCmovGroups) {
205 convertCmovInstsToBranches(Group);
240 for (
int i = 0; i < (int)
Loops.size(); ++i)
245 if (!CurrLoop->getSubLoops().empty())
249 CmovGroups CmovInstGroups;
251 if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
254 if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
259 for (
auto &Group : CmovInstGroups)
260 convertCmovInstsToBranches(Group);
266bool X86CmovConverterPass::collectCmovCandidates(
297 bool FoundNonCMOVInst =
false;
299 bool SkipGroup =
false;
301 for (
auto &
I : *
MBB) {
303 if (
I.isDebugInstr())
310 !
I.getFlag(MachineInstr::MIFlag::Unpredictable) &&
311 (IncludeLoads || !
I.mayLoad())) {
318 FoundNonCMOVInst =
false;
324 if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
331 else if (CC != MemOpCC)
338 MRI->use_nodbg_instructions(
I.defs().begin()->getReg()),
340 return UseI.getOpcode() == X86::SUBREG_TO_REG;
352 FoundNonCMOVInst =
true;
355 if (
I.definesRegister(X86::EFLAGS,
nullptr)) {
359 CmovInstGroups.push_back(Group);
361 ++NumOfSkippedCmovGroups;
370 CmovInstGroups.push_back(Group);
372 ++NumOfSkippedCmovGroups;
375 NumOfCmovGroupCandidate += CmovInstGroups.size();
376 return !CmovInstGroups.empty();
388 divideCeil(TrueOpDepth * 3 + FalseOpDepth, 4),
389 divideCeil(FalseOpDepth * 3 + TrueOpDepth, 4));
392bool X86CmovConverterPass::checkForProfitableCmovCandidates(
401 static const unsigned LoopIterations = 2;
403 DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
404 enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
412 DepthMap[
nullptr] = {0, 0};
415 for (
auto &Group : CmovInstGroups)
441 for (DepthInfo &MaxDepth : LoopDepth) {
444 RegDefMaps[PhyRegType].
clear();
447 if (
MI.isDebugInstr())
449 unsigned MIDepth = 0;
450 unsigned MIDepthOpt = 0;
451 bool IsCMOV = CmovInstructions.
count(&
MI);
452 for (
auto &MO :
MI.uses()) {
454 if (!MO.isReg() || !MO.isUse())
457 auto &RDM = RegDefMaps[
Reg.isVirtual()];
459 OperandToDefMap[&MO] =
DefMI;
461 MIDepth = std::max(MIDepth,
Info.Depth);
463 MIDepthOpt = std::max(MIDepthOpt,
Info.OptDepth);
469 DepthMap[OperandToDefMap.
lookup(&
MI.getOperand(1))].OptDepth,
470 DepthMap[OperandToDefMap.
lookup(&
MI.getOperand(2))].OptDepth);
473 for (
auto &MO :
MI.operands()) {
474 if (!MO.isReg() || !MO.isDef())
477 RegDefMaps[
Reg.isVirtual()][
Reg] = &
MI;
480 unsigned Latency = TSchedModel.computeInstrLatency(&
MI);
482 MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
483 MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
488 unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
489 LoopDepth[1].Depth - LoopDepth[1].OptDepth};
521 bool WorthOptLoop =
false;
522 if (Diff[1] == Diff[0])
523 WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
524 else if (Diff[1] > Diff[0])
526 (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].
Depth - LoopDepth[0].
Depth) &&
527 (Diff[1] * 8 >= LoopDepth[1].Depth);
532 ++NumOfLoopCandidate;
545 unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
546 CmovGroups TempGroups;
548 for (
auto &Group : TempGroups) {
549 bool WorthOpGroup =
true;
550 for (
auto *
MI : Group) {
554 auto UIs =
MRI->use_instructions(
MI->defs().begin()->getReg());
556 unsigned Op = UIs.begin()->getOpcode();
557 if (
Op == X86::MOV64rm ||
Op == X86::MOV32rm) {
558 WorthOpGroup =
false;
564 DepthMap[OperandToDefMap.
lookup(&
MI->getOperand(4))].Depth;
566 DepthMap[OperandToDefMap.
lookup(&
MI->getOperand(1))].Depth,
567 DepthMap[OperandToDefMap.
lookup(&
MI->getOperand(2))].Depth);
568 if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
569 WorthOpGroup =
false;
575 CmovInstGroups.push_back(Group);
578 return !CmovInstGroups.empty();
582 if (
MI->killsRegister(X86::EFLAGS,
nullptr))
591 for (
auto I = std::next(ItrMI), E = BB->
end();
I != E; ++
I) {
592 if (
I->readsRegister(X86::EFLAGS,
nullptr))
594 if (
I->definesRegister(X86::EFLAGS,
nullptr))
600 if (Succ->isLiveIn(X86::EFLAGS))
612 "Last instruction in a CMOV group must be a CMOV instruction");
615 for (
auto I =
First->getIterator(), E =
Last->getIterator();
I != E;
I++) {
616 if (
I->isDebugInstr())
622 for (
auto *
MI : DBGInstructions)
626void X86CmovConverterPass::convertCmovInstsToBranches(
628 assert(!Group.
empty() &&
"No CMOV instructions to convert");
629 ++NumOfOptimizedCmovGroups;
687 F->insert(It, FalseMBB);
688 F->insert(It, SinkMBB);
733 auto FRIt = FalseBBRegRewriteTable.
find(FalseReg);
734 if (FRIt == FalseBBRegRewriteTable.
end())
736 FalseReg = FRIt->second;
738 FalseBBRegRewriteTable[
MI.getOperand(0).getReg()] = FalseReg;
746 "Can only handle memory-operand cmov instructions with a condition "
747 "opposite to the selected branch direction.");
775 unsigned OldDebugInstrNum =
MI.peekDebugInstrNum();
781 assert(Unfolded &&
"Should never fail to unfold a loading cmov!");
787 "Last new instruction isn't the expected CMOV!");
790 if (&*MIItBegin == &
MI)
793 if (OldDebugInstrNum)
794 NewCMOV->setDebugInstrNum(OldDebugInstrNum);
798 for (
auto *NewMI : NewMIs) {
800 FalseMBB->
insert(FalseInsertionPoint, NewMI);
802 for (
auto &MOp : NewMI->uses()) {
805 auto It = FalseBBRegRewriteTable.
find(MOp.getReg());
806 if (It == FalseBBRegRewriteTable.
end())
809 MOp.setReg(It->second);
815 MOp.setIsKill(
false);
821 FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
833 Register DestReg = MIIt->getOperand(0).getReg();
834 Register Op1Reg = MIIt->getOperand(1).getReg();
835 Register Op2Reg = MIIt->getOperand(2).getReg();
843 auto Op1Itr = RegRewriteTable.
find(Op1Reg);
844 if (Op1Itr != RegRewriteTable.
end())
845 Op1Reg = Op1Itr->second.first;
847 auto Op2Itr = RegRewriteTable.
find(Op2Reg);
848 if (Op2Itr != RegRewriteTable.
end())
849 Op2Reg = Op2Itr->second.second;
854 MIB =
BuildMI(*SinkMBB, SinkInsertionPoint,
DL,
TII->get(X86::PHI), DestReg)
865 if (
unsigned InstrNum = MIIt->peekDebugInstrNum())
869 RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
874 if (MIItBegin != MIItEnd)
875 F->getProperties().resetNoPHIs();
882 L->addBasicBlockToLoop(FalseMBB, *MLI);
883 L->addBasicBlockToLoop(SinkMBB, *MLI);
894 return new X86CmovConverterPass();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
static cl::opt< bool > ForceAll("x86-cmov-converter-force-all", cl::desc("Convert all cmovs to branches."), cl::init(false), cl::Hidden)
static bool checkEFLAGSLive(MachineInstr *MI)
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth)
static cl::opt< unsigned > GainCycleThreshold("x86-cmov-converter-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForceMemOperand("x86-cmov-converter-force-mem-operand", cl::desc("Convert cmovs to branches whenever they have memory operands."), cl::init(true), cl::Hidden)
static void packCmovGroup(MachineInstr *First, MachineInstr *Last)
Given /p First CMOV instruction and /p Last CMOV instruction representing a group of CMOV instruction...
static cl::opt< bool > EnableCmovConverter("x86-cmov-converter", cl::desc("Enable the X86 cmov-to-branch optimization."), cl::init(true), cl::Hidden)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
FunctionPass class - This class is used to implement most global optimizations.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
void setDebugInstrNum(unsigned Num)
Set instruction number of this MachineInstr.
LLVM_ABI void dump() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
self_iterator getIterator()
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
CondCode getCondFromCMov(const MachineInstr &MI)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
FunctionPass * createX86CmovConverterPass()
This pass converts X86 cmov instructions into branch when profitable.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool hasSingleElement(ContainerTy &&C)
Returns true if the given container only contains a single element.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.