40#define DEBUG_TYPE "x86-vzeroupper"
44 cl::desc(
"Minimize AVX to SSE transition penalty"),
47STATISTIC(NumVZU,
"Number of vzeroupper instructions inserted");
69 using BlockExitState =
enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
71 static const char* getBlockExitStateName(BlockExitState ST);
92 BlockExitState ExitState = PASS_THROUGH;
93 bool AddedToDirtySuccessors =
false;
96 BlockState() =
default;
102 BlockStateMap BlockStates;
103 DirtySuccessorsWorkList DirtySuccessors;
113char VZeroUpperInserter::ID = 0;
116 return new VZeroUpperInserter();
120const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
122 case PASS_THROUGH:
return "Pass-through";
123 case EXITS_DIRTY:
return "Exits-dirty";
124 case EXITS_CLEAN:
return "Exits-clean";
133 return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||
134 (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);
138 for (std::pair<MCRegister, Register> LI :
MRI.liveins())
146 for (
unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
150 for (
unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
173 assert(
MI.isCall() &&
"Can only be called on call instructions.");
186 EverMadeChange =
true;
191 if (!BlockStates[
MBB.
getNumber()].AddedToDirtySuccessors) {
192 DirtySuccessors.push_back(&
MBB);
193 BlockStates[
MBB.
getNumber()].AddedToDirtySuccessors =
true;
202 BlockExitState CurState = PASS_THROUGH;
206 bool IsCall =
MI.isCall();
207 bool IsReturn =
MI.isReturn();
208 bool IsControlFlow = IsCall || IsReturn;
212 if (IsX86INTR && IsReturn)
216 if (
MI.getOpcode() == X86::VZEROALL ||
MI.getOpcode() == X86::VZEROUPPER) {
217 CurState = EXITS_CLEAN;
222 if (!IsControlFlow && CurState == EXITS_DIRTY)
228 CurState = EXITS_DIRTY;
252 if (CurState == EXITS_DIRTY) {
256 insertVZeroUpper(
MI,
MBB);
257 CurState = EXITS_CLEAN;
258 }
else if (CurState == PASS_THROUGH) {
265 CurState = EXITS_CLEAN;
270 << getBlockExitStateName(CurState) <<
'\n');
272 if (CurState == EXITS_DIRTY)
274 addDirtySuccessor(*Succ);
286 if (!
ST.hasAVX() || !
ST.insertVZEROUPPER())
288 TII =
ST.getInstrInfo();
290 EverMadeChange =
false;
298 bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
299 for (
const auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) {
302 if (!
MRI.reg_nodbg_empty(R)) {
312 assert(BlockStates.empty() && DirtySuccessors.empty() &&
313 "X86VZeroUpper state should be clear");
320 processBasicBlock(
MBB);
324 if (FnHasLiveInYmmOrZmm)
325 addDirtySuccessor(MF.front());
330 while (!DirtySuccessors.empty()) {
332 DirtySuccessors.pop_back();
337 if (BBState.FirstUnguardedCall !=
MBB.
end())
338 insertVZeroUpper(BBState.FirstUnguardedCall,
MBB);
343 if (BBState.ExitState == PASS_THROUGH) {
345 <<
" was Pass-through, is now Dirty-out.\n");
347 addDirtySuccessor(*Succ);
352 return EverMadeChange;
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const HexagonInstrInfo * TII
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static bool callHasRegMask(MachineInstr &MI)
Check if given call instruction has a RegMask operand.
static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI)
static bool hasYmmOrZmmReg(MachineInstr &MI)
static bool isYmmOrZmmReg(MCRegister Reg)
VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO)
static cl::opt< bool > UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, cl::desc("Minimize AVX to SSE transition penalty"), cl::init(true))
FunctionPass class - This class is used to implement most global optimizations.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Wrapper class representing physical registers. Should be passed by value.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator_range< succ_iterator > successors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ X86_INTR
x86 hardware interrupt context.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createX86IssueVZeroUpperPass()
This pass inserts AVX vzeroupper instructions before each call to avoid transition penalty between fu...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.