40#define DEBUG_TYPE "x86-vzeroupper"
44 cl::desc(
"Minimize AVX to SSE transition penalty"),
47STATISTIC(NumVZU,
"Number of vzeroupper instructions inserted");
55 bool runOnMachineFunction(MachineFunction &MF)
override;
57 MachineFunctionProperties getRequiredProperties()
const override {
58 return MachineFunctionProperties().setNoVRegs();
61 StringRef getPassName()
const override {
return "X86 vzeroupper inserter"; }
64 void processBasicBlock(MachineBasicBlock &
MBB);
66 MachineBasicBlock &
MBB);
67 void addDirtySuccessor(MachineBasicBlock &
MBB);
69 using BlockExitState =
enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
71 static const char* getBlockExitStateName(BlockExitState ST);
92 BlockExitState ExitState = PASS_THROUGH;
93 bool AddedToDirtySuccessors =
false;
96 BlockState() =
default;
99 using BlockStateMap = SmallVector<BlockState, 8>;
100 using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;
102 BlockStateMap BlockStates;
103 DirtySuccessorsWorkList DirtySuccessors;
106 const TargetInstrInfo *TII;
113char VZeroUpperInserter::ID = 0;
116 return new VZeroUpperInserter();
120const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
122 case PASS_THROUGH:
return "Pass-through";
123 case EXITS_DIRTY:
return "Exits-dirty";
124 case EXITS_CLEAN:
return "Exits-clean";
133 return (
Reg >= X86::YMM0 &&
Reg <= X86::YMM15) ||
134 (
Reg >= X86::ZMM0 &&
Reg <= X86::ZMM15);
138 for (std::pair<MCRegister, Register> LI :
MRI.liveins())
146 for (
unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
150 for (
unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
173 assert(
MI.isCall() &&
"Can only be called on call instructions.");
183 MachineBasicBlock &
MBB) {
186 EverMadeChange =
true;
190void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &
MBB) {
191 if (!BlockStates[
MBB.
getNumber()].AddedToDirtySuccessors) {
193 BlockStates[
MBB.
getNumber()].AddedToDirtySuccessors =
true;
199void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &
MBB) {
202 BlockExitState CurState = PASS_THROUGH;
205 for (MachineInstr &
MI :
MBB) {
206 bool IsCall =
MI.isCall();
207 bool IsReturn =
MI.isReturn();
208 bool IsControlFlow = IsCall || IsReturn;
212 if (IsX86INTR && IsReturn)
216 if (
MI.getOpcode() == X86::VZEROALL ||
MI.getOpcode() == X86::VZEROUPPER) {
217 CurState = EXITS_CLEAN;
222 if (!IsControlFlow && CurState == EXITS_DIRTY)
228 CurState = EXITS_DIRTY;
252 if (CurState == EXITS_DIRTY) {
256 insertVZeroUpper(
MI,
MBB);
257 CurState = EXITS_CLEAN;
258 }
else if (CurState == PASS_THROUGH) {
265 CurState = EXITS_CLEAN;
270 << getBlockExitStateName(CurState) <<
'\n');
272 if (CurState == EXITS_DIRTY)
274 addDirtySuccessor(*Succ);
281bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
286 if (!
ST.hasAVX() || !
ST.insertVZEROUPPER())
288 TII =
ST.getInstrInfo();
290 EverMadeChange =
false;
298 bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
299 for (
const auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) {
302 if (!
MRI.reg_nodbg_empty(R)) {
313 "X86VZeroUpper state should be clear");
319 for (MachineBasicBlock &
MBB : MF)
320 processBasicBlock(
MBB);
324 if (FnHasLiveInYmmOrZmm)
325 addDirtySuccessor(MF.front());
330 while (!DirtySuccessors.
empty()) {
331 MachineBasicBlock &
MBB = *DirtySuccessors.
back();
337 if (BBState.FirstUnguardedCall !=
MBB.
end())
338 insertVZeroUpper(BBState.FirstUnguardedCall,
MBB);
343 if (BBState.ExitState == PASS_THROUGH) {
345 <<
" was Pass-through, is now Dirty-out.\n");
347 addDirtySuccessor(*Succ);
352 return EverMadeChange;
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const HexagonInstrInfo * TII
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
static bool callHasRegMask(MachineInstr &MI)
Check if given call instruction has a RegMask operand.
static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI)
static bool hasYmmOrZmmReg(MachineInstr &MI)
static bool isYmmOrZmmReg(MCRegister Reg)
VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO)
static cl::opt< bool > UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, cl::desc("Minimize AVX to SSE transition penalty"), cl::init(true))
FunctionPass class - This class is used to implement most global optimizations.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
Wrapper class representing physical registers. Should be passed by value.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
iterator_range< succ_iterator > successors()
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void push_back(const T &Elt)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createX86IssueVZeroUpperPass()
This pass inserts AVX vzeroupper instructions before each call to avoid transition penalty between functions encoded with AVX and SSE.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.