X86FrameLowering.cpp (LLVM 21.0.0git)
//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFPImpl - Return true if the specified function should have a dedicated
/// frame pointer register. This is true if the function has variable sized
/// allocas or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}
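
// Example (editor's note, not part of the original file): for a 64-bit
// destination, getMOVriOpcode(true, 0x80000000) picks MOV32ri64 because the
// immediate zero-extends; getMOVriOpcode(true, -1) picks MOV64ri32 (imm32,
// sign-extended); and 0x123456789 needs the full 10-byte MOV64ri encoding.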

// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching PUSH
// and POP instructions, without going through memory or through the training
// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
// memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}
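
// Illustration (editor's sketch, not from the original file): with push2/pop2
// and PPX available, a balanced prologue/epilogue spill pair might look like
//   push2p  %r14, %r15      ; one PPX-hinted push of two GPRs
//   ...
//   pop2p   %r15, %r14      ; matching PPX-hinted pop, reverse order
// The hints are purely a performance signal; an unbalanced pair only forfeits
// the memory-renaming fast path and cannot change program behavior.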

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    MCRegister Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags value that is not defined by a
      // previous terminator: EFLAGS is live-in of the region composed by
      // the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

constexpr int64_t MaxSPChunk = (1LL << 31) - 1;

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  if (!Uses64BitFramePtr && !isUInt<32>(Offset)) {
    // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more.
    // This might be unreachable code, so don't complain now; just trap if
    // it's reached at runtime.
    BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
    return;
  }

  uint64_t Chunk = MaxSPChunk;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = getX86SubSuperRegister(TRI->findDeadCallerSavedReg(MBB, MBBI),
                                   Uses64BitFramePtr ? 64 : 32);

    unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
                                 : getADDrrOpcode(Uses64BitFramePtr);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)),
              Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)),
              Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}
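
// Example (editor's illustration): on x86-64 without inline probing, a call
// such as emitSPUpdate(MBB, MBBI, DL, /*NumBytes=*/-(1LL << 32), false) takes
// the "Offset > Chunk" path above and, assuming RAX is dead here, emits
//   movabsq $4294967296, %rax
//   subq    %rax, %rsp
// rather than a chain of three 2^31-1 byte SUB instructions.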

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

template <typename FoundT, typename CalcT>
int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI,
                                         FoundT FoundStackAdjust,
                                         CalcT CalcNewOffset,
                                         bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return CalcNewOffset(0);

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  int64_t Offset = 0;
  for (;;) {
    unsigned Opc = PI->getOpcode();

    if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
        PI->getOperand(0).getReg() == StackPtr) {
      assert(PI->getOperand(1).getReg() == StackPtr);
      Offset = PI->getOperand(2).getImm();
    } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
               PI->getOperand(0).getReg() == StackPtr &&
               PI->getOperand(1).getReg() == StackPtr &&
               PI->getOperand(2).getImm() == 1 &&
               PI->getOperand(3).getReg() == X86::NoRegister &&
               PI->getOperand(5).getReg() == X86::NoRegister) {
      // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
      Offset = PI->getOperand(4).getImm();
    } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
               PI->getOperand(0).getReg() == StackPtr) {
      assert(PI->getOperand(1).getReg() == StackPtr);
      Offset = -PI->getOperand(2).getImm();
    } else
      return CalcNewOffset(0);

    FoundStackAdjust(PI, Offset);
    if (std::abs((int64_t)CalcNewOffset(Offset)) < MaxSPChunk)
      break;

    if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
      return CalcNewOffset(0);

    PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
  }

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return CalcNewOffset(Offset);
}

int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI,
                                         int64_t AddOffset,
                                         bool doMergeWithPrevious) const {
  return mergeSPUpdates(
      MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },
      doMergeWithPrevious);
}
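
// Usage sketch (editor's illustration): the prologue folds an adjacent
// "sub/add $N, %rsp" into the allocation it is about to emit with roughly
//   NumBytes = mergeSPUpdates(MBB, MBBI, NumBytes,
//                             /*doMergeWithPrevious=*/true);
// which erases the matched SP update (and its single CFI instruction, if any)
// and returns the combined offset.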

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // |   retaddr   |
        // |     ebp     |
        // |             |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}
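
// Byte-level illustration (editor's note): for a register whose unwind rule is
// "value saved at FP + Offset", the escape built above encodes
//   DW_CFA_expression, ULEB128(DwarfReg), 2, DW_OP_breg<FP>, SLEB128(Offset)
// where the literal 2 is the length of the DWARF expression that follows; the
// code assumes the SLEB128-encoded offset fits in a single byte.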

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes
  // are left between the unaligned rsp and current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust the Dwarf CFA offset here; the last position of the
  // stack has been defined.
}
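
// Illustration (editor's sketch): for Offset = 0x2800 with a 0x1000-byte probe
// size and AlignOffset = 0, the unrolled expansion above emits roughly
//   subq $0x1000, %rsp
//   movq $0, (%rsp)          # probe page 1
//   subq $0x1000, %rsp
//   movq $0, (%rsp)          # probe page 2
//   subq $0x800, %rsp        # tail, smaller than a page, left unprobed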

void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  // save loop bound
  {
    const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);

    // Can we calculate the loop bound using SUB with a 32-bit immediate?
    // Note that the immediate gets sign-extended when used with a 64-bit
    // register, so in that case we only have 31 bits to work with.
    bool canUseSub =
        Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);

    if (canUseSub) {
      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addImm(BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
    } else if (Uses64BitFramePtr) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
          .addImm(-BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      llvm_unreachable("Offset too large for 32-bit stack pointer");
    }

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}
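
// Illustration (editor's sketch): a 64 KiB allocation with 4 KiB pages goes
// through the loop expansion above as approximately
//   movq %rsp, %r11
//   subq $0x10000, %r11      # loop bound, kept in a loop-invariant register
// .LtestMBB:
//   subq $0x1000, %rsp       # allocate one page
//   movq $0, (%rsp)          # touch it
//   cmpq %r11, %rsp
//   jne  .LtestMBB
// with no tail adjustment, since the size is a multiple of the probe size.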

void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  if (InProlog)
    RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  if (InProlog)
    LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  if (InProlog)
    LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  if (InProlog) {
    LivePhysRegs LiveRegs;
    computeAndAddLiveIns(LiveRegs, *ContinueMBB);
  }

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
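
// Example (editor's note): SPAdjust = 100 gives min(100, 128) = 100, and
// 100 & -16 = 96, the largest 16-byte-aligned offset not exceeding 100; any
// SPAdjust >= 128 simply yields 128, which is already 16-byte aligned.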

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that, in the worst case, fewer than StackProbeSize
  // bytes are left unprobed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
          .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/
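
/*
  Concrete example (editor's sketch, not from the original file): a Win64
  function with one callee-saved GPR and 72 bytes of locals could produce

      pushq %rsi
      .seh_pushreg %rsi
      subq  $72, %rsp
      .seh_stackalloc 72
      .seh_endprologue

  and, if it also needed a frame pointer, a "leaq 64(%rsp), %rbp" paired with
  ".seh_setframe %rbp, 64", the offset coming from calculateSetFPREG(72) = 64.
*/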
1586
1588 MachineBasicBlock &MBB) const {
1589 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1590 "MF used frame lowering for wrong subtarget");
1592 MachineFrameInfo &MFI = MF.getFrameInfo();
1593 const Function &Fn = MF.getFunction();
1595 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1596 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1597 bool IsFunclet = MBB.isEHFuncletEntry();
1599 if (Fn.hasPersonalityFn())
1600 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1601 bool FnHasClrFunclet =
1602 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1603 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1604 bool HasFP = hasFP(MF);
1605 bool IsWin64Prologue = isWin64Prologue(MF);
1606 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1607 // FIXME: Emit FPO data for EH funclets.
1608 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1610 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1611 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1613 const Register MachineFramePtr =
1615 : FramePtr;
1616 Register BasePtr = TRI->getBaseRegister();
1617 bool HasWinCFI = false;
1618
1619 // Debug location must be unknown since the first debug location is used
1620 // to determine the end of the prologue.
1621 DebugLoc DL;
1622 Register ArgBaseReg;
1623
1624 // Emit extra prolog for argument stack slot reference.
1625 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1626 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1627 // Creat extra prolog for stack realignment.
1628 ArgBaseReg = MI->getOperand(0).getReg();
1629 // leal 4(%esp), %basereg
1630 // .cfi_def_cfa %basereg, 0
1631 // andl $-128, %esp
1632 // pushl -4(%basereg)
1633 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1634 ArgBaseReg)
1636 .addImm(1)
1637 .addUse(X86::NoRegister)
1639 .addUse(X86::NoRegister)
1641 if (NeedsDwarfCFI) {
1642 // .cfi_def_cfa %basereg, 0
1643 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1644 BuildCFI(MBB, MBBI, DL,
1645 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1647 }
1648 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1649 int64_t Offset = -(int64_t)SlotSize;
1650 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1651 .addReg(ArgBaseReg)
1652 .addImm(1)
1653 .addReg(X86::NoRegister)
1654 .addImm(Offset)
1655 .addReg(X86::NoRegister)
1657 }
1658
1659 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1660 // tail call.
1661 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1662 if (TailCallArgReserveSize && IsWin64Prologue)
1663 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1664
1665 const bool EmitStackProbeCall =
1667 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1668
1669 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1673 // The special symbol below is absolute and has a *value* suitable to be
1674 // combined with the frame pointer directly.
1675 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1676 .addUse(MachineFramePtr)
1677 .addUse(X86::RIP)
1678 .addImm(1)
1679 .addUse(X86::NoRegister)
1680 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1682 .addUse(X86::NoRegister);
1683 break;
1684 }
1685 [[fallthrough]];
1686
1688 assert(
1689 !IsWin64Prologue &&
1690 "win64 prologue does not set the bit 60 in the saved frame pointer");
1691 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1692 .addUse(MachineFramePtr)
1693 .addImm(60)
1695 break;
1696
1698 break;
1699 }
1700 }
1701
1702 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1703 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1704 // stack alignment.
1706 Fn.arg_size() == 2) {
1707 StackSize += 8;
1708 MFI.setStackSize(StackSize);
1709
1710 // Update the stack pointer by pushing a register. This is the instruction
1711 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1712 // Hard-coding the update to a push avoids emitting a second
1713 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1714 // probing isn't needed anyways for an 8-byte update.
1715 // Pushing a register leaves us in a similar situation to a regular
1716 // function call where we know that the address at (rsp-8) is writeable.
1717 // That way we avoid any off-by-ones with stack probing for additional
1718 // stack pointer updates later on.
1719 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1720 .addReg(X86::RAX, RegState::Undef)
1722 }
1723
1724 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1725 // function, and use up to 128 bytes of stack space, don't have a frame
1726 // pointer, calls, or dynamic alloca then we do not need to adjust the
1727 // stack pointer (we fit in the Red Zone). We also check that we don't
1728 // push and pop from the stack.
1729 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1730 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1731 !MFI.adjustsStack() && // No calls.
1732 !EmitStackProbeCall && // No stack probes.
1733 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1734 !MF.shouldSplitStack()) { // Regular stack
1735 uint64_t MinSize =
1737 if (HasFP)
1738 MinSize += SlotSize;
1739 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1740 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1741 MFI.setStackSize(StackSize);
1742 }
1743
1744 // Insert stack pointer adjustment for later moving of return addr. Only
1745 // applies to tail call optimized functions where the callee argument stack
1746 // size is bigger than the callers.
1747 if (TailCallArgReserveSize != 0) {
1748 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1749 /*InEpilogue=*/false)
1751 }
1752
1753 // Mapping for machine moves:
1754 //
1755 // DST: VirtualFP AND
1756 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1757 // ELSE => DW_CFA_def_cfa
1758 //
1759 // SRC: VirtualFP AND
1760 // DST: Register => DW_CFA_def_cfa_register
1761 //
1762 // ELSE
1763 // OFFSET < 0 => DW_CFA_offset_extended_sf
1764 // REG < 64 => DW_CFA_offset + Reg
1765 // ELSE => DW_CFA_offset_extended
1766
1767 uint64_t NumBytes = 0;
1768 int stackGrowth = -SlotSize;
1769
1770 // Find the funclet establisher parameter
1771 Register Establisher = X86::NoRegister;
1772 if (IsClrFunclet)
1773 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1774 else if (IsFunclet)
1775 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1776
1777 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1778 // Immediately spill establisher into the home slot.
1779 // The runtime cares about this.
1780 // MOV64mr %rdx, 16(%rsp)
1781 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1782 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1783 .addReg(Establisher)
1784 .setMIFlag(MachineInstr::FrameSetup);
1785 MBB.addLiveIn(Establisher);
1786 }
1787
1788 if (HasFP) {
1789 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1790
1791 // Calculate required stack adjustment.
1792 uint64_t FrameSize = StackSize - SlotSize;
1793 NumBytes =
1794 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1795
1796 // Callee-saved registers are pushed on stack before the stack is realigned.
1797 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1798 NumBytes = alignTo(NumBytes, MaxAlign);
1799
1800 // Save EBP/RBP into the appropriate stack slot.
1801 BuildMI(MBB, MBBI, DL,
1802 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
1803 .addReg(MachineFramePtr, RegState::Kill)
1804 .setMIFlag(MachineInstr::FrameSetup);
1805
1806 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1807 // Mark the place where EBP/RBP was saved.
1808 // Define the current CFA rule to use the provided offset.
1809 assert(StackSize);
1810 BuildCFI(MBB, MBBI, DL,
1811 MCCFIInstruction::cfiDefCfaOffset(
1812 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1813 MachineInstr::FrameSetup);
1814
1815 // Change the rule for the FramePtr to be an "offset" rule.
1816 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1817 BuildCFI(MBB, MBBI, DL,
1818 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1819 2 * stackGrowth -
1820 (int)TailCallArgReserveSize),
1821 MachineInstr::FrameSetup);
1822 }
1823
1824 if (NeedsWinCFI) {
1825 HasWinCFI = true;
1826 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1827 .addImm(FramePtr)
1828 .setMIFlag(MachineInstr::FrameSetup);
1829 }
1830
1831 if (!IsFunclet) {
1832 if (X86FI->hasSwiftAsyncContext()) {
1833 assert(!IsWin64Prologue &&
1834 "win64 prologue does not store async context right below rbp");
1835 const auto &Attrs = MF.getFunction().getAttributes();
1836
1837 // Before we update the live frame pointer we have to ensure there's a
1838 // valid (or null) asynchronous context in its slot just before FP in
1839 // the frame record, so store it now.
1840 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1841 // We have an initial context in r14, store it just before the frame
1842 // pointer.
1843 MBB.addLiveIn(X86::R14);
1844 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1845 .addReg(X86::R14)
1846 .setMIFlag(MachineInstr::FrameSetup);
1847 } else {
1848 // No initial context, store null so that there's no pointer that
1849 // could be misused.
1850 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1851 .addImm(0)
1852 .setMIFlag(MachineInstr::FrameSetup);
1853 }
1854
1855 if (NeedsWinCFI) {
1856 HasWinCFI = true;
1857 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1858 .addImm(X86::R14)
1859 .setMIFlag(MachineInstr::FrameSetup);
1860 }
1861
1862 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1863 .addUse(X86::RSP)
1864 .addImm(1)
1865 .addUse(X86::NoRegister)
1866 .addImm(8)
1867 .addUse(X86::NoRegister)
1868 .setMIFlag(MachineInstr::FrameSetup);
1869 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1870 .addUse(X86::RSP)
1871 .addImm(8)
1872 .setMIFlag(MachineInstr::FrameSetup);
1873 }
1874
1875 if (!IsWin64Prologue && !IsFunclet) {
1876 // Update EBP with the new base value.
1877 if (!X86FI->hasSwiftAsyncContext())
1878 BuildMI(MBB, MBBI, DL,
1879 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1880 FramePtr)
1881 .addReg(StackPtr)
1882 .setMIFlag(MachineInstr::FrameSetup);
1883
1884 if (NeedsDwarfCFI) {
1885 if (ArgBaseReg.isValid()) {
1886 SmallString<64> CfaExpr;
1887 CfaExpr.push_back(dwarf::DW_CFA_expression);
1888 uint8_t buffer[16];
1889 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1890 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1891 CfaExpr.push_back(2);
1892 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1893 CfaExpr.push_back(0);
1894 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1895 BuildCFI(MBB, MBBI, DL,
1896 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1897 MachineInstr::FrameSetup);
1898 } else {
1899 // Mark effective beginning of when frame pointer becomes valid.
1900 // Define the current CFA to use the EBP/RBP register.
1901 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1902 BuildCFI(
1903 MBB, MBBI, DL,
1904 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1905 MachineInstr::FrameSetup);
1906 }
1907 }
1908
1909 if (NeedsWinFPO) {
1910 // .cv_fpo_setframe $FramePtr
1911 HasWinCFI = true;
1912 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1913 .addImm(FramePtr)
1914 .addImm(0)
1915 .setMIFlag(MachineInstr::FrameSetup);
1916 }
1917 }
1918 }
1919 } else {
1920 assert(!IsFunclet && "funclets without FPs not yet implemented");
1921 NumBytes =
1922 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1923 }
1924
1925 // Update the offset adjustment, which is mainly used by codeview to translate
1926 // from ESP to VFRAME relative local variable offsets.
1927 if (!IsFunclet) {
1928 if (HasFP && TRI->hasStackRealignment(MF))
1929 MFI.setOffsetAdjustment(-NumBytes);
1930 else
1931 MFI.setOffsetAdjustment(-StackSize);
1932 }
1933
1934 // For EH funclets, only allocate enough space for outgoing calls. Save the
1935 // NumBytes value that we would've used for the parent frame.
1936 unsigned ParentFrameNumBytes = NumBytes;
1937 if (IsFunclet)
1938 NumBytes = getWinEHFuncletFrameSize(MF);
1939
1940 // Skip the callee-saved push instructions.
1941 bool PushedRegs = false;
1942 int StackOffset = 2 * stackGrowth;
1943 MachineBasicBlock::iterator LastCSPush = MBBI;
1944 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1945 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1946 return false;
1947 unsigned Opc = MBBI->getOpcode();
1948 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1949 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1950 };
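// E.g. (sketch) this scan walks a callee-saved save sequence such as
//   push  %rbx
//   push2 %r15, %r14
// accepting each FrameSetup-flagged push/pushp/push2 variant in turn.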
1951
1952 while (IsCSPush(MBBI)) {
1953 PushedRegs = true;
1954 Register Reg = MBBI->getOperand(0).getReg();
1955 LastCSPush = MBBI;
1956 ++MBBI;
1957 unsigned Opc = LastCSPush->getOpcode();
1958
1959 if (!HasFP && NeedsDwarfCFI) {
1960 // Mark callee-saved push instruction.
1961 // Define the current CFA rule to use the provided offset.
1962 assert(StackSize);
1963 // Compared to push, push2 introduces more stack offset (one more
1964 // register).
1965 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1966 StackOffset += stackGrowth;
1967 BuildCFI(MBB, MBBI, DL,
1969 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset), MachineInstr::FrameSetup);
1970 StackOffset += stackGrowth;
1971 }
1972
1973 if (NeedsWinCFI) {
1974 HasWinCFI = true;
1975 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1976 .addImm(Reg)
1977 .setMIFlag(MachineInstr::FrameSetup);
1978 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1979 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1980 .addImm(LastCSPush->getOperand(1).getReg())
1981 .setMIFlag(MachineInstr::FrameSetup);
1982 }
1983 }
1984
1985 // Realign stack after we pushed callee-saved registers (so that we'll be
1986 // able to calculate their offsets from the frame pointer).
1987 // Don't do this for Win64, it needs to realign the stack after the prologue.
1988 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1989 !ArgBaseReg.isValid()) {
1990 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1991 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1992
1993 if (NeedsWinCFI) {
1994 HasWinCFI = true;
1995 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1996 .addImm(MaxAlign)
1997 .setMIFlag(MachineInstr::FrameSetup);
1998 }
1999 }
2000
2001 // If there is an SUB32ri of ESP immediately before this instruction, merge
2002 // the two. This can be the case when tail call elimination is enabled and
2003 // the callee has more arguments than the caller.
2004 NumBytes = mergeSPUpdates(
2005 MBB, MBBI, [NumBytes](int64_t Offset) { return NumBytes - Offset; },
2006 true);
2007
2008 // Adjust stack pointer: ESP -= numbytes.
2009
2010 // Windows and cygwin/mingw require a prologue helper routine when allocating
2011 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
2012 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
2013 // stack and adjust the stack pointer in one go. The 64-bit version of
2014 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
2015 // responsible for adjusting the stack pointer. Touching the stack at 4K
2016 // increments is necessary to ensure that the guard pages used by the OS
2017 // virtual memory manager are allocated in correct sequence.
2018 uint64_t AlignedNumBytes = NumBytes;
2019 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
2020 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
2021 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
2022 assert(!X86FI->getUsesRedZone() &&
2023 "The Red Zone is not accounted for in stack probes");
2024
2025 // Check whether EAX is livein for this block.
2026 bool isEAXAlive = isEAXLiveIn(MBB);
2027
2028 if (isEAXAlive) {
2029 if (Is64Bit) {
2030 // Save RAX
2031 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
2032 .addReg(X86::RAX, RegState::Kill)
2033 .setMIFlag(MachineInstr::FrameSetup);
2034 } else {
2035 // Save EAX
2036 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2037 .addReg(X86::EAX, RegState::Kill)
2038 .setMIFlag(MachineInstr::FrameSetup);
2039 }
2040 }
2041
2042 if (Is64Bit) {
2043 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2044 // Function prologue is responsible for adjusting the stack pointer.
2045 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2046 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
2047 .addImm(Alloc)
2048 .setMIFlag(MachineInstr::FrameSetup);
2049 } else {
2050 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2051 // We'll also use 4 already allocated bytes for EAX.
2052 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2053 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2054 .setMIFlag(MachineInstr::FrameSetup);
2055 }
2056
2057 // Call __chkstk, __chkstk_ms, or __alloca.
2058 emitStackProbe(MF, MBB, MBBI, DL, true);
2059
2060 if (isEAXAlive) {
2061 // Restore RAX/EAX
2062 MachineInstr *MI;
2063 if (Is64Bit)
2064 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2065 StackPtr, false, NumBytes - 8);
2066 else
2067 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2068 StackPtr, false, NumBytes - 4);
2069 MI->setFlag(MachineInstr::FrameSetup);
2070 MBB.insert(MBBI, MI);
2071 }
2072 } else if (NumBytes) {
2073 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2074 }
2075
2076 if (NeedsWinCFI && NumBytes) {
2077 HasWinCFI = true;
2078 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2079 .addImm(NumBytes)
2080 .setMIFlag(MachineInstr::FrameSetup);
2081 }
2082
2083 int SEHFrameOffset = 0;
2084 unsigned SPOrEstablisher;
2085 if (IsFunclet) {
2086 if (IsClrFunclet) {
2087 // The establisher parameter passed to a CLR funclet is actually a pointer
2088 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2089 // to find the root function establisher frame by loading the PSPSym from
2090 // the intermediate frame.
2091 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2092 MachinePointerInfo NoInfo;
2093 MBB.addLiveIn(Establisher);
2094 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2095 Establisher, false, PSPSlotOffset)
2096 .addMemOperand(MF.getMachineMemOperand(
2097 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)))
2098 ;
2099 // Save the root establisher back into the current funclet's (mostly
2100 // empty) frame, in case a sub-funclet or the GC needs it.
2101 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2102 false, PSPSlotOffset)
2103 .addReg(Establisher)
2104 .addMemOperand(MF.getMachineMemOperand(
2105 NoInfo,
2106 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2107 SlotSize, Align(SlotSize)));
2108 }
2109 SPOrEstablisher = Establisher;
2110 } else {
2111 SPOrEstablisher = StackPtr;
2112 }
2113
2114 if (IsWin64Prologue && HasFP) {
2115 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2116 // this calculation on the incoming establisher, which holds the value of
2117 // RSP from the parent frame at the end of the prologue.
2118 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2119 if (SEHFrameOffset)
2120 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2121 SPOrEstablisher, false, SEHFrameOffset);
2122 else
2123 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2124 .addReg(SPOrEstablisher);
2125
2126 // If this is not a funclet, emit the CFI describing our frame pointer.
2127 if (NeedsWinCFI && !IsFunclet) {
2128 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2129 HasWinCFI = true;
2130 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2131 .addImm(FramePtr)
2132 .addImm(SEHFrameOffset)
2133 .setMIFlag(MachineInstr::FrameSetup);
2134 if (isAsynchronousEHPersonality(Personality))
2135 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2136 }
2137 } else if (IsFunclet && STI.is32Bit()) {
2138 // Reset EBP / ESI to something good for funclets.
2139 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2140 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2141 // into the registration node so that the runtime will restore it for us.
2142 if (!MBB.isCleanupFuncletEntry()) {
2143 assert(Personality == EHPersonality::MSVC_CXX);
2144 Register FrameReg;
2145 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2146 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2147 // ESP is the first field, so no extra displacement is needed.
2148 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2149 false, EHRegOffset)
2150 .addReg(X86::ESP);
2151 }
2152 }
2153
2154 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2155 const MachineInstr &FrameInstr = *MBBI;
2156 ++MBBI;
2157
2158 if (NeedsWinCFI) {
2159 int FI;
2160 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2161 if (X86::FR64RegClass.contains(Reg)) {
2162 int Offset;
2163 Register IgnoredFrameReg;
2164 if (IsWin64Prologue && IsFunclet)
2165 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2166 else
2167 Offset =
2168 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2169 SEHFrameOffset;
2170
2171 HasWinCFI = true;
2172 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2173 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2174 .addImm(Reg)
2175 .addImm(Offset)
2176 .setMIFlag(MachineInstr::FrameSetup);
2177 }
2178 }
2179 }
2180 }
2181
2182 if (NeedsWinCFI && HasWinCFI)
2183 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2184 .setMIFlag(MachineInstr::FrameSetup);
2185
2186 if (FnHasClrFunclet && !IsFunclet) {
2187 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2188 // immediately after the prolog) into the PSPSlot so that funclets
2189 // and the GC can recover it.
2190 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2191 auto PSPInfo = MachinePointerInfo::getFixedStack(
2192 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2193 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2194 PSPSlotOffset)
2195 .addReg(StackPtr)
2196 .addMemOperand(MF.getMachineMemOperand(
2197 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2198 SlotSize, Align(SlotSize)));
2199 }
2200
2201 // Realign stack after we spilled callee-saved registers (so that we'll be
2202 // able to calculate their offsets from the frame pointer).
2203 // Win64 requires aligning the stack after the prologue.
2204 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2205 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2206 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2207 }
2208
2209 // We already dealt with stack realignment and funclets above.
2210 if (IsFunclet && STI.is32Bit())
2211 return;
2212
2213 // If we need a base pointer, set it up here. It's whatever the value
2214 // of the stack pointer is at this point. Any variable size objects
2215 // will be allocated after this, so we can still use the base pointer
2216 // to reference locals.
2217 if (TRI->hasBasePointer(MF)) {
2218 // Update the base pointer with the current stack pointer.
2219 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2220 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2221 .addReg(SPOrEstablisher)
2222 .setMIFlag(MachineInstr::FrameSetup);
2223 if (X86FI->getRestoreBasePointer()) {
2224 // Stash value of base pointer. Saving RSP instead of EBP shortens
2225 // dependence chain. Used by SjLj EH.
2226 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2227 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2228 X86FI->getRestoreBasePointerOffset())
2229 .addReg(SPOrEstablisher)
2230 .setMIFlag(MachineInstr::FrameSetup);
2231 }
2232
2233 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2234 // Stash the value of the frame pointer relative to the base pointer for
2235 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2236 // it recovers the frame pointer from the base pointer rather than the
2237 // other way around.
2238 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2239 Register UsedReg;
2240 int Offset =
2241 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2242 .getFixed();
2243 assert(UsedReg == BasePtr);
2244 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2245 .addReg(FramePtr)
2246 .setMIFlag(MachineInstr::FrameSetup);
2247 }
2248 }
2249 if (ArgBaseReg.isValid()) {
2250 // Save argument base pointer.
2251 auto *MI = X86FI->getStackPtrSaveMI();
2252 int FI = MI->getOperand(1).getIndex();
2253 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2254 // movl %basereg, offset(%ebp)
2255 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2256 .addReg(ArgBaseReg)
2257 .setMIFlag(MachineInstr::FrameSetup);
2258 }
2259
2260 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2261 // Mark end of stack pointer adjustment.
2262 if (!HasFP && NumBytes) {
2263 // Define the current CFA rule to use the provided offset.
2264 assert(StackSize);
2265 BuildCFI(
2266 MBB, MBBI, DL,
2267 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2268 MachineInstr::FrameSetup);
2269 }
2270
2271 // Emit DWARF info specifying the offsets of the callee-saved registers.
2272 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2273 }
2274
2275 // X86 Interrupt handling function cannot assume anything about the direction
2276 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2277 // in each prologue of interrupt handler function.
2278 //
2279 // Create "cld" instruction only in these cases:
2280 // 1. The interrupt handling function uses any of the "rep" instructions.
2281 // 2. Interrupt handling function calls another function.
2282 // 3. If there are any inline asm blocks, as we do not know what they do
2283 //
2284 // TODO: We should also emit cld if we detect the use of std, but as of now,
2285 // the compiler does not even emit that instruction or even define it, so in
2286 // practice, this would only happen with inline asm, which we cover anyway.
2287 if (Fn.getCallingConv() == CallingConv::X86_INTR) {
2288 bool NeedsCLD = false;
2289
2290 for (const MachineBasicBlock &B : MF) {
2291 for (const MachineInstr &MI : B) {
2292 if (MI.isCall()) {
2293 NeedsCLD = true;
2294 break;
2295 }
2296
2297 if (isOpcodeRep(MI.getOpcode())) {
2298 NeedsCLD = true;
2299 break;
2300 }
2301
2302 if (MI.isInlineAsm()) {
2303 // TODO: Parse asm for rep instructions or call sites?
2304 // For now, let's play it safe and emit a cld instruction
2305 // just in case.
2306 NeedsCLD = true;
2307 break;
2308 }
2309 }
2310 }
2311
2312 if (NeedsCLD) {
2313 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2314 .setMIFlag(MachineInstr::FrameSetup);
2315 }
2316 }
2317
2318 // At this point we know if the function has WinCFI or not.
2319 MF.setHasWinCFI(HasWinCFI);
2320}
2321
2322 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2323 const MachineFunction &MF) const {
2324 // We can't use LEA instructions for adjusting the stack pointer if we don't
2325 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2326 // to deallocate the stack.
2327 // This means that we can use LEA for SP in two situations:
2328 // 1. We *aren't* using the Win64 ABI, which means we are free to use LEA.
2329 // 2. We *have* a frame pointer, which means we are permitted to use LEA.
2330 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2331}
2332
2333 static bool isFuncletReturnInstr(MachineInstr &MI) {
2334 switch (MI.getOpcode()) {
2335 case X86::CATCHRET:
2336 case X86::CLEANUPRET:
2337 return true;
2338 default:
2339 return false;
2340 }
2341 llvm_unreachable("impossible");
2342}
2343
2344// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2345// stack. It holds a pointer to the bottom of the root function frame. The
2346// establisher frame pointer passed to a nested funclet may point to the
2347// (mostly empty) frame of its parent funclet, but it will need to find
2348// the frame of the root function to access locals. To facilitate this,
2349// every funclet copies the pointer to the bottom of the root function
2350// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2351// same offset for the PSPSym in the root function frame that's used in the
2352// funclets' frames allows each funclet to dynamically accept any ancestor
2353// frame as its establisher argument (the runtime doesn't guarantee the
2354// immediate parent for some reason lost to history), and also allows the GC,
2355// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2356// frame with only a single offset reported for the entire method.
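// Sketch of the invariant described above: if the root function stores
// Initial-SP at [SP_after_prologue + K], every funclet stores the same
// root-frame-bottom pointer at [its own SP_after_prologue + K], so a single
// reported offset K serves the runtime and the GC for the whole method.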
2357unsigned
2358X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2359 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2360 Register SPReg;
2361 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2362 /*IgnoreSPUpdates*/ true)
2363 .getFixed();
2364 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2365 return static_cast<unsigned>(Offset);
2366}
2367
2368unsigned
2369X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2370 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2371 // This is the size of the pushed CSRs.
2372 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2373 // This is the size of callee saved XMMs.
2374 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2375 unsigned XMMSize =
2376 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2377 // This is the amount of stack a funclet needs to allocate.
2378 unsigned UsedSize;
2379 EHPersonality Personality =
2380 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2381 if (Personality == EHPersonality::CoreCLR) {
2382 // CLR funclets need to hold enough space to include the PSPSym, at the
2383 // same offset from the stack pointer (immediately after the prolog) as it
2384 // resides at in the main function.
2385 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2386 } else {
2387 // Other funclets just need enough stack for outgoing call arguments.
2388 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2389 }
2390 // RBP is not included in the callee saved register block. After pushing RBP,
2391 // everything is 16 byte aligned. Everything we allocate before an outgoing
2392 // call must also be 16 byte aligned.
2393 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2394 // Subtract out the size of the callee saved registers. This is how much stack
2395 // each funclet will allocate.
2396 return FrameSizeMinusRBP + XMMSize - CSSize;
2397}
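// Worked example (illustrative): with CSSize = 24, UsedSize = 40 and a
// 16-byte stack alignment, alignTo(24 + 40, 16) = 64, so a funclet with no
// XMM CSRs allocates 64 + 0 - 24 = 40 bytes on top of its pushed CSRs.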
2398
2399static bool isTailCallOpcode(unsigned Opc) {
2400 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2401 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2402 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2403}
2404
2405 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2406 MachineBasicBlock &MBB) const {
2407 const MachineFrameInfo &MFI = MF.getFrameInfo();
2408 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2409 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2410 MachineBasicBlock::iterator MBBI = Terminator;
2411 DebugLoc DL;
2412 if (MBBI != MBB.end())
2413 DL = MBBI->getDebugLoc();
2414 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2415 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2416 Register FramePtr = TRI->getFrameRegister(MF);
2417 Register MachineFramePtr =
2418 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2419
2420 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2421 bool NeedsWin64CFI =
2422 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2423 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2424
2425 // Get the number of bytes to allocate from the FrameInfo.
2426 uint64_t StackSize = MFI.getStackSize();
2427 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2428 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2429 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2430 bool HasFP = hasFP(MF);
2431 uint64_t NumBytes = 0;
2432
2433 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2434 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2435 MF.needsFrameMoves();
2436
2437 Register ArgBaseReg;
2438 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2439 unsigned Opc = X86::LEA32r;
2440 Register StackReg = X86::ESP;
2441 ArgBaseReg = MI->getOperand(0).getReg();
2442 if (STI.is64Bit()) {
2443 Opc = X86::LEA64r;
2444 StackReg = X86::RSP;
2445 }
2446 // leal -4(%basereg), %esp
2447 // .cfi_def_cfa %esp, 4
2448 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2449 .addUse(ArgBaseReg)
2450 .addImm(1)
2451 .addUse(X86::NoRegister)
2452 .addImm(-(int64_t)SlotSize)
2453 .addUse(X86::NoRegister)
2454 .setMIFlag(MachineInstr::FrameDestroy);
2455 if (NeedsDwarfCFI) {
2456 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2457 BuildCFI(MBB, MBBI, DL,
2458 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2459 MachineInstr::FrameDestroy);
2460 --MBBI;
2461 }
2462 --MBBI;
2463 }
2464
2465 if (IsFunclet) {
2466 assert(HasFP && "EH funclets without FP not yet implemented");
2467 NumBytes = getWinEHFuncletFrameSize(MF);
2468 } else if (HasFP) {
2469 // Calculate required stack adjustment.
2470 uint64_t FrameSize = StackSize - SlotSize;
2471 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2472
2473 // Callee-saved registers were pushed on stack before the stack was
2474 // realigned.
2475 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2476 NumBytes = alignTo(FrameSize, MaxAlign);
2477 } else {
2478 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2479 }
2480 uint64_t SEHStackAllocAmt = NumBytes;
2481
2482 // AfterPop is the position to insert .cfi_restore.
2483 MachineBasicBlock::iterator AfterPop = MBBI;
2484 if (HasFP) {
2485 if (X86FI->hasSwiftAsyncContext()) {
2486 // Discard the context.
2487 int64_t Offset = mergeSPAdd(MBB, MBBI, 16, true);
2488 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2489 }
2490 // Pop EBP.
2491 BuildMI(MBB, MBBI, DL,
2492 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2493 MachineFramePtr)
2494 .setMIFlag(MachineInstr::FrameDestroy);
2495
2496 // We need to reset FP to its untagged state on return. Bit 60 is currently
2497 // used to show the presence of an extended frame.
2498 if (X86FI->hasSwiftAsyncContext()) {
2499 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2500 .addUse(MachineFramePtr)
2501 .addImm(60)
2502 .setMIFlag(MachineInstr::FrameDestroy);
2503 }
2504
2505 if (NeedsDwarfCFI) {
2506 if (!ArgBaseReg.isValid()) {
2507 unsigned DwarfStackPtr =
2508 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2509 BuildCFI(MBB, MBBI, DL,
2510 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2511 MachineInstr::FrameDestroy);
2512 }
2513 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2514 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2515 BuildCFI(MBB, AfterPop, DL,
2516 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2517 MachineInstr::FrameDestroy);
2518 --MBBI;
2519 --AfterPop;
2520 }
2521 --MBBI;
2522 }
2523 }
2524
2525 MachineBasicBlock::iterator FirstCSPop = MBBI;
2526 // Skip the callee-saved pop instructions.
2527 while (MBBI != MBB.begin()) {
2528 MachineBasicBlock::iterator PI = std::prev(MBBI);
2529 unsigned Opc = PI->getOpcode();
2530
2531 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2532 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2533 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2534 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2535 Opc != X86::POP2P && Opc != X86::LEA64r))
2536 break;
2537 FirstCSPop = PI;
2538 }
2539
2540 --MBBI;
2541 }
2542 if (ArgBaseReg.isValid()) {
2543 // Restore argument base pointer.
2544 auto *MI = X86FI->getStackPtrSaveMI();
2545 int FI = MI->getOperand(1).getIndex();
2546 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2547 // movl offset(%ebp), %basereg
2548 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2549 .setMIFlag(MachineInstr::FrameDestroy);
2550 }
2551 MBBI = FirstCSPop;
2552
2553 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2554 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2555
2556 if (MBBI != MBB.end())
2557 DL = MBBI->getDebugLoc();
2558 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2559 // instruction, merge the two instructions.
2560 if (NumBytes || MFI.hasVarSizedObjects())
2561 NumBytes = mergeSPAdd(MBB, MBBI, NumBytes, true);
2562
2563 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2564 // slot before popping them off! The same applies when the stack was
2565 // realigned. Don't do this if this was a funclet epilogue, since funclets
2566 // will not do realignment or dynamic stack allocation.
2567 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2568 !IsFunclet) {
2569 if (TRI->hasStackRealignment(MF))
2570 MBBI = FirstCSPop;
2571 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2572 uint64_t LEAAmount =
2573 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2574
2575 if (X86FI->hasSwiftAsyncContext())
2576 LEAAmount -= 16;
2577
2578 // There are only two legal forms of epilogue:
2579 // - add SEHAllocationSize, %rsp
2580 // - lea SEHAllocationSize(%FramePtr), %rsp
2581 //
2582 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2583 // However, we may use this sequence if we have a frame pointer because the
2584 // effects of the prologue can safely be undone.
2585 if (LEAAmount != 0) {
2586 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2587 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2588 false, LEAAmount);
2589 --MBBI;
2590 } else {
2591 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2592 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2593 --MBBI;
2594 }
2595 } else if (NumBytes) {
2596 // Adjust stack pointer back: ESP += numbytes.
2597 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2598 if (!HasFP && NeedsDwarfCFI) {
2599 // Define the current CFA rule to use the provided offset.
2600 BuildCFI(MBB, MBBI, DL,
2601 MCCFIInstruction::cfiDefCfaOffset(
2602 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2603 MachineInstr::FrameDestroy);
2604 }
2605 --MBBI;
2606 }
2607
2608 if (NeedsWin64CFI && MF.hasWinCFI())
2609 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_BeginEpilogue));
2610
2611 if (!HasFP && NeedsDwarfCFI) {
2612 MBBI = FirstCSPop;
2613 int64_t Offset = -(int64_t)CSSize - SlotSize;
2614 // Mark callee-saved pop instruction.
2615 // Define the current CFA rule to use the provided offset.
2616 while (MBBI != MBB.end()) {
2617 MachineBasicBlock::iterator PI = MBBI;
2618 unsigned Opc = PI->getOpcode();
2619 ++MBBI;
2620 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2621 Opc == X86::POP2 || Opc == X86::POP2P) {
2622 Offset += SlotSize;
2623 // Compared to pop, pop2 introduces more stack offset (one more
2624 // register).
2625 if (Opc == X86::POP2 || Opc == X86::POP2P)
2626 Offset += SlotSize;
2627 BuildCFI(MBB, MBBI, DL,
2628 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2629 MachineInstr::FrameDestroy);
2630 }
2631 }
2632 }
2633
2634 // Emit DWARF info specifying the restores of the callee-saved registers.
2635 // For epilogue with return inside or being other block without successor,
2636 // no need to generate .cfi_restore for callee-saved registers.
2637 if (NeedsDwarfCFI && !MBB.succ_empty())
2638 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2639
2640 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2641 // Add the return addr area delta back since we are not tail calling.
2642 int64_t Offset = -1 * X86FI->getTCReturnAddrDelta();
2643 assert(Offset >= 0 && "TCDelta should never be positive");
2644 if (Offset) {
2645 // Check for possible merge with preceding ADD instruction.
2646 Offset = mergeSPAdd(MBB, Terminator, Offset, true);
2647 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2648 }
2649 }
2650
2651 // Emit tilerelease for AMX kernel.
2652 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2653 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2654
2655 if (NeedsWin64CFI && MF.hasWinCFI())
2656 BuildMI(MBB, Terminator, DL, TII.get(X86::SEH_EndEpilogue));
2657}
2658
2659 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2660 int FI,
2661 Register &FrameReg) const {
2662 const MachineFrameInfo &MFI = MF.getFrameInfo();
2663
2664 bool IsFixed = MFI.isFixedObjectIndex(FI);
2665 // We can't calculate offset from frame pointer if the stack is realigned,
2666 // so enforce usage of stack/base pointer. The base pointer is used when we
2667 // have dynamic allocas in addition to dynamic realignment.
2668 if (TRI->hasBasePointer(MF))
2669 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2670 else if (TRI->hasStackRealignment(MF))
2671 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2672 else
2673 FrameReg = TRI->getFrameRegister(MF);
2674
2675 // Offset will hold the offset from the stack pointer at function entry to the
2676 // object.
2677 // We need to factor in additional offsets applied during the prologue to the
2678 // frame, base, and stack pointer depending on which is used.
2679 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2680 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2681 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2682 uint64_t StackSize = MFI.getStackSize();
2683 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2684 int64_t FPDelta = 0;
2685
2686 // In an x86 interrupt, remove the offset we added to account for the return
2687 // address from any stack object allocated in the caller's frame. Interrupts
2688 // do not have a standard return address. Fixed objects in the current frame,
2689 // such as SSE register spills, should not get this treatment.
2690 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2691 Offset >= 0) {
2692 Offset += getOffsetOfLocalArea();
2693 }
2694
2695 if (IsWin64Prologue) {
2696 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2697
2698 // Calculate required stack adjustment.
2699 uint64_t FrameSize = StackSize - SlotSize;
2700 // If required, include space for extra hidden slot for stashing base
2701 // pointer.
2702 if (X86FI->getRestoreBasePointer())
2703 FrameSize += SlotSize;
2704 uint64_t NumBytes = FrameSize - CSSize;
2705
2706 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2707 if (FI && FI == X86FI->getFAIndex())
2708 return StackOffset::getFixed(-SEHFrameOffset);
2709
2710 // FPDelta is the offset from the "traditional" FP location of the old base
2711 // pointer followed by return address and the location required by the
2712 // restricted Win64 prologue.
2713 // Add FPDelta to all offsets below that go through the frame pointer.
2714 FPDelta = FrameSize - SEHFrameOffset;
2715 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2716 "FPDelta isn't aligned per the Win64 ABI!");
2717 }
2718
2719 if (FrameReg == TRI->getFramePtr()) {
2720 // Skip saved EBP/RBP
2721 Offset += SlotSize;
2722
2723 // Account for restricted Windows prologue.
2724 Offset += FPDelta;
2725
2726 // Skip the RETADDR move area
2727 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2728 if (TailCallReturnAddrDelta < 0)
2729 Offset -= TailCallReturnAddrDelta;
2730
2731 return StackOffset::getFixed(Offset);
2732 }
2733
2734 // FrameReg is either the stack pointer or a base pointer. But the base is
2735 // located at the end of the statically known StackSize so the distinction
2736 // doesn't really matter.
2737 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2738 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2739 return StackOffset::getFixed(Offset + StackSize);
2740}
2741
2742 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2743 Register &FrameReg) const {
2744 const MachineFrameInfo &MFI = MF.getFrameInfo();
2745 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2746 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2747 const auto it = WinEHXMMSlotInfo.find(FI);
2748
2749 if (it == WinEHXMMSlotInfo.end())
2750 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2751
2752 FrameReg = TRI->getStackRegister();
2753 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2754 it->second;
2755}
2756
2757 StackOffset
2758 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2759 Register &FrameReg,
2760 int Adjustment) const {
2761 const MachineFrameInfo &MFI = MF.getFrameInfo();
2762 FrameReg = TRI->getStackRegister();
2763 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2764 getOffsetOfLocalArea() + Adjustment);
2765}
2766
2767 StackOffset
2768 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2769 int FI, Register &FrameReg,
2770 bool IgnoreSPUpdates) const {
2771
2772 const MachineFrameInfo &MFI = MF.getFrameInfo();
2773 // Does not include any dynamic realign.
2774 const uint64_t StackSize = MFI.getStackSize();
2775 // LLVM arranges the stack as follows:
2776 // ...
2777 // ARG2
2778 // ARG1
2779 // RETADDR
2780 // PUSH RBP <-- RBP points here
2781 // PUSH CSRs
2782 // ~~~~~~~ <-- possible stack realignment (non-win64)
2783 // ...
2784 // STACK OBJECTS
2785 // ... <-- RSP after prologue points here
2786 // ~~~~~~~ <-- possible stack realignment (win64)
2787 //
2788 // if (hasVarSizedObjects()):
2789 // ... <-- "base pointer" (ESI/RBX) points here
2790 // DYNAMIC ALLOCAS
2791 // ... <-- RSP points here
2792 //
2793 // Case 1: In the simple case of no stack realignment and no dynamic
2794 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2795 // with fixed offsets from RSP.
2796 //
2797 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2798 // stack objects are addressed with RBP and regular stack objects with RSP.
2799 //
2800 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2801 // to address stack arguments for outgoing calls and nothing else. The "base
2802 // pointer" points to local variables, and RBP points to fixed objects.
2803 //
2804 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2805 // answer we give is relative to the SP after the prologue, and not the
2806 // SP in the middle of the function.
2807
2808 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2809 !STI.isTargetWin64())
2810 return getFrameIndexReference(MF, FI, FrameReg);
2811
2812 // If !hasReservedCallFrame the function might have SP adjustment in the
2813 // body. So, even though the offset is statically known, it depends on where
2814 // we are in the function.
2815 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2816 return getFrameIndexReference(MF, FI, FrameReg);
2817
2818 // We don't handle tail calls, and shouldn't be seeing them either.
2819 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2820 "we don't handle this case!");
2821
2822 // This is how the math works out:
2823 //
2824 // %rsp grows (i.e. gets lower) left to right. Each box below is
2825 // one word (eight bytes). Obj0 is the stack slot we're trying to
2826 // get to.
2827 //
2828 // ----------------------------------
2829 // | BP | Obj0 | Obj1 | ... | ObjN |
2830 // ----------------------------------
2831 // ^ ^ ^ ^
2832 // A B C E
2833 //
2834 // A is the incoming stack pointer.
2835 // (B - A) is the local area offset (-8 for x86-64) [1]
2836 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2837 //
2838 // |(E - B)| is the StackSize (absolute value, positive). For a
2839 // stack that grows down, this works out to be (B - E). [3]
2840 //
2841 // E is also the value of %rsp after stack has been set up, and we
2842 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2843 // (C - E) == (C - A) - (B - A) + (B - E)
2844 // { Using [1], [2] and [3] above }
2845 // == getObjectOffset - LocalAreaOffset + StackSize
2846
2847 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2848}
2849
2850 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2851 MachineFunction &MF, const TargetRegisterInfo *TRI,
2852 std::vector<CalleeSavedInfo> &CSI) const {
2853 MachineFrameInfo &MFI = MF.getFrameInfo();
2854 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2855
2856 unsigned CalleeSavedFrameSize = 0;
2857 unsigned XMMCalleeSavedFrameSize = 0;
2858 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2859 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2860
2861 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2862
2863 if (TailCallReturnAddrDelta < 0) {
2864 // create RETURNADDR area
2865 // arg
2866 // arg
2867 // RETADDR
2868 // { ...
2869 // RETADDR area
2870 // ...
2871 // }
2872 // [EBP]
2873 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2874 TailCallReturnAddrDelta - SlotSize, true);
2875 }
2876
2877 // Spill the BasePtr if it's used.
2878 if (this->TRI->hasBasePointer(MF)) {
2879 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2880 if (MF.hasEHFunclets()) {
2881 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2882 X86FI->setHasSEHFramePtrSave(true);
2883 X86FI->setSEHFramePtrSaveIndex(FI);
2884 }
2885 }
2886
2887 if (hasFP(MF)) {
2888 // emitPrologue always spills frame register the first thing.
2889 SpillSlotOffset -= SlotSize;
2890 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2891
2892 // The async context lives directly before the frame pointer, and we
2893 // allocate a second slot to preserve stack alignment.
2894 if (X86FI->hasSwiftAsyncContext()) {
2895 SpillSlotOffset -= SlotSize;
2896 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2897 SpillSlotOffset -= SlotSize;
2898 }
2899
2900 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2901 // the frame register, we can delete it from CSI list and not have to worry
2902 // about avoiding it later.
2903 Register FPReg = TRI->getFrameRegister(MF);
2904 for (unsigned i = 0; i < CSI.size(); ++i) {
2905 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2906 CSI.erase(CSI.begin() + i);
2907 break;
2908 }
2909 }
2910 }
2911
2912 // Strategy:
2913 // 1. Use push2 when
2914 // a) the number of CSRs > 1, if no padding is needed
2915 // b) the number of CSRs > 2, if padding is needed
2916 // 2. When the number of CSR pushes is odd
2917 // a. Start to use push2 from the 1st push if the stack is 16B aligned.
2918 // b. Start to use push2 from the 2nd push if the stack is not 16B aligned.
2919 // 3. When the number of CSR pushes is even, start to use push2 from the 1st
2920 // push and make the stack 16B aligned before the push
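// E.g. (sketch): four CSR GPRs with a misaligned first slot get one slot of
// padding and then two push2 pairs (rule 1b); five CSR GPRs with a 16B
// aligned first slot go out as push2, push2, push (rules 1a and 2a).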
2921 unsigned NumRegsForPush2 = 0;
2922 if (STI.hasPush2Pop2()) {
2923 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2924 return X86::GR64RegClass.contains(I.getReg());
2925 });
2926 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2927 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2928 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2929 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2930 if (X86FI->padForPush2Pop2()) {
2931 SpillSlotOffset -= SlotSize;
2932 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2933 }
2934 }
2935
2936 // Assign slots for GPRs. It increases frame size.
2937 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2938 Register Reg = I.getReg();
2939
2940 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2941 continue;
2942
2943 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2944 // or the number of candidates chosen so far is odd.
2945 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2946 (SpillSlotOffset % 16 == 0 ||
2947 X86FI->getNumCandidatesForPush2Pop2() % 2))
2948 X86FI->addCandidateForPush2Pop2(Reg);
2949
2950 SpillSlotOffset -= SlotSize;
2951 CalleeSavedFrameSize += SlotSize;
2952
2953 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2954 I.setFrameIdx(SlotIndex);
2955 }
2956
2957 // Adjust the offset of spill slot as we know the accurate callee saved frame
2958 // size.
2959 if (X86FI->getRestoreBasePointer()) {
2960 SpillSlotOffset -= SlotSize;
2961 CalleeSavedFrameSize += SlotSize;
2962
2963 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2964 // TODO: would saving the slot index be better?
2965 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2966 }
2967 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2968 "Expect even candidates for push2/pop2");
2969 if (X86FI->getNumCandidatesForPush2Pop2())
2970 ++NumFunctionUsingPush2Pop2;
2971 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2972 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2973
2974 // Assign slots for XMMs.
2975 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2976 Register Reg = I.getReg();
2977 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2978 continue;
2979
2980 // If this is k-register make sure we lookup via the largest legal type.
2981 MVT VT = MVT::Other;
2982 if (X86::VK16RegClass.contains(Reg))
2983 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2984
2985 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2986 unsigned Size = TRI->getSpillSize(*RC);
2987 Align Alignment = TRI->getSpillAlign(*RC);
2988 // ensure alignment
2989 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86");
2990 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2991
2992 // spill into slot
2993 SpillSlotOffset -= Size;
2994 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2995 I.setFrameIdx(SlotIndex);
2996 MFI.ensureMaxAlignment(Alignment);
2997
2998 // Save the start offset and size of XMM in stack frame for funclets.
2999 if (X86::VR128RegClass.contains(Reg)) {
3000 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
3001 XMMCalleeSavedFrameSize += Size;
3002 }
3003 }
3004
3005 return true;
3006}
3007
3008 bool X86FrameLowering::spillCalleeSavedRegisters(
3009 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3010 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3011 DebugLoc DL = MBB.findDebugLoc(MI);
3012
3013 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
3014 // for us, and there are no XMM CSRs on Win32.
3015 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
3016 return true;
3017
3018 // Push GPRs. It increases frame size.
3019 const MachineFunction &MF = *MBB.getParent();
3020 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3021 if (X86FI->padForPush2Pop2())
3022 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
3023
3024 // Update LiveIn of the basic block and decide whether we can add a kill flag
3025 // to the use.
3026 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
3027 const MachineRegisterInfo &MRI = MF.getRegInfo();
3028 // Do not set a kill flag on values that are also marked as live-in. This
3029 // happens with the @llvm.returnaddress intrinsic and with arguments
3030 // passed in callee saved registers.
3031 // Omitting the kill flags is conservatively correct even if the live-in
3032 // is not used after all.
3033 if (MRI.isLiveIn(Reg))
3034 return false;
3035 MBB.addLiveIn(Reg);
3036 // Check if any aliasing register is live-in.
3037 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3038 if (MRI.isLiveIn(*AReg))
3039 return false;
3040 return true;
3041 };
3042 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3043 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3044 };
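// Example (illustrative): if an argument arrives in a callee-saved register
// such as %rbx, that register is a function live-in, so the saving push
// must not mark it killed -- the incoming value is still needed afterwards.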
3045
3046 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3047 Register Reg = RI->getReg();
3048 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3049 continue;
3050
3051 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3052 Register Reg2 = (++RI)->getReg();
3053 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
3054 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3055 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3056 .setMIFlag(MachineInstr::FrameSetup);
3057 } else {
3058 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3059 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3060 .setMIFlag(MachineInstr::FrameSetup);
3061 }
3062 }
3063
3064 if (X86FI->getRestoreBasePointer()) {
3065 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3066 Register BaseReg = this->TRI->getBaseRegister();
3067 BuildMI(MBB, MI, DL, TII.get(Opc))
3068 .addReg(BaseReg, getKillRegState(true))
3069 .setMIFlag(MachineInstr::FrameSetup);
3070 }
3071
3072 // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
3073 // so they are stored to the stack frame instead.
3074 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3075 Register Reg = I.getReg();
3076 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3077 continue;
3078
3079 // If this is k-register make sure we lookup via the largest legal type.
3080 MVT VT = MVT::Other;
3081 if (X86::VK16RegClass.contains(Reg))
3082 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3083
3084 // Add the callee-saved register as live-in. It's killed at the spill.
3085 MBB.addLiveIn(Reg);
3086 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3087
3088 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3089 Register());
3090 }
3091
3092 return true;
3093}
3094
3095void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3096 MachineBasicBlock::iterator MBBI,
3097 MachineInstr *CatchRet) const {
3098 // SEH shouldn't use catchret.
3099 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3100 MBB.getParent()->getFunction().getPersonalityFn())) &&
3101 "SEH should not use CATCHRET");
3102 const DebugLoc &DL = CatchRet->getDebugLoc();
3103 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3104
3105 // Fill EAX/RAX with the address of the target block.
3106 if (STI.is64Bit()) {
3107 // LEA64r CatchRetTarget(%rip), %rax
3108 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3109 .addReg(X86::RIP)
3110 .addImm(0)
3111 .addReg(0)
3112 .addMBB(CatchRetTarget)
3113 .addReg(0);
3114 } else {
3115 // MOV32ri $CatchRetTarget, %eax
3116 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3117 .addMBB(CatchRetTarget);
3118 }
3119
3120 // Record that we've taken the address of CatchRetTarget and no longer just
3121 // reference it in a terminator.
3122 CatchRetTarget->setMachineBlockAddressTaken();
3123}
3124
3125 bool X86FrameLowering::restoreCalleeSavedRegisters(
3126 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3127 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3128 if (CSI.empty())
3129 return false;
3130
3131 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3132 // Don't restore CSRs in 32-bit EH funclets. Matches
3133 // spillCalleeSavedRegisters.
3134 if (STI.is32Bit())
3135 return true;
3136 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3137 // funclets. emitEpilogue transforms these to normal jumps.
3138 if (MI->getOpcode() == X86::CATCHRET) {
3139 const Function &F = MBB.getParent()->getFunction();
3140 bool IsSEH = isAsynchronousEHPersonality(
3141 classifyEHPersonality(F.getPersonalityFn()));
3142 if (IsSEH)
3143 return true;
3144 }
3145 }
3146
3147 DebugLoc DL = MBB.findDebugLoc(MI);
3148
3149 // Reload XMMs from stack frame.
3150 for (const CalleeSavedInfo &I : CSI) {
3151 Register Reg = I.getReg();
3152 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3153 continue;
3154
3155 // If this is k-register make sure we lookup via the largest legal type.
3156 MVT VT = MVT::Other;
3157 if (X86::VK16RegClass.contains(Reg))
3158 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3159
3160 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3161 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3162 Register());
3163 }
3164
3165 // Clear the stack slot for spill base pointer register.
3166 MachineFunction &MF = *MBB.getParent();
3167 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3168 if (X86FI->getRestoreBasePointer()) {
3169 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3170 Register BaseReg = this->TRI->getBaseRegister();
3171 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3172 .setMIFlag(MachineInstr::FrameDestroy);
3173 }
3174
3175 // POP GPRs.
3176 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3177 Register Reg = I->getReg();
3178 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3179 continue;
3180
3181 if (X86FI->isCandidateForPush2Pop2(Reg))
3182 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3183 .addReg((++I)->getReg(), RegState::Define)
3184 .setMIFlag(MachineInstr::FrameDestroy);
3185 else
3186 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3187 .setMIFlag(MachineInstr::FrameDestroy);
3188 }
3189 if (X86FI->padForPush2Pop2())
3190 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3191
3192 return true;
3193}
3194
3195 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3196 BitVector &SavedRegs,
3197 RegScavenger *RS) const {
3198 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3199
3200 // Spill the BasePtr if it's used.
3201 if (TRI->hasBasePointer(MF)) {
3202 Register BasePtr = TRI->getBaseRegister();
3203 if (STI.isTarget64BitILP32())
3204 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3205 SavedRegs.set(BasePtr);
3206 }
3207}
3208
3209static bool HasNestArgument(const MachineFunction *MF) {
3210 const Function &F = MF->getFunction();
3211 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3212 I++) {
3213 if (I->hasNestAttr() && !I->use_empty())
3214 return true;
3215 }
3216 return false;
3217}
3218
3219/// GetScratchRegister - Get a temp register for performing work in the
3220/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3221/// and the properties of the function either one or two registers will be
3222/// needed. Set primary to true for the first register, false for the second.
3223static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3224 const MachineFunction &MF, bool Primary) {
3225 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3226
3227 // Erlang stuff.
3228 if (CallingConvention == CallingConv::HiPE) {
3229 if (Is64Bit)
3230 return Primary ? X86::R14 : X86::R13;
3231 else
3232 return Primary ? X86::EBX : X86::EDI;
3233 }
3234
3235 if (Is64Bit) {
3236 if (IsLP64)
3237 return Primary ? X86::R11 : X86::R12;
3238 else
3239 return Primary ? X86::R11D : X86::R12D;
3240 }
3241
3242 bool IsNested = HasNestArgument(&MF);
3243
3244 if (CallingConvention == CallingConv::X86_FastCall ||
3245 CallingConvention == CallingConv::Fast ||
3246 CallingConvention == CallingConv::Tail) {
3247 if (IsNested)
3248 report_fatal_error("Segmented stacks do not support fastcall with "
3249 "nested functions.");
3250 return Primary ? X86::EAX : X86::ECX;
3251 }
3252 if (IsNested)
3253 return Primary ? X86::EDX : X86::EAX;
3254 return Primary ? X86::ECX : X86::EAX;
3255}
3256
3257// The stack limit in the TCB is set to this many bytes above the actual stack
3258// limit.
3259 static const uint64_t kSplitStackAvailable = 256;
3260
3261 void X86FrameLowering::adjustForSegmentedStacks(
3262 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3263 MachineFrameInfo &MFI = MF.getFrameInfo();
3264 uint64_t StackSize;
3265 unsigned TlsReg, TlsOffset;
3266 DebugLoc DL;
3267
3268 // To support shrink-wrapping we would need to insert the new blocks
3269 // at the right place and update the branches to PrologueMBB.
3270 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3271
3272 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3273 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3274 "Scratch register is live-in");
3275
3276 if (MF.getFunction().isVarArg())
3277 report_fatal_error("Segmented stacks do not support vararg functions.");
3278 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3279 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3280 !STI.isTargetDragonFly())
3281 report_fatal_error("Segmented stacks not supported on this platform.");
3282
3283 // Eventually StackSize will be calculated by a link-time pass, which will
3284 // also decide whether checking code needs to be injected into this particular
3285 // prologue.
3286 StackSize = MFI.getStackSize();
3287
3288 if (!MFI.needsSplitStackProlog())
3289 return;
3290
3291 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3292 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3293 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3294 bool IsNested = false;
3295
3296 // We need to know if the function has a nest argument only in 64 bit mode.
3297 if (Is64Bit)
3298 IsNested = HasNestArgument(&MF);
3299
3300 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3301 // allocMBB needs to be the last (terminating) instruction.
3302
3303 for (const auto &LI : PrologueMBB.liveins()) {
3304 allocMBB->addLiveIn(LI);
3305 checkMBB->addLiveIn(LI);
3306 }
3307
3308 if (IsNested)
3309 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3310
3311 MF.push_front(allocMBB);
3312 MF.push_front(checkMBB);
3313
3314 // When the frame size is less than 256 we just compare the stack
3315 // boundary directly to the value of the stack pointer, per gcc.
3316 bool CompareStackPointer = StackSize < kSplitStackAvailable;
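// Sketch of the resulting check on x86-64 Linux (illustrative):
//   lea -StackSize(%rsp), %r11   ; omitted when StackSize < 256
//   cmp %fs:0x70, %r11           ; stacklet limit from the TCB
//   ja  <body>                   ; enough room, skip __morestack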
3317
3318 // Read the limit of the current stacklet from the stack_guard location.
3319 if (Is64Bit) {
3320 if (STI.isTargetLinux()) {
3321 TlsReg = X86::FS;
3322 TlsOffset = IsLP64 ? 0x70 : 0x40;
3323 } else if (STI.isTargetDarwin()) {
3324 TlsReg = X86::GS;
3325 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3326 } else if (STI.isTargetWin64()) {
3327 TlsReg = X86::GS;
3328 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3329 } else if (STI.isTargetFreeBSD()) {
3330 TlsReg = X86::FS;
3331 TlsOffset = 0x18;
3332 } else if (STI.isTargetDragonFly()) {
3333 TlsReg = X86::FS;
3334 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3335 } else {
3336 report_fatal_error("Segmented stacks not supported on this platform.");
3337 }
3338
3339 if (CompareStackPointer)
3340 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3341 else
3342 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3343 ScratchReg)
3344 .addReg(X86::RSP)
3345 .addImm(1)
3346 .addReg(0)
3347 .addImm(-StackSize)
3348 .addReg(0);
3349
3350 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3351 .addReg(ScratchReg)
3352 .addReg(0)
3353 .addImm(1)
3354 .addReg(0)
3355 .addImm(TlsOffset)
3356 .addReg(TlsReg);
3357 } else {
3358 if (STI.isTargetLinux()) {
3359 TlsReg = X86::GS;
3360 TlsOffset = 0x30;
3361 } else if (STI.isTargetDarwin()) {
3362 TlsReg = X86::GS;
3363 TlsOffset = 0x48 + 90 * 4;
3364 } else if (STI.isTargetWin32()) {
3365 TlsReg = X86::FS;
3366 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3367 } else if (STI.isTargetDragonFly()) {
3368 TlsReg = X86::FS;
3369 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3370 } else if (STI.isTargetFreeBSD()) {
3371 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3372 } else {
3373 report_fatal_error("Segmented stacks not supported on this platform.");
3374 }
3375
3376 if (CompareStackPointer)
3377 ScratchReg = X86::ESP;
3378 else
3379 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3380 .addReg(X86::ESP)
3381 .addImm(1)
3382 .addReg(0)
3383 .addImm(-StackSize)
3384 .addReg(0);
3385
3386 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3387 STI.isTargetDragonFly()) {
3388 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3389 .addReg(ScratchReg)
3390 .addReg(0)
3391 .addImm(0)
3392 .addReg(0)
3393 .addImm(TlsOffset)
3394 .addReg(TlsReg);
3395 } else if (STI.isTargetDarwin()) {
3396
3397 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3398 unsigned ScratchReg2;
3399 bool SaveScratch2;
3400 if (CompareStackPointer) {
3401 // The primary scratch register is available for holding the TLS offset.
3402 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3403 SaveScratch2 = false;
3404 } else {
3405 // Need to use a second register to hold the TLS offset
3406 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3407
3408 // Unfortunately, with fastcc the second scratch register may hold an
3409 // argument.
3410 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3411 }
3412
3413 // If Scratch2 is live-in then it needs to be saved.
3414 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3415 "Scratch register is live-in and not saved");
3416
3417 if (SaveScratch2)
3418 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3419 .addReg(ScratchReg2, RegState::Kill);
3420
3421 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3422 .addImm(TlsOffset);
3423 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3424 .addReg(ScratchReg)
3425 .addReg(ScratchReg2)
3426 .addImm(1)
3427 .addReg(0)
3428 .addImm(0)
3429 .addReg(TlsReg);
3430
3431 if (SaveScratch2)
3432 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3433 }
3434 }
3435
3436 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3437 // It jumps to normal execution of the function body.
3438 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3439 .addMBB(&PrologueMBB)
3441
3442 // On 32 bit we first push the arguments size and then the frame size. On 64
3443 // bit, we pass the stack frame size in r10 and the argument size in r11.
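// A rough sketch of the allocMBB code this produces on each path (the
// immediates are illustrative only):
//
//   64-bit:  mov $StackSize, %r10        32-bit:  push $ArgSize
//            mov $ArgSize, %r11                   push $StackSize
//            call __morestack                     call __morestack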
3444 if (Is64Bit) {
3445 // Functions with nested arguments use R10, so it needs to be saved across
3446 // the call to __morestack.
3447
3448 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3449 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3450 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3451 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3452
3453 if (IsNested)
3454 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3455
3456 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3457 .addImm(StackSize);
3458 BuildMI(allocMBB, DL,
3459 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3460 Reg11)
3461 .addImm(X86FI->getArgumentStackSize());
3462 } else {
3463 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3464 .addImm(X86FI->getArgumentStackSize());
3465 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3466 }
3467
3468 // __morestack is in libgcc
3469 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3470 // Under the large code model, we cannot assume that __morestack lives
3471 // within 2^31 bytes of the call site, so we cannot use pc-relative
3472 // addressing. We cannot perform the call via a temporary register,
3473 // as the rax register may be used to store the static chain, and all
3474 // other suitable registers may be either callee-save or used for
3475 // parameter passing. We cannot use the stack at this point either
3476 // because __morestack manipulates the stack directly.
3477 //
3478 // To avoid these issues, perform an indirect call via a read-only memory
3479 // location containing the address.
3480 //
3481 // This solution is not perfect, as it assumes that the .rodata section
3482 // is laid out within 2^31 bytes of each function body, but this seems
3483 // to be sufficient for JIT.
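// That is, instead of 'call __morestack', the call emitted below is roughly:
//
//   call *__morestack_addr(%rip)
//
// with __morestack_addr being a pointer-sized read-only slot that holds the
// address of __morestack.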
3484 // FIXME: Add retpoline support and remove the error here.
3485 if (STI.useIndirectThunkCalls())
3486 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3487 "code model and thunks not yet implemented.");
3488 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3489 .addReg(X86::RIP)
3490 .addImm(0)
3491 .addReg(0)
3492 .addExternalSymbol("__morestack_addr")
3493 .addReg(0);
3494 } else {
3495 if (Is64Bit)
3496 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3497 .addExternalSymbol("__morestack");
3498 else
3499 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3500 .addExternalSymbol("__morestack");
3501 }
3502
3503 if (IsNested)
3504 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3505 else
3506 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3507
3508 allocMBB->addSuccessor(&PrologueMBB);
3509
3510 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3511 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3512
3513#ifdef EXPENSIVE_CHECKS
3514 MF.verify();
3515#endif
3516}
3517
3518/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3519/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3520/// to fields it needs, through a named metadata node "hipe.literals" containing
3521/// name-value pairs.
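/// For example, the module-level metadata could look like this (the literal
/// names are the ones queried below; the values are illustrative only):
///   !hipe.literals = !{!0, !1, !2}
///   !0 = !{!"AMD64_LEAF_WORDS", i32 24}
///   !1 = !{!"X86_LEAF_WORDS", i32 24}
///   !2 = !{!"P_NSP_LIMIT", i32 96}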
3522static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3523 const StringRef LiteralName) {
3524 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3525 MDNode *Node = HiPELiteralsMD->getOperand(i);
3526 if (Node->getNumOperands() != 2)
3527 continue;
3528 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3529 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3530 if (!NodeName || !NodeVal)
3531 continue;
3532 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3533 if (ValConst && NodeName->getString() == LiteralName) {
3534 return ValConst->getZExtValue();
3535 }
3536 }
3537
3538 report_fatal_error("HiPE literal " + LiteralName +
3539 " required but not provided");
3540}
3541
3542// Return true if there are no non-ehpad successors to MBB and there are no
3543// non-meta instructions between MBBI and MBB.end().
3544 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3545 MachineBasicBlock::const_iterator MBBI) {
3546 return llvm::all_of(
3547 MBB.successors(),
3548 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3549 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3550 return MI.isMetaInstruction();
3551 });
3552}
3553
3554/// Erlang programs may need a special prologue to handle the stack size they
3555/// might need at runtime. That is because Erlang/OTP does not implement a C
3556 /// stack but uses a custom implementation of a hybrid stack/heap architecture.
3557/// (for more information see Eric Stenman's Ph.D. thesis:
3558/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3559///
3560/// CheckStack:
3561/// temp0 = sp - MaxStack
3562/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3563/// OldStart:
3564/// ...
3565/// IncStack:
3566/// call inc_stack # doubles the stack space
3567/// temp0 = sp - MaxStack
3568/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3569 void X86FrameLowering::adjustForHiPEPrologue(
3570 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3571 MachineFrameInfo &MFI = MF.getFrameInfo();
3572 DebugLoc DL;
3573
3574 // To support shrink-wrapping we would need to insert the new blocks
3575 // at the right place and update the branches to PrologueMBB.
3576 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3577
3578 // HiPE-specific values
3579 NamedMDNode *HiPELiteralsMD =
3580 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3581 if (!HiPELiteralsMD)
3582 report_fatal_error(
3583 "Can't generate HiPE prologue without runtime parameters");
3584 const unsigned HipeLeafWords = getHiPELiteral(
3585 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3586 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3587 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3588 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3589 ? MF.getFunction().arg_size() - CCRegisteredArgs
3590 : 0;
3591 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3592
3593 assert(STI.isTargetLinux() &&
3594 "HiPE prologue is only supported on Linux operating systems.");
3595
3596 // Compute the largest caller's frame that is needed to fit the callees'
3597 // frames. This 'MaxStack' is computed from:
3598 //
3599 // a) the fixed frame size, which is the space needed for all spilled temps,
3600 // b) outgoing on-stack parameter areas, and
3601 // c) the minimum stack space this function needs to make available for the
3602 // functions it calls (a tunable ABI property).
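// Putting (a)-(c) together, a sketch of the value being accumulated:
//   MaxStack = MFI.getStackSize()          // (a) fixed frame size
//            + CallerStkArity * SlotSize   // (b) caller's on-stack arguments
//            + SlotSize                    // return address slot
//            + MoreStackForCalls           // (c) guaranteed leaf words, below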
3603 if (MFI.hasCalls()) {
3604 unsigned MoreStackForCalls = 0;
3605
3606 for (auto &MBB : MF) {
3607 for (auto &MI : MBB) {
3608 if (!MI.isCall())
3609 continue;
3610
3611 // Get callee operand.
3612 const MachineOperand &MO = MI.getOperand(0);
3613
3614 // Only take account of global function calls (no closures etc.).
3615 if (!MO.isGlobal())
3616 continue;
3617
3618 const Function *F = dyn_cast<Function>(MO.getGlobal());
3619 if (!F)
3620 continue;
3621
3622 // Do not update 'MaxStack' for primitive and built-in functions
3623 // (encoded with names either starting with "erlang."/"bif_" or not
3624 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3625 // "_", such as the BIF "suspend_0") as they are executed on another
3626 // stack.
3627 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3628 F->getName().find_first_of("._") == StringRef::npos)
3629 continue;
3630
3631 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3632 ? F->arg_size() - CCRegisteredArgs
3633 : 0;
3634 if (HipeLeafWords - 1 > CalleeStkArity)
3635 MoreStackForCalls =
3636 std::max(MoreStackForCalls,
3637 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3638 }
3639 }
3640 MaxStack += MoreStackForCalls;
3641 }
3642
3643 // If the stack frame needed is larger than the guaranteed amount, runtime
3644 // checks and calls to the "inc_stack_0" BIF are inserted in the assembly prologue.
3645 if (MaxStack > Guaranteed) {
3646 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3647 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3648
3649 for (const auto &LI : PrologueMBB.liveins()) {
3650 stackCheckMBB->addLiveIn(LI);
3651 incStackMBB->addLiveIn(LI);
3652 }
3653
3654 MF.push_front(incStackMBB);
3655 MF.push_front(stackCheckMBB);
3656
3657 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3658 unsigned LEAop, CMPop, CALLop;
3659 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3660 if (Is64Bit) {
3661 SPReg = X86::RSP;
3662 PReg = X86::RBP;
3663 LEAop = X86::LEA64r;
3664 CMPop = X86::CMP64rm;
3665 CALLop = X86::CALL64pcrel32;
3666 } else {
3667 SPReg = X86::ESP;
3668 PReg = X86::EBP;
3669 LEAop = X86::LEA32r;
3670 CMPop = X86::CMP32rm;
3671 CALLop = X86::CALLpcrel32;
3672 }
3673
3674 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3675 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3676 "HiPE prologue scratch register is live-in");
3677
3678 // Create new MBB for StackCheck:
3679 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3680 false, -MaxStack);
3681 // SPLimitOffset is in a fixed heap location (pointed by BP).
3682 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3683 PReg, false, SPLimitOffset);
3684 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3685 .addMBB(&PrologueMBB)
3686 .addImm(X86::COND_AE);
3687
3688 // Create new MBB for IncStack:
3689 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3690 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3691 false, -MaxStack);
3692 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3693 PReg, false, SPLimitOffset);
3694 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3695 .addMBB(incStackMBB)
3696 .addImm(X86::COND_LE);
3697
3698 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3699 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3700 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3701 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3702 }
3703#ifdef EXPENSIVE_CHECKS
3704 MF.verify();
3705#endif
3706}
3707
3708bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3709 MachineBasicBlock::iterator MBBI,
3710 const DebugLoc &DL,
3711 int Offset) const {
3712 if (Offset <= 0)
3713 return false;
3714
3715 if (Offset % SlotSize)
3716 return false;
3717
3718 int NumPops = Offset / SlotSize;
3719 // This is only worth it if we have at most 2 pops.
3720 if (NumPops != 1 && NumPops != 2)
3721 return false;
3722
3723 // Handle only the trivial case where the adjustment directly follows
3724 // a call. This is the most common one, anyway.
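// As a sketch, for Offset == 8 on 32-bit (SlotSize == 4), and assuming ECX
// and EDX are clobbered by the call and not defined by it:
//
//   call foo                    call foo
//   add $8, %esp        -->     pop %ecx
//                               pop %edx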
3725 if (MBBI == MBB.begin())
3726 return false;
3727 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3728 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3729 return false;
3730
3731 unsigned Regs[2];
3732 unsigned FoundRegs = 0;
3733
3734 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3735 const MachineOperand &RegMask = Prev->getOperand(1);
3736
3737 auto &RegClass =
3738 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3739 // Try to find up to NumPops free registers.
3740 for (auto Candidate : RegClass) {
3741 // Poor man's liveness:
3742 // Since we're immediately after a call, any register that is clobbered
3743 // by the call and not defined by it can be considered dead.
3744 if (!RegMask.clobbersPhysReg(Candidate))
3745 continue;
3746
3747 // Don't clobber reserved registers
3748 if (MRI.isReserved(Candidate))
3749 continue;
3750
3751 bool IsDef = false;
3752 for (const MachineOperand &MO : Prev->implicit_operands()) {
3753 if (MO.isReg() && MO.isDef() &&
3754 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3755 IsDef = true;
3756 break;
3757 }
3758 }
3759
3760 if (IsDef)
3761 continue;
3762
3763 Regs[FoundRegs++] = Candidate;
3764 if (FoundRegs == (unsigned)NumPops)
3765 break;
3766 }
3767
3768 if (FoundRegs == 0)
3769 return false;
3770
3771 // If we found only one free register, but need two, reuse the same one twice.
3772 while (FoundRegs < (unsigned)NumPops)
3773 Regs[FoundRegs++] = Regs[0];
3774
3775 for (int i = 0; i < NumPops; ++i)
3776 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3777 Regs[i]);
3778
3779 return true;
3780}
3781
3782 MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3783 MachineFunction &MF, MachineBasicBlock &MBB,
3784 MachineBasicBlock::iterator I) const {
3785 bool reserveCallFrame = hasReservedCallFrame(MF);
3786 unsigned Opcode = I->getOpcode();
3787 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3788 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3789 uint64_t Amount = TII.getFrameSize(*I);
3790 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3791 I = MBB.erase(I);
3792 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3793
3794 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3795 // typically because the function is marked noreturn (abort, throw,
3796 // assert_fail, etc).
3797 if (isDestroy && blockEndIsUnreachable(MBB, I))
3798 return I;
3799
3800 if (!reserveCallFrame) {
3801 // If the stack pointer can be changed after prologue, turn the
3802 // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
3803 // adjcallstackup instruction into 'add ESP, <amt>'.
3804
3805 // We need to keep the stack aligned properly. To do this, we round the
3806 // amount of space needed for the outgoing arguments up to the next
3807 // alignment boundary.
3808 Amount = alignTo(Amount, getStackAlign());
3809
3810 const Function &F = MF.getFunction();
3811 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3812 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3813
3814 // If we have any exception handlers in this function, and we adjust
3815 // the SP before calls, we may need to indicate this to the unwinder
3816 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3817 // Amount == 0, because the preceding function may have set a non-0
3818 // GNU_ARGS_SIZE.
3819 // TODO: We don't need to reset this between subsequent functions,
3820 // if it didn't change.
3821 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3822
3823 if (HasDwarfEHHandlers && !isDestroy &&
3824 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3825 BuildCFI(MBB, InsertPos, DL,
3826 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3827
3828 if (Amount == 0)
3829 return I;
3830
3831 // Factor out the amount that gets handled inside the sequence
3832 // (Pushes of argument for frame setup, callee pops for frame destroy)
3833 Amount -= InternalAmt;
3834
3835 // TODO: This is needed only if we require precise CFA.
3836 // If this is a callee-pop calling convention, emit a CFA adjust for
3837 // the amount the callee popped.
3838 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3839 BuildCFI(MBB, InsertPos, DL,
3840 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3841
3842 // Add Amount to SP to destroy a frame, or subtract to setup.
3843 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3844 int64_t CfaAdjustment = StackAdjustment;
3845
3846 if (StackAdjustment) {
3847 // Merge with any previous or following adjustment instruction. Note: the
3848 // instructions merged with here do not have CFI, so their stack
3849 // adjustments do not feed into CfaAdjustment
3850
3851 auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI,
3852 int64_t Offset) {
3853 CfaAdjustment += Offset;
3854 };
3855 auto CalcNewOffset = [&StackAdjustment](int64_t Offset) {
3856 return StackAdjustment + Offset;
3857 };
3858 StackAdjustment =
3859 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, true);
3860 StackAdjustment =
3861 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, false);
3862
3863 if (StackAdjustment) {
3864 if (!(F.hasMinSize() &&
3865 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3866 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3867 /*InEpilogue=*/false);
3868 }
3869 }
3870
3871 if (DwarfCFI && !hasFP(MF) && CfaAdjustment) {
3872 // If we don't have FP, but need to generate unwind information,
3873 // we need to set the correct CFA offset after the stack adjustment.
3874 // How much we adjust the CFA offset depends on whether we're emitting
3875 // CFI only for EH purposes or for debugging. EH only requires the CFA
3876 // offset to be correct at each call site, while for debugging we want
3877 // it to be more precise.
3878
3879 // TODO: When not using precise CFA, we also need to adjust for the
3880 // InternalAmt here.
3881 BuildCFI(
3882 MBB, InsertPos, DL,
3883 MCCFIInstruction::createAdjustCfaOffset(nullptr, -CfaAdjustment));
3884 }
3885
3886 return I;
3887 }
3888
3889 if (InternalAmt) {
3890 MachineBasicBlock::iterator CI = I;
3891 MachineBasicBlock::iterator B = MBB.begin();
3892 while (CI != B && !std::prev(CI)->isCall())
3893 --CI;
3894 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3895 }
3896
3897 return I;
3898}
3899
3900 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3901 assert(MBB.getParent() && "Block is not attached to a function!");
3902 const MachineFunction &MF = *MBB.getParent();
3903 if (!MBB.isLiveIn(X86::EFLAGS))
3904 return true;
3905
3906 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3907 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3908 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3909 const X86TargetLowering &TLI = *STI.getTargetLowering();
3910 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3911 return false;
3912
3913 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3914 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3915}
3916
3917 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3918 assert(MBB.getParent() && "Block is not attached to a function!");
3919
3920 // Win64 has strict requirements on epilogues, and we are not taking
3921 // a chance at messing with them.
3922 // I.e., unless this block is already an exit block, we can't use
3923 // it as an epilogue.
3924 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3925 return false;
3926
3927 // Swift async context epilogue has a BTR instruction that clobbers parts of
3928 // EFLAGS.
3929 const MachineFunction &MF = *MBB.getParent();
3930 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3931 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3932
3933 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3934 return true;
3935
3936 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3937 // clobbers the EFLAGS. Check that we do not need to preserve it,
3938 // otherwise, conservatively assume this is not
3939 // safe to insert the epilogue here.
3940 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3941}
3942
3943 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3944 // If we may need to emit frameless compact unwind information, give
3945 // up as this is currently broken: PR25614.
3946 bool CompactUnwind =
3947 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3948 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3949 !CompactUnwind) &&
3950 // The lowering of segmented stack and HiPE only support entry
3951 // blocks as prologue blocks: PR26107. This limitation may be
3952 // lifted if we fix:
3953 // - adjustForSegmentedStacks
3954 // - adjustForHiPEPrologue
3955 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3956 !MF.shouldSplitStack();
3957}
3958
3959 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3960 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3961 const DebugLoc &DL, bool RestoreSP) const {
3962 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3963 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3964 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3965 "restoring EBP/ESI on non-32-bit target");
3966
3967 MachineFunction &MF = *MBB.getParent();
3968 Register FramePtr = TRI->getFrameRegister(MF);
3969 Register BasePtr = TRI->getBaseRegister();
3970 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3971 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3972 MachineFrameInfo &MFI = MF.getFrameInfo();
3973
3974 // FIXME: Don't set FrameSetup flag in catchret case.
3975
3976 int FI = FuncInfo.EHRegNodeFrameIndex;
3977 int EHRegSize = MFI.getObjectSize(FI);
3978
3979 if (RestoreSP) {
3980 // MOV32rm -EHRegSize(%ebp), %esp
3981 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3982 X86::EBP, true, -EHRegSize)
3983 .setMIFlag(MachineInstr::FrameSetup);
3984 }
3985
3986 Register UsedReg;
3987 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3988 int EndOffset = -EHRegOffset - EHRegSize;
3989 FuncInfo.EHRegNodeEndOffset = EndOffset;
3990
3991 if (UsedReg == FramePtr) {
3992 // ADD $offset, %ebp
3993 unsigned ADDri = getADDriOpcode(false);
3994 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3995 .addReg(FramePtr)
3996 .addImm(EndOffset)
3997 .setMIFlag(MachineInstr::FrameSetup)
3998 ->getOperand(3)
3999 .setIsDead();
4000 assert(EndOffset >= 0 &&
4001 "end of registration object above normal EBP position!");
4002 } else if (UsedReg == BasePtr) {
4003 // LEA offset(%ebp), %esi
4004 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
4005 FramePtr, false, EndOffset)
4006 .setMIFlag(MachineInstr::FrameSetup);
4007 // MOV32rm SavedEBPOffset(%esi), %ebp
4008 assert(X86FI->getHasSEHFramePtrSave());
4009 int Offset =
4010 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
4011 .getFixed();
4012 assert(UsedReg == BasePtr);
4013 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
4014 UsedReg, true, Offset)
4015 .setMIFlag(MachineInstr::FrameSetup);
4016 } else {
4017 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
4018 }
4019 return MBBI;
4020}
4021
4022 int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
4023 return TRI->getSlotSize();
4024}
4025
4026 Register
4027 X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
4028 return StackPtr;
4029}
4030
4031 TargetFrameLowering::DwarfFrameBase
4032 X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
4033 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
4034 Register FrameRegister = RI->getFrameRegister(MF);
4035 if (getInitialCFARegister(MF) == FrameRegister &&
4036 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
4037 DwarfFrameBase FrameBase;
4038 FrameBase.Kind = DwarfFrameBase::CFA;
4039 FrameBase.Location.Offset =
4040 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
4041 return FrameBase;
4042 }
4043
4044 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4045}
4046
4047namespace {
4048// Struct used by orderFrameObjects to help sort the stack objects.
4049struct X86FrameSortingObject {
4050 bool IsValid = false; // true if we care about this Object.
4051 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4052 unsigned ObjectSize = 0; // Size of Object in bytes.
4053 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4054 unsigned ObjectNumUses = 0; // Object static number of uses.
4055};
4056
4057// The comparison function we use for std::sort to order our local
4058// stack symbols. The current algorithm is to use an estimated
4059// "density". This takes into consideration the size and number of
4060// uses each object has in order to roughly minimize code size.
4061// So, for example, an object of size 16B that is referenced 5 times
4062// will get higher priority than 4 4B objects referenced 1 time each.
4063// It's not perfect and we may be able to squeeze a few more bytes out of
4064// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4065// fringe end can have special consideration, given their size is less
4066// important, etc.), but the algorithmic complexity grows too much to be
4067// worth the extra gains we get. This gets us pretty close.
4068// The final order leaves us with objects with highest priority going
4069// at the end of our list.
4070struct X86FrameSortingComparator {
4071 inline bool operator()(const X86FrameSortingObject &A,
4072 const X86FrameSortingObject &B) const {
4073 uint64_t DensityAScaled, DensityBScaled;
4074
4075 // For consistency in our comparison, all invalid objects are placed
4076 // at the end. This also allows us to stop walking when we hit the
4077 // first invalid item after it's all sorted.
4078 if (!A.IsValid)
4079 return false;
4080 if (!B.IsValid)
4081 return true;
4082
4083 // The density is calculated by doing :
4084 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4085 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4086 // Since this approach may cause inconsistencies in
4087 // the floating point <, >, == comparisons, depending on the floating
4088 // point model with which the compiler was built, we're going
4089 // to scale both sides by multiplying with
4090 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4091 // the division and, with it, the need for any floating point
4092 // arithmetic.
4093 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4094 static_cast<uint64_t>(B.ObjectSize);
4095 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4096 static_cast<uint64_t>(A.ObjectSize);
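// For example, with A = {5 uses, 16 bytes} and B = {1 use, 4 bytes}:
// DensityAScaled = 5 * 4 = 20 and DensityBScaled = 1 * 16 = 16, so A
// compares as denser, exactly as 5/16 > 1/4 would.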
4097
4098 // If the two densities are equal, prioritize highest alignment
4099 // objects. This allows for similar alignment objects
4100 // to be packed together (given the same density).
4101 // There's room for improvement here, also, since we can pack
4102 // similar alignment (different density) objects next to each
4103 // other to save padding. This will also require further
4104 // complexity/iterations, and the overall gain isn't worth it,
4105 // in general. Something to keep in mind, though.
4106 if (DensityAScaled == DensityBScaled)
4107 return A.ObjectAlignment < B.ObjectAlignment;
4108
4109 return DensityAScaled < DensityBScaled;
4110 }
4111};
4112} // namespace
4113
4114// Order the symbols in the local stack.
4115// We want to place the local stack objects in some sort of sensible order.
4116// The heuristic we use is to try and pack them according to static number
4117// of uses and size of object in order to minimize code size.
4118 void X86FrameLowering::orderFrameObjects(
4119 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4120 const MachineFrameInfo &MFI = MF.getFrameInfo();
4121
4122 // Don't waste time if there's nothing to do.
4123 if (ObjectsToAllocate.empty())
4124 return;
4125
4126 // Create an array of all MFI objects. We won't need all of these
4127 // objects, but we're going to create a full array of them to make
4128 // it easier to index into when we're counting "uses" down below.
4129 // We want to be able to easily/cheaply access an object by simply
4130 // indexing into it, instead of having to search for it every time.
4131 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4132
4133 // Walk the objects we care about and mark them as such in our working
4134 // struct.
4135 for (auto &Obj : ObjectsToAllocate) {
4136 SortingObjects[Obj].IsValid = true;
4137 SortingObjects[Obj].ObjectIndex = Obj;
4138 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4139 // Set the size.
4140 int ObjectSize = MFI.getObjectSize(Obj);
4141 if (ObjectSize == 0)
4142 // Variable size. Just use 4.
4143 SortingObjects[Obj].ObjectSize = 4;
4144 else
4145 SortingObjects[Obj].ObjectSize = ObjectSize;
4146 }
4147
4148 // Count the number of uses for each object.
4149 for (auto &MBB : MF) {
4150 for (auto &MI : MBB) {
4151 if (MI.isDebugInstr())
4152 continue;
4153 for (const MachineOperand &MO : MI.operands()) {
4154 // Check to see if it's a local stack symbol.
4155 if (!MO.isFI())
4156 continue;
4157 int Index = MO.getIndex();
4158 // Check to see if it falls within our range, and is tagged
4159 // to require ordering.
4160 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4161 SortingObjects[Index].IsValid)
4162 SortingObjects[Index].ObjectNumUses++;
4163 }
4164 }
4165 }
4166
4167 // Sort the objects using the X86FrameSortingComparator (see its comment
4168 // for info).
4169 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4170
4171 // Now modify the original list to represent the final order that
4172 // we want. The order will depend on whether we're going to access them
4173 // from the stack pointer or the frame pointer. For SP, the objects we
4174 // want at smaller offsets should end up at the END of the list.
4175 // For FP, it should be flipped.
4176 int i = 0;
4177 for (auto &Obj : SortingObjects) {
4178 // All invalid items are sorted at the end, so it's safe to stop.
4179 if (!Obj.IsValid)
4180 break;
4181 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4182 }
4183
4184 // Flip it if we're accessing off of the FP.
4185 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4186 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4187}
4188
4189 unsigned
4190 X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4191 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4192 unsigned Offset = 16;
4193 // RBP is immediately pushed.
4194 Offset += SlotSize;
4195 // All callee-saved registers are then pushed.
4196 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4197 // Every funclet allocates enough stack space for the largest outgoing call.
4198 Offset += getWinEHFuncletFrameSize(MF);
4199 return Offset;
4200}
4201
4202 void X86FrameLowering::processFunctionBeforeFrameFinalized(
4203 MachineFunction &MF, RegScavenger *RS) const {
4204 // Mark the function as not having WinCFI. We will set it back to true in
4205 // emitPrologue if it gets called and emits CFI.
4206 MF.setHasWinCFI(false);
4207
4208 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4209 // aligned. The format doesn't support misaligned stack adjustments.
4210 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4211 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4212
4213 // If this function isn't doing Win64-style C++ EH, we don't need to do
4214 // anything.
4215 if (STI.is64Bit() && MF.hasEHFunclets() &&
4216 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4217 EHPersonality::MSVC_CXX) {
4218 adjustFrameForMsvcCxxEh(MF);
4219 }
4220}
4221
4222void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4223 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4224 // relative to RSP after the prologue. Find the offset of the last fixed
4225 // object, so that we can allocate a slot immediately following it. If there
4226 // were no fixed objects, use offset -SlotSize, which is immediately after the
4227 // return address. Fixed objects have negative frame indices.
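// As a worked example (a sketch): with SlotSize == 8, the lowest fixed
// object at offset -40, and no catch objects to place, UnwindHelp becomes
// the 8-byte fixed object at offset -48, just below all other fixed objects.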
4228 MachineFrameInfo &MFI = MF.getFrameInfo();
4229 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4230 int64_t MinFixedObjOffset = -SlotSize;
4231 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4232 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4233
4234 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4235 for (WinEHHandlerType &H : TBME.HandlerArray) {
4236 int FrameIndex = H.CatchObj.FrameIndex;
4237 if (FrameIndex != INT_MAX) {
4238 // Ensure alignment.
4239 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4240 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4241 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4242 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4243 }
4244 }
4245 }
4246
4247 // Ensure alignment.
4248 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4249 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4250 int UnwindHelpFI =
4251 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4252 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4253
4254 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4255 // other frame setup instructions.
4256 MachineBasicBlock &MBB = MF.front();
4257 auto MBBI = MBB.begin();
4258 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4259 ++MBBI;
4260
4262 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4263 UnwindHelpFI)
4264 .addImm(-2);
4265}
4266
4267 void X86FrameLowering::processFunctionAfterFrameFinalized(
4268 MachineFunction &MF, RegScavenger *RS) const {
4269 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4270
4271 if (STI.is32Bit() && MF.hasEHFunclets())
4272 restoreWinEHStackPointersInParent(MF);
4273 // We have emitted the prolog and epilog, so the stack pointer saving
4274 // instruction is no longer needed.
4275 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4276 MI->eraseFromParent();
4277 X86FI->setStackPtrSaveMI(nullptr);
4278 }
4279}
4280
4281 void X86FrameLowering::restoreWinEHStackPointersInParent(
4282 MachineFunction &MF) const {
4283 // 32-bit functions have to restore stack pointers when control is transferred
4284 // back to the parent function. These blocks are identified as eh pads that
4285 // are not funclet entries.
4286 bool IsSEH = isAsynchronousEHPersonality(
4287 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4288 for (MachineBasicBlock &MBB : MF) {
4289 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4290 if (NeedsRestore)
4291 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4292 /*RestoreSP=*/IsSEH);
4293 }
4294}
4295
4296// Compute the alignment gap between current SP after spilling FP/BP and the
4297// next properly aligned stack offset.
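// For example (a sketch): spilling one 8-byte register under a 16-byte
// stack alignment gives AllocSize == 8 and AlignedSize == 16, so the gap
// is 8 bytes; spilling two such registers gives a gap of 0.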
4298 static int computeFPBPAlignmentGap(MachineFunction &MF,
4299 const TargetRegisterClass *RC,
4300 unsigned NumSpilledRegs) {
4301 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
4302 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4303 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4304 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4305 return AlignedSize - AllocSize;
4306}
4307
4308void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4309 MachineBasicBlock::iterator BeforeMI,
4310 Register FP, Register BP,
4311 int SPAdjust) const {
4312 assert(FP.isValid() || BP.isValid());
4313
4314 MachineBasicBlock *MBB = BeforeMI->getParent();
4315 DebugLoc DL = BeforeMI->getDebugLoc();
4316
4317 // Spill FP.
4318 if (FP.isValid()) {
4319 BuildMI(*MBB, BeforeMI, DL,
4320 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4321 .addReg(FP);
4322 }
4323
4324 // Spill BP.
4325 if (BP.isValid()) {
4326 BuildMI(*MBB, BeforeMI, DL,
4327 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4328 .addReg(BP);
4329 }
4330
4331 // Make sure SP is aligned.
4332 if (SPAdjust)
4333 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4334
4335 // Emit unwinding information.
4336 if (FP.isValid() && needsDwarfCFI(MF)) {
4337 // Emit .cfi_remember_state to remember old frame.
4338 unsigned CFIIndex =
4339 MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
4340 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4341 .addCFIIndex(CFIIndex);
4342
4343 // Setup new CFA value with DW_CFA_def_cfa_expression:
4344 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4345 SmallString<64> CfaExpr;
4346 uint8_t buffer[16];
4347 int Offset = SPAdjust;
4348 if (BP.isValid())
4349 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4350 // If BeforeMI is a frame setup instruction, we need to adjust the position
4351 // and offset of the new cfi instruction.
4352 if (TII.isFrameSetup(*BeforeMI)) {
4353 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4354 BeforeMI = std::next(BeforeMI);
4355 }
4356 Register StackPtr = TRI->getStackRegister();
4357 if (STI.isTarget64BitILP32())
4358 StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
4359 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4360 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4361 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4362 CfaExpr.push_back(dwarf::DW_OP_deref);
4363 CfaExpr.push_back(dwarf::DW_OP_consts);
4364 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4365 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
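// The expression built above evaluates to *(SP + Offset) + 2 * SlotSize:
// the CFA is recomputed from the FP value that was just spilled to the
// stack (the DW_OP_consts 16 in the comment above is 2 * SlotSize on
// 64-bit targets).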
4366
4367 SmallString<64> DefCfaExpr;
4368 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4369 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4370 DefCfaExpr.append(CfaExpr.str());
4371 BuildCFI(*MBB, BeforeMI, DL,
4372 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4373 MachineInstr::FrameSetup);
4374 }
4375}
4376
4377void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4378 MachineBasicBlock::iterator AfterMI,
4379 Register FP, Register BP,
4380 int SPAdjust) const {
4381 assert(FP.isValid() || BP.isValid());
4382
4383 // Adjust SP so it points to spilled FP or BP.
4384 MachineBasicBlock *MBB = AfterMI->getParent();
4385 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4386 DebugLoc DL = AfterMI->getDebugLoc();
4387 if (SPAdjust)
4388 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4389
4390 // Restore BP.
4391 if (BP.isValid()) {
4392 BuildMI(*MBB, Pos, DL,
4393 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4394 }
4395
4396 // Restore FP.
4397 if (FP.isValid()) {
4398 BuildMI(*MBB, Pos, DL,
4399 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4400
4401 // Emit unwinding information.
4402 if (needsDwarfCFI(MF)) {
4403 // Restore original frame with .cfi_restore_state.
4404 unsigned CFIIndex =
4405 MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
4406 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4407 .addCFIIndex(CFIIndex);
4408 }
4409 }
4410}
4411
4412void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4413 MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
4414 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4415 assert(SpillFP || SpillBP);
4416
4417 Register FP, BP;
4418 const TargetRegisterClass *RC;
4419 unsigned NumRegs = 0;
4420
4421 if (SpillFP) {
4422 FP = TRI->getFrameRegister(MF);
4423 if (STI.isTarget64BitILP32())
4424 FP = Register(getX86SubSuperRegister(FP, 64));
4425 RC = TRI->getMinimalPhysRegClass(FP);
4426 ++NumRegs;
4427 }
4428 if (SpillBP) {
4429 BP = TRI->getBaseRegister();
4430 if (STI.isTarget64BitILP32())
4431 BP = Register(getX86SubSuperRegister(BP, 64));
4432 RC = TRI->getMinimalPhysRegClass(BP);
4433 ++NumRegs;
4434 }
4435 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4436
4437 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4438 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4439}
4440
4441bool X86FrameLowering::skipSpillFPBP(
4442 MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
4443 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4444 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4445 // SaveRbx = COPY RBX
4446 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4447 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4448 // We should skip this instruction sequence.
4449 int FI;
4450 unsigned Reg;
4451 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4452 MI->getOperand(1).getReg() == X86::RBX) &&
4453 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4454 ++MI;
4455 return true;
4456 }
4457 return false;
4458}
4459
4460 static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
4461 const TargetRegisterInfo *TRI, bool &AccessFP,
4462 bool &AccessBP) {
4463 AccessFP = AccessBP = false;
4464 if (FP) {
4465 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4466 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4467 AccessFP = true;
4468 }
4469 if (BP) {
4470 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4471 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4472 AccessBP = true;
4473 }
4474 return AccessFP || AccessBP;
4475}
4476
4477 // An invoke instruction has been lowered to a normal function call. We try to
4478 // figure out if MI comes from an invoke.
4479 // Do we have any better method?
4480static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4481 if (!MI.isCall())
4482 return false;
4483 if (InsideEHLabels)
4484 return true;
4485
4486 const MachineBasicBlock *MBB = MI.getParent();
4487 if (!MBB->hasEHPadSuccessor())
4488 return false;
4489
4490 // Check if there is another call instruction from MI to the end of MBB.
4491 MachineBasicBlock::const_iterator MBBI(MI), ME = MBB->end();
4492 for (++MBBI; MBBI != ME; ++MBBI)
4493 if (MBBI->isCall())
4494 return false;
4495 return true;
4496}
4497
4498/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4499 /// interfering stack access in the range, usually generated by a register spill.
4500void X86FrameLowering::checkInterferedAccess(
4501 MachineFunction &MF, MachineBasicBlock::reverse_iterator DefMI,
4502 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4503 bool SpillBP) const {
4504 if (DefMI == KillMI)
4505 return;
4506 if (TRI->hasBasePointer(MF)) {
4507 if (!SpillBP)
4508 return;
4509 } else {
4510 if (!SpillFP)
4511 return;
4512 }
4513
4514 auto MI = KillMI;
4515 while (MI != DefMI) {
4516 if (any_of(MI->operands(),
4517 [](const MachineOperand &MO) { return MO.isFI(); }))
4518 report_fatal_error(
4519 "Interference usage of base pointer/frame "
4520 "pointer.");
4521 MI++;
4522 }
4523}
4524
4525 /// If a function uses a base pointer and the base pointer is clobbered by inline
4526 /// asm, RA doesn't detect this case, and after the inline asm the base pointer
4527 /// contains a garbage value.
4528 /// For example, if a 32-bit x86 function uses the base pointer esi, and esi is
4529 /// clobbered by the following inline asm
4530/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4531/// We need to save esi before the asm and restore it after the asm.
4532///
4533 /// The problem can also occur with the frame pointer, if there is a function
4534 /// call whose callee uses a different calling convention and clobbers the fp.
4535///
4536 /// Because normal frame objects (spill slots) are accessed through the fp/bp
4537 /// register, we can't spill fp/bp to normal spill slots.
4538///
4539/// FIXME: There are 2 possible enhancements:
4540 /// 1. In many cases there are different physical registers not clobbered by the
4541 /// inline asm; we can use one of them as the base pointer, or use a virtual
4542 /// register as the base pointer and let RA allocate a physical register to it.
4543 /// 2. If no other instruction accesses the stack through fp/bp between the
4544 /// inline asm and the epilog, and there is no CFI requirement for a correct
4545 /// fp, we can skip the save and restore operations.
4546 void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
4547 Register FP, BP;
4548 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
4549 if (TFI.hasFP(MF))
4550 FP = TRI->getFrameRegister(MF);
4551 if (TRI->hasBasePointer(MF))
4552 BP = TRI->getBaseRegister();
4553
4554 // Currently only inline asm and function call can clobbers fp/bp. So we can
4555 // do some quick test and return early.
4556 if (!MF.hasInlineAsm()) {
4557 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4558 if (!X86FI->getFPClobberedByCall())
4559 FP = 0;
4560 if (!X86FI->getBPClobberedByCall())
4561 BP = 0;
4562 }
4563 if (!FP && !BP)
4564 return;
4565
4566 for (MachineBasicBlock &MBB : MF) {
4567 bool InsideEHLabels = false;
4568 auto MI = MBB.rbegin(), ME = MBB.rend();
4569 auto TermMI = MBB.getFirstTerminator();
4570 if (TermMI == MBB.begin())
4571 continue;
4572 MI = *(std::prev(TermMI));
4573
4574 while (MI != ME) {
4575 // Skip frame setup/destroy instructions.
4576 // Skip Invoke (call inside try block) instructions.
4577 // Skip instructions handled by target.
4578 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4579 MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
4580 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4581 ++MI;
4582 continue;
4583 }
4584
4585 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4586 InsideEHLabels = !InsideEHLabels;
4587 ++MI;
4588 continue;
4589 }
4590
4591 bool AccessFP, AccessBP;
4592 // Check if fp or bp is used in MI.
4593 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4594 ++MI;
4595 continue;
4596 }
4597
4598 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4599 // used.
4600 bool FPLive = false, BPLive = false;
4601 bool SpillFP = false, SpillBP = false;
4602 auto DefMI = MI, KillMI = MI;
4603 do {
4604 SpillFP |= AccessFP;
4605 SpillBP |= AccessBP;
4606
4607 // Maintain FPLive and BPLive.
4608 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4609 FPLive = false;
4610 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4611 FPLive = true;
4612 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4613 BPLive = false;
4614 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4615 BPLive = true;
4616
4617 DefMI = MI++;
4618 } while ((MI != ME) &&
4619 (FPLive || BPLive ||
4620 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4621
4622 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4623 if (FPLive && !SpillBP)
4624 continue;
4625
4626 // If the bp is clobbered by a call, we should save and restore outside of
4627 // the frame setup instructions.
4628 if (KillMI->isCall() && DefMI != ME) {
4629 auto FrameSetup = std::next(DefMI);
4630 // Look for frame setup instruction toward the start of the BB.
4631 // If we reach another call instruction, it means there is no frame
4632 // setup instruction for the current call instruction.
4633 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4634 !FrameSetup->isCall())
4635 ++FrameSetup;
4636 // If a frame setup instruction is found, we need to find out the
4637 // corresponding frame destroy instruction.
4638 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4639 (TII.getFrameSize(*FrameSetup) ||
4640 TII.getFrameAdjustment(*FrameSetup))) {
4641 while (!TII.isFrameInstr(*KillMI))
4642 --KillMI;
4643 DefMI = FrameSetup;
4644 MI = DefMI;
4645 ++MI;
4646 }
4647 }
4648
4649 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4650
4651 // Call target function to spill and restore FP and BP registers.
4652 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4653 }
4654 }
4655}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isFuncletReturnInstr(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
constexpr int64_t MaxSPChunk
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:160
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
reverse_iterator rbegin() const
Definition: ArrayRef.h:159
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:911
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1048
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:359
size_t arg_size() const
Definition: Function.h:907
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:688
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:661
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:582
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:693
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:656
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:575
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:617
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition: MCDwarf.h:676
OpType getOperation() const
Definition: MCDwarf.h:710
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:590
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:687
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:598
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition: MCDwarf.h:681
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1073
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1073
A single uniqued string.
Definition: Metadata.h:724
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to the callee-saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
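The MachineFrameInfo queries above are typically combined when sizing a frame. A hedged sketch (helper name illustrative; real prologue code also accounts for callee saves and realignment, and assumes the max call-frame size has been computed):

  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/Support/Alignment.h"

  // Rough frame-size estimate: fixed-size objects plus the largest
  // outgoing call-frame area, rounded up to the frame's max alignment.
  static uint64_t roughFrameBytes(const llvm::MachineFunction &MF) {
    const llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
    uint64_t Bytes = MFI.getStackSize();
    if (MFI.adjustsStack())
      Bytes += MFI.getMaxCallFrameSize();
    return llvm::alignTo(Bytes, MFI.getMaxAlign());
  }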
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of CFI instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution from one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
bool shouldSplitStack() const
Should we be emitting segmented stack code for this function?
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
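Several MachineFunction entries above (CreateMachineBasicBlock, push_front, front, insert) appear together whenever a new entry block is stitched in, as the segmented-stack and HiPE prologues do. A minimal sketch under that assumption (helper name illustrative; real code must also fix up live-ins):

  #include "llvm/CodeGen/MachineFunction.h"

  // Create a stub block and make it the new function entry, falling
  // through to the old entry. Assumes MF already has at least one block.
  static llvm::MachineBasicBlock *prependEntryStub(llvm::MachineFunction &MF) {
    llvm::MachineBasicBlock &OldEntry = MF.front();
    llvm::MachineBasicBlock *Stub =
        MF.CreateMachineBasicBlock(OldEntry.getBasicBlock());
    MF.push_front(Stub);
    Stub->addSuccessor(&OldEntry);
    return Stub;
  }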
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
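The MachineInstrBuilder entries above form the fluent interface used for every instruction this file emits. A hedged sketch of the chaining pattern, emitting a fixed 16-byte RSP adjustment (opcode choice illustrative; the real SP-update helper selects opcodes by operand size):

  #include "X86InstrInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"

  static void emitSmallSPAdjust(llvm::MachineBasicBlock &MBB,
                                llvm::MachineBasicBlock::iterator MBBI,
                                const llvm::DebugLoc &DL,
                                const llvm::X86InstrInfo &TII) {
    // SUB64ri32 RSP, 16, tagged as part of the frame setup sequence.
    llvm::BuildMI(MBB, MBBI, DL, TII.get(llvm::X86::SUB64ri32), llvm::X86::RSP)
        .addReg(llvm::X86::RSP)
        .addImm(16)
        .setMIFlag(llvm::MachineInstr::FrameSetup);
  }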
Representation of each machine instruction.
Definition: MachineInstr.h:71
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:580
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:501
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
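A small sketch of the operand-walking pattern behind getNumOperands/getOperand (the predicate itself is illustrative):

  #include "llvm/CodeGen/MachineInstr.h"

  // Return true if MI carries an immediate operand equal to Val.
  static bool hasImmOperand(const llvm::MachineInstr &MI, int64_t Val) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
      const llvm::MachineOperand &MO = MI.getOperand(I);
      if (MO.isImm() && MO.getImm() == Val)
        return true;
    }
    return false;
  }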
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:297
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:597
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
A tuple of MDNodes.
Definition: Metadata.h:1737
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1425
unsigned getNumOperands() const
Definition: Metadata.cpp:1421
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:121
Represents a location in source code.
Definition: SMLoc.h:23
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
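On x86 the scalable component is always zero, so StackOffset arithmetic reduces to its fixed part. A worked one-liner under that assumption:

  #include "llvm/Support/TypeSize.h"

  static int64_t twoSlotOffset() {
    llvm::StackOffset Off = llvm::StackOffset::getFixed(-8); // one 8-byte slot
    Off += llvm::StackOffset::getFixed(-16);                 // two more slots
    return Off.getFixed();                                   // -24
  }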
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:51
static constexpr size_t npos
Definition: StringRef.h:53
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
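A hedged sketch combining the TargetFrameLowering queries above: rounding a raw frame size up to the target's stack alignment (helper name illustrative):

  #include "llvm/CodeGen/TargetFrameLowering.h"
  #include "llvm/Support/Alignment.h"

  static uint64_t alignedFrameBytes(const llvm::TargetFrameLowering &TFL,
                                    uint64_t RawBytes) {
    // getStackAlign() yields the ABI stack alignment (16 on most x86-64).
    return llvm::alignTo(RawBytes, TFL.getStackAlign());
  }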
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:655
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:588
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:454
Value * getValue() const
Definition: Metadata.h:494
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return the initial CFA offset value, i.e. the one valid at the beginning of the function (before any stack operations).
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int64_t mergeSPAdd(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int64_t AddOffset, bool doMergeWithPrevious) const
Equivalent to: mergeSPUpdates(MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return the initial CFA register value, i.e. the one valid at the beginning of the function (before any stack operations).
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack frame.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
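A minimal sketch of invoking BuildCFI, assuming a call site with access to an X86FrameLowering instance and a slot size in scope (the directive mirrors, but is not copied from, the prologue code):

  #include "X86FrameLowering.h"
  #include "llvm/MC/MCDwarf.h"

  static void recordCfaAfterPush(const llvm::X86FrameLowering &XFL,
                                 llvm::MachineBasicBlock &MBB,
                                 llvm::MachineBasicBlock::iterator MBBI,
                                 const llvm::DebugLoc &DL, unsigned SlotSize) {
    // After the return address plus one push, the CFA sits 2 slots above RSP.
    XFL.BuildCFI(MBB, MBBI, DL,
                 llvm::MCCFIInstruction::cfiDefCfaOffset(nullptr, 2 * SlotSize),
                 llvm::MachineInstr::FrameSetup);
  }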
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" ...
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns the physical register used as the frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:329
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:118
bool isTargetDragonFly() const
Definition: X86Subtarget.h:287
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:305
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:285
bool isTargetWin64() const
Definition: X86Subtarget.h:333
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:395
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:309
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:346
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:286
bool isTargetNaCl64() const
Definition: X86Subtarget.h:301
bool isTargetWin32() const
Definition: X86Subtarget.h:335
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:218
bool isTargetLinux() const
Definition: X86Subtarget.h:295
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
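The four probe hooks above are queried together when deciding how a large frame should be touched. A hedged sketch of that decision (threshold logic simplified relative to the real backend):

  #include "X86ISelLowering.h"

  static bool wantsProbeCall(const llvm::X86TargetLowering &TLI,
                             const llvm::MachineFunction &MF,
                             uint64_t FrameBytes) {
    // Call an out-of-line probe symbol only when one is available, inline
    // probing was not requested, and the frame spans a full probe interval.
    return TLI.hasStackProbeSymbol(MF) && !TLI.hasInlineStackProbe(MF) &&
           FrameBytes >= TLI.getStackProbeSize(MF);
  }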
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:556
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:194
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
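A worked example of the three alignment helpers above (values arbitrary):

  #include "llvm/Support/Alignment.h"
  #include "llvm/Support/MathExtras.h"

  static void alignmentDemo() {
    llvm::Align A(16);
    uint64_t Up = llvm::alignTo(40, A);      // 48: next multiple of 16
    uint64_t Down = llvm::alignDown(40, 16); // 32: previous multiple of 16
    bool Ok = llvm::isAligned(A, Up);        // true
    (void)Down; (void)Ok;
  }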
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
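A minimal sketch of the LEB128 encoders above, writing into a SmallString through raw_svector_ostream (the pairing of values is illustrative):

  #include "llvm/ADT/SmallString.h"
  #include "llvm/Support/LEB128.h"
  #include "llvm/Support/raw_ostream.h"

  static llvm::SmallString<16> encodePair(uint64_t U, int64_t S) {
    llvm::SmallString<16> Buf;
    llvm::raw_svector_ostream OS(Buf);
    llvm::encodeULEB128(U, OS); // unsigned: 7 value bits per byte
    llvm::encodeSLEB128(S, OS); // signed: sign bit carried in final byte
    return Buf;
  }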
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
union llvm::TargetFrameLowering::DwarfFrameBase::(anonymous union) Location
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76