LLVM 22.0.0git
X86FrameLowering.cpp
Go to the documentation of this file.
1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/Module.h"
33#include "llvm/MC/MCAsmInfo.h"
35#include "llvm/MC/MCSymbol.h"
36#include "llvm/Support/LEB128.h"
38#include <cstdlib>
39
40#define DEBUG_TYPE "x86-fl"
41
42STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
43STATISTIC(NumFrameExtraProbe,
44 "Number of extra stack probes generated in prologue");
45STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
46
47using namespace llvm;
48
50 MaybeAlign StackAlignOverride)
51 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
52 STI.is64Bit() ? -8 : -4),
53 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
54 // Cache a bunch of frame-related predicates for this subtarget.
56 Is64Bit = STI.is64Bit();
58 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
61}
62
64 return !MF.getFrameInfo().hasVarSizedObjects() &&
65 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
66 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
67}
68
69/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
70/// call frame pseudos can be simplified. Having a FP, as in the default
71/// implementation, is not sufficient here since we can't always use it.
72/// Use a more nuanced condition.
74 const MachineFunction &MF) const {
75 return hasReservedCallFrame(MF) ||
76 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
77 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
79}
80
81// needsFrameIndexResolution - Do we need to perform FI resolution for
82// this function. Normally, this is required only when the function
83// has any stack objects. However, FI resolution actually has another job,
84// not apparent from the title - it resolves callframesetup/destroy
85// that were not simplified earlier.
86// So, this is required for x86 functions that have push sequences even
87// when there are no stack objects.
89 const MachineFunction &MF) const {
90 return MF.getFrameInfo().hasStackObjects() ||
91 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
92}
93
94/// hasFPImpl - Return true if the specified function should have a dedicated
95/// frame pointer register. This is true if the function has variable sized
96/// allocas or if frame pointer elimination is disabled.
98 const MachineFrameInfo &MFI = MF.getFrameInfo();
99 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
100 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
104 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
105 MFI.hasStackMap() || MFI.hasPatchPoint() ||
106 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
107}
108
109static unsigned getSUBriOpcode(bool IsLP64) {
110 return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
111}
112
113static unsigned getADDriOpcode(bool IsLP64) {
114 return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
115}
116
117static unsigned getSUBrrOpcode(bool IsLP64) {
118 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
119}
120
121static unsigned getADDrrOpcode(bool IsLP64) {
122 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
123}
124
125static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
126 return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
127}
128
129static unsigned getLEArOpcode(bool IsLP64) {
130 return IsLP64 ? X86::LEA64r : X86::LEA32r;
131}
132
133static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
134 if (Use64BitReg) {
135 if (isUInt<32>(Imm))
136 return X86::MOV32ri64;
137 if (isInt<32>(Imm))
138 return X86::MOV64ri32;
139 return X86::MOV64ri;
140 }
141 return X86::MOV32ri;
142}
143
144// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
145// value written by the PUSH from the stack. The processor tracks these marked
146// instructions internally and fast-forwards register data between matching PUSH
147// and POP instructions, without going through memory or through the training
148// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
149// memory-renaming optimization can be used.
150//
151// The PPX hint is purely a performance hint. Instructions with this hint have
152// the same functional semantics as those without. PPX hints set by the
153// compiler that violate the balancing rule may turn off the PPX optimization,
154// but they will not affect program semantics.
155//
156// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
157// are not considered).
158//
159// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
160// GPRs at a time to/from the stack.
161static unsigned getPUSHOpcode(const X86Subtarget &ST) {
162 return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
163 : X86::PUSH32r;
164}
165static unsigned getPOPOpcode(const X86Subtarget &ST) {
166 return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
167 : X86::POP32r;
168}
169static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
170 return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
171}
172static unsigned getPOP2Opcode(const X86Subtarget &ST) {
173 return ST.hasPPX() ? X86::POP2P : X86::POP2;
174}
175
178 MCRegister Reg = RegMask.PhysReg;
179
180 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
181 Reg == X86::AH || Reg == X86::AL)
182 return true;
183 }
184
185 return false;
186}
187
188/// Check if the flags need to be preserved before the terminators.
189/// This would be the case, if the eflags is live-in of the region
190/// composed by the terminators or live-out of that region, without
191/// being defined by a terminator.
192static bool
194 for (const MachineInstr &MI : MBB.terminators()) {
195 bool BreakNext = false;
196 for (const MachineOperand &MO : MI.operands()) {
197 if (!MO.isReg())
198 continue;
199 Register Reg = MO.getReg();
200 if (Reg != X86::EFLAGS)
201 continue;
202
203 // This terminator needs an eflags that is not defined
204 // by a previous another terminator:
205 // EFLAGS is live-in of the region composed by the terminators.
206 if (!MO.isDef())
207 return true;
208 // This terminator defines the eflags, i.e., we don't need to preserve it.
209 // However, we still need to check this specific terminator does not
210 // read a live-in value.
211 BreakNext = true;
212 }
213 // We found a definition of the eflags, no need to preserve them.
214 if (BreakNext)
215 return false;
216 }
217
218 // None of the terminators use or define the eflags.
219 // Check if they are live-out, that would imply we need to preserve them.
220 for (const MachineBasicBlock *Succ : MBB.successors())
221 if (Succ->isLiveIn(X86::EFLAGS))
222 return true;
223
224 return false;
225}
226
// Largest stack-pointer adjustment handled in one step: the maximum signed
// 32-bit immediate (2^31 - 1). Bigger adjustments are split into chunks or
// materialized through a scratch register (see emitSPUpdate).
constexpr int64_t MaxSPChunk = (1LL << 31) - 1;
228
229/// emitSPUpdate - Emit a series of instructions to increment / decrement the
230/// stack pointer by a constant value.
233 const DebugLoc &DL, int64_t NumBytes,
234 bool InEpilogue) const {
235 bool isSub = NumBytes < 0;
236 uint64_t Offset = isSub ? -NumBytes : NumBytes;
239
240 if (!Uses64BitFramePtr && !isUInt<32>(Offset)) {
241 // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more.
242 // This might be unreachable code, so don't complain now; just trap if
243 // it's reached at runtime.
244 BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
245 return;
246 }
247
248 uint64_t Chunk = MaxSPChunk;
249
253 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
254
255 // It's ok to not take into account large chunks when probing, as the
256 // allocation is split in smaller chunks anyway.
257 if (EmitInlineStackProbe && !InEpilogue) {
258
259 // This pseudo-instruction is going to be expanded, potentially using a
260 // loop, by inlineStackProbe().
261 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
262 return;
263 } else if (Offset > Chunk) {
264 // Rather than emit a long series of instructions for large offsets,
265 // load the offset into a register and do one sub/add
266 unsigned Reg = 0;
267 unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);
268
269 if (isSub && !isEAXLiveIn(MBB))
270 Reg = Rax;
271 else
273 Uses64BitFramePtr ? 64 : 32);
274
275 unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
277 if (Reg) {
279 Reg)
280 .addImm(Offset)
281 .setMIFlag(Flag);
282 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
284 .addReg(Reg);
285 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
286 return;
287 } else if (Offset > 8 * Chunk) {
288 // If we would need more than 8 add or sub instructions (a >16GB stack
289 // frame), it's worth spilling RAX to materialize this immediate.
290 // pushq %rax
291 // movabsq +-$Offset+-SlotSize, %rax
292 // addq %rsp, %rax
293 // xchg %rax, (%rsp)
294 // movq (%rsp), %rsp
295 assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
296 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
298 .setMIFlag(Flag);
299 // Subtract is not commutative, so negate the offset and always use add.
300 // Subtract 8 less and add 8 more to account for the PUSH we just did.
301 if (isSub)
302 Offset = -(Offset - SlotSize);
303 else
306 Rax)
307 .addImm(Offset)
308 .setMIFlag(Flag);
309 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
310 .addReg(Rax)
312 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
313 // Exchange the new SP in RAX with the top of the stack.
315 BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
316 StackPtr, false, 0);
317 // Load new SP from the top of the stack into RSP.
318 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
319 StackPtr, false, 0);
320 return;
321 }
322 }
323
324 while (Offset) {
325 uint64_t ThisVal = std::min(Offset, Chunk);
326 if (ThisVal == SlotSize) {
327 // Use push / pop for slot sized adjustments as a size optimization. We
328 // need to find a dead register when using pop.
329 unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
331 if (Reg) {
332 unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
333 : (Is64Bit ? X86::POP64r : X86::POP32r);
334 BuildMI(MBB, MBBI, DL, TII.get(Opc))
335 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
336 .setMIFlag(Flag);
337 Offset -= ThisVal;
338 continue;
339 }
340 }
341
342 BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
343 .setMIFlag(Flag);
344
345 Offset -= ThisVal;
346 }
347}
348
349MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
351 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
352 assert(Offset != 0 && "zero offset stack adjustment requested");
353
354 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
355 // is tricky.
356 bool UseLEA;
357 if (!InEpilogue) {
358 // Check if inserting the prologue at the beginning
359 // of MBB would require to use LEA operations.
360 // We need to use LEA operations if EFLAGS is live in, because
361 // it means an instruction will read it before it gets defined.
362 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
363 } else {
364 // If we can use LEA for SP but we shouldn't, check that none
365 // of the terminators uses the eflags. Otherwise we will insert
366 // a ADD that will redefine the eflags and break the condition.
367 // Alternatively, we could move the ADD, but this may not be possible
368 // and is an optimization anyway.
370 if (UseLEA && !STI.useLeaForSP())
372 // If that assert breaks, that means we do not do the right thing
373 // in canUseAsEpilogue.
375 "We shouldn't have allowed this insertion point");
376 }
377
379 if (UseLEA) {
382 StackPtr),
383 StackPtr, false, Offset);
384 } else {
385 bool IsSub = Offset < 0;
386 uint64_t AbsOffset = IsSub ? -Offset : Offset;
387 const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
389 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
391 .addImm(AbsOffset);
392 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
393 }
394 return MI;
395}
396
397template <typename FoundT, typename CalcT>
398int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
400 FoundT FoundStackAdjust,
401 CalcT CalcNewOffset,
402 bool doMergeWithPrevious) const {
403 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
404 (!doMergeWithPrevious && MBBI == MBB.end()))
405 return CalcNewOffset(0);
406
407 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
408
410 // It is assumed that ADD/SUB/LEA instruction is succeded by one CFI
411 // instruction, and that there are no DBG_VALUE or other instructions between
412 // ADD/SUB/LEA and its corresponding CFI instruction.
413 /* TODO: Add support for the case where there are multiple CFI instructions
414 below the ADD/SUB/LEA, e.g.:
415 ...
416 add
417 cfi_def_cfa_offset
418 cfi_offset
419 ...
420 */
421 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
422 PI = std::prev(PI);
423
424 int64_t Offset = 0;
425 for (;;) {
426 unsigned Opc = PI->getOpcode();
427
428 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
429 PI->getOperand(0).getReg() == StackPtr) {
430 assert(PI->getOperand(1).getReg() == StackPtr);
431 Offset = PI->getOperand(2).getImm();
432 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
433 PI->getOperand(0).getReg() == StackPtr &&
434 PI->getOperand(1).getReg() == StackPtr &&
435 PI->getOperand(2).getImm() == 1 &&
436 PI->getOperand(3).getReg() == X86::NoRegister &&
437 PI->getOperand(5).getReg() == X86::NoRegister) {
438 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
439 Offset = PI->getOperand(4).getImm();
440 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
441 PI->getOperand(0).getReg() == StackPtr) {
442 assert(PI->getOperand(1).getReg() == StackPtr);
443 Offset = -PI->getOperand(2).getImm();
444 } else
445 return CalcNewOffset(0);
446
447 FoundStackAdjust(PI, Offset);
448 if (std::abs((int64_t)CalcNewOffset(Offset)) < MaxSPChunk)
449 break;
450
451 if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
452 return CalcNewOffset(0);
453
454 PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
455 }
456
457 PI = MBB.erase(PI);
458 if (PI != MBB.end() && PI->isCFIInstruction()) {
459 auto CIs = MBB.getParent()->getFrameInstructions();
460 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
463 PI = MBB.erase(PI);
464 }
465 if (!doMergeWithPrevious)
467
468 return CalcNewOffset(Offset);
469}
470
473 int64_t AddOffset,
474 bool doMergeWithPrevious) const {
475 return mergeSPUpdates(
476 MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },
477 doMergeWithPrevious);
478}
479
482 const DebugLoc &DL,
483 const MCCFIInstruction &CFIInst,
484 MachineInstr::MIFlag Flag) const {
486 unsigned CFIIndex = MF.addFrameInst(CFIInst);
487
489 MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
490
491 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
492 .addCFIIndex(CFIIndex)
493 .setMIFlag(Flag);
494}
495
496/// Emits Dwarf Info specifying offsets of callee saved registers and
497/// frame pointer. This is called only when basic block sections are enabled.
501 if (!hasFP(MF)) {
503 return;
504 }
507 const Register MachineFramePtr =
509 : FramePtr;
510 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
511 // Offset = space for return address + size of the frame pointer itself.
512 int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
514 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
516}
517
520 const DebugLoc &DL, bool IsPrologue) const {
522 MachineFrameInfo &MFI = MF.getFrameInfo();
525
526 // Add callee saved registers to move list.
527 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
528
529 // Calculate offsets.
530 for (const CalleeSavedInfo &I : CSI) {
531 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
532 MCRegister Reg = I.getReg();
533 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
534
535 if (IsPrologue) {
536 if (X86FI->getStackPtrSaveMI()) {
537 // +2*SlotSize because there is return address and ebp at the bottom
538 // of the stack.
539 // | retaddr |
540 // | ebp |
541 // | |<--ebp
542 Offset += 2 * SlotSize;
543 SmallString<64> CfaExpr;
544 CfaExpr.push_back(dwarf::DW_CFA_expression);
545 uint8_t buffer[16];
546 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
547 CfaExpr.push_back(2);
549 const Register MachineFramePtr =
552 : FramePtr;
553 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
554 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
555 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
557 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
559 } else {
561 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
562 }
563 } else {
565 MCCFIInstruction::createRestore(nullptr, DwarfReg));
566 }
567 }
568 if (auto *MI = X86FI->getStackPtrSaveMI()) {
569 int FI = MI->getOperand(1).getIndex();
570 int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
571 SmallString<64> CfaExpr;
573 const Register MachineFramePtr =
576 : FramePtr;
577 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
578 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
579 uint8_t buffer[16];
580 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
581 CfaExpr.push_back(dwarf::DW_OP_deref);
582
583 SmallString<64> DefCfaExpr;
584 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
585 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
586 DefCfaExpr.append(CfaExpr.str());
587 // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
589 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
591 }
592}
593
594void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
595 MachineBasicBlock &MBB) const {
596 const MachineFunction &MF = *MBB.getParent();
597
598 // Insertion point.
600
601 // Fake a debug loc.
602 DebugLoc DL;
603 if (MBBI != MBB.end())
604 DL = MBBI->getDebugLoc();
605
606 // Zero out FP stack if referenced. Do this outside of the loop below so that
607 // it's done only once.
608 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
609 for (MCRegister Reg : RegsToZero.set_bits()) {
610 if (!X86::RFP80RegClass.contains(Reg))
611 continue;
612
613 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
614 for (unsigned i = 0; i != NumFPRegs; ++i)
615 BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
616
617 for (unsigned i = 0; i != NumFPRegs; ++i)
618 BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
619 break;
620 }
621
622 // For GPRs, we only care to clear out the 32-bit register.
623 BitVector GPRsToZero(TRI->getNumRegs());
624 for (MCRegister Reg : RegsToZero.set_bits())
625 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
626 GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
627 RegsToZero.reset(Reg);
628 }
629
630 // Zero out the GPRs first.
631 for (MCRegister Reg : GPRsToZero.set_bits())
633
634 // Zero out the remaining registers.
635 for (MCRegister Reg : RegsToZero.set_bits())
637}
638
641 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
642 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
645 if (InProlog) {
646 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
647 .addImm(0 /* no explicit stack size */);
648 } else {
649 emitStackProbeInline(MF, MBB, MBBI, DL, false);
650 }
651 } else {
652 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
653 }
654}
655
657 return STI.isOSWindows() && !STI.isTargetWin64();
658}
659
661 MachineBasicBlock &PrologMBB) const {
662 auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
663 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
664 });
665 if (Where != PrologMBB.end()) {
666 DebugLoc DL = PrologMBB.findDebugLoc(Where);
667 emitStackProbeInline(MF, PrologMBB, Where, DL, true);
668 Where->eraseFromParent();
669 }
670}
671
672void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
675 const DebugLoc &DL,
676 bool InProlog) const {
678 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
679 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
680 else
681 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
682}
683
684void X86FrameLowering::emitStackProbeInlineGeneric(
686 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
687 MachineInstr &AllocWithProbe = *MBBI;
688 uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
689
692 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
693 "different expansion expected for CoreCLR 64 bit");
694
695 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
696 uint64_t ProbeChunk = StackProbeSize * 8;
697
698 uint64_t MaxAlign =
699 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
700
701 // Synthesize a loop or unroll it, depending on the number of iterations.
702 // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
703 // between the unaligned rsp and current rsp.
704 if (Offset > ProbeChunk) {
705 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
706 MaxAlign % StackProbeSize);
707 } else {
708 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
709 MaxAlign % StackProbeSize);
710 }
711}
712
713void X86FrameLowering::emitStackProbeInlineGenericBlock(
716 uint64_t AlignOffset) const {
717
718 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
719 const bool HasFP = hasFP(MF);
722 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
723 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
724
725 uint64_t CurrentOffset = 0;
726
727 assert(AlignOffset < StackProbeSize);
728
729 // If the offset is so small it fits within a page, there's nothing to do.
730 if (StackProbeSize < Offset + AlignOffset) {
731
732 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
733 BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
735 if (!HasFP && NeedsDwarfCFI) {
736 BuildCFI(
737 MBB, MBBI, DL,
738 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
739 }
740
741 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
743 StackPtr, false, 0)
744 .addImm(0)
746 NumFrameExtraProbe++;
747 CurrentOffset = StackProbeSize - AlignOffset;
748 }
749
750 // For the next N - 1 pages, just probe. I tried to take advantage of
751 // natural probes but it implies much more logic and there was very few
752 // interesting natural probes to interleave.
753 while (CurrentOffset + StackProbeSize < Offset) {
754 BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
756
757 if (!HasFP && NeedsDwarfCFI) {
758 BuildCFI(
759 MBB, MBBI, DL,
760 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
761 }
762 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
764 StackPtr, false, 0)
765 .addImm(0)
767 NumFrameExtraProbe++;
768 CurrentOffset += StackProbeSize;
769 }
770
771 // No need to probe the tail, it is smaller than a Page.
772 uint64_t ChunkSize = Offset - CurrentOffset;
773 if (ChunkSize == SlotSize) {
774 // Use push for slot sized adjustments as a size optimization,
775 // like emitSPUpdate does when not probing.
776 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
777 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
778 BuildMI(MBB, MBBI, DL, TII.get(Opc))
781 } else {
782 BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
784 }
785 // No need to adjust Dwarf CFA offset here, the last position of the stack has
786 // been defined
787}
788
789void X86FrameLowering::emitStackProbeInlineGenericLoop(
792 uint64_t AlignOffset) const {
793 assert(Offset && "null offset");
794
795 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
797 "Inline stack probe loop will clobber live EFLAGS.");
798
799 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
800 const bool HasFP = hasFP(MF);
803 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
804 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
805
806 if (AlignOffset) {
807 if (AlignOffset < StackProbeSize) {
808 // Perform a first smaller allocation followed by a probe.
809 BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
811
812 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
814 StackPtr, false, 0)
815 .addImm(0)
817 NumFrameExtraProbe++;
818 Offset -= AlignOffset;
819 }
820 }
821
822 // Synthesize a loop
823 NumFrameLoopProbe++;
824 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
825
826 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
827 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
828
830 MF.insert(MBBIter, testMBB);
831 MF.insert(MBBIter, tailMBB);
832
833 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
834 : Is64Bit ? X86::R11D
835 : X86::EAX;
836
837 // save loop bound
838 {
839 const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);
840
841 // Can we calculate the loop bound using SUB with a 32-bit immediate?
842 // Note that the immediate gets sign-extended when used with a 64-bit
843 // register, so in that case we only have 31 bits to work with.
844 bool canUseSub =
845 Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);
846
847 if (canUseSub) {
848 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
849
850 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
853 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
854 .addReg(FinalStackProbed)
855 .addImm(BoundOffset)
857 } else if (Uses64BitFramePtr) {
858 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
859 .addImm(-BoundOffset)
861 BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
862 .addReg(FinalStackProbed)
865 } else {
866 llvm_unreachable("Offset too large for 32-bit stack pointer");
867 }
868
869 // while in the loop, use loop-invariant reg for CFI,
870 // instead of the stack pointer, which changes during the loop
871 if (!HasFP && NeedsDwarfCFI) {
872 // x32 uses the same DWARF register numbers as x86-64,
873 // so there isn't a register number for r11d, we must use r11 instead
874 const Register DwarfFinalStackProbed =
876 ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
877 : FinalStackProbed;
878
881 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
883 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
884 }
885 }
886
887 // allocate a page
888 BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
889 /*InEpilogue=*/false)
891
892 // touch the page
893 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
895 StackPtr, false, 0)
896 .addImm(0)
898
899 // cmp with stack pointer bound
900 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
902 .addReg(FinalStackProbed)
904
905 // jump
906 BuildMI(testMBB, DL, TII.get(X86::JCC_1))
907 .addMBB(testMBB)
910 testMBB->addSuccessor(testMBB);
911 testMBB->addSuccessor(tailMBB);
912
913 // BB management
914 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
916 MBB.addSuccessor(testMBB);
917
918 // handle tail
919 const uint64_t TailOffset = Offset % StackProbeSize;
920 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
921 if (TailOffset) {
922 BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
923 /*InEpilogue=*/false)
925 }
926
927 // after the loop, switch back to stack pointer for CFI
928 if (!HasFP && NeedsDwarfCFI) {
929 // x32 uses the same DWARF register numbers as x86-64,
930 // so there isn't a register number for esp, we must use rsp instead
931 const Register DwarfStackPtr =
935
936 BuildCFI(*tailMBB, TailMBBIter, DL,
938 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
939 }
940
941 // Update Live In information
942 fullyRecomputeLiveIns({tailMBB, testMBB});
943}
944
945void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
947 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
949 assert(STI.is64Bit() && "different expansion needed for 32 bit");
950 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
952 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
953
954 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
956 "Inline stack probe loop will clobber live EFLAGS.");
957
958 // RAX contains the number of bytes of desired stack adjustment.
959 // The handling here assumes this value has already been updated so as to
960 // maintain stack alignment.
961 //
962 // We need to exit with RSP modified by this amount and execute suitable
963 // page touches to notify the OS that we're growing the stack responsibly.
964 // All stack probing must be done without modifying RSP.
965 //
966 // MBB:
967 // SizeReg = RAX;
968 // ZeroReg = 0
969 // CopyReg = RSP
970 // Flags, TestReg = CopyReg - SizeReg
971 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
972 // LimitReg = gs magic thread env access
973 // if FinalReg >= LimitReg goto ContinueMBB
974 // RoundBB:
975 // RoundReg = page address of FinalReg
976 // LoopMBB:
977 // LoopReg = PHI(LimitReg,ProbeReg)
978 // ProbeReg = LoopReg - PageSize
979 // [ProbeReg] = 0
980 // if (ProbeReg > RoundReg) goto LoopMBB
981 // ContinueMBB:
982 // RSP = RSP - RAX
983 // [rest of original MBB]
984
985 // Set up the new basic blocks
986 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
987 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
988 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
989
990 MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
991 MF.insert(MBBIter, RoundMBB);
992 MF.insert(MBBIter, LoopMBB);
993 MF.insert(MBBIter, ContinueMBB);
994
995 // Split MBB and move the tail portion down to ContinueMBB.
996 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
997 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
999
1000 // Some useful constants
1001 const int64_t ThreadEnvironmentStackLimit = 0x10;
1002 const int64_t PageSize = 0x1000;
1003 const int64_t PageMask = ~(PageSize - 1);
1004
1005 // Registers we need. For the normal case we use virtual
1006 // registers. For the prolog expansion we use RAX, RCX and RDX.
1008 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
1009 const Register
1010 SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
1011 ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1012 CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1013 TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1014 FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1015 RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1016 LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1017 JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1018 ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);
1019
1020 // SP-relative offsets where we can save RCX and RDX.
1021 int64_t RCXShadowSlot = 0;
1022 int64_t RDXShadowSlot = 0;
1023
1024 // If inlining in the prolog, save RCX and RDX.
1025 if (InProlog) {
1026 // Compute the offsets. We need to account for things already
1027 // pushed onto the stack at this point: return address, frame
1028 // pointer (if used), and callee saves.
1030 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
1031 const bool HasFP = hasFP(MF);
1032
1033 // Check if we need to spill RCX and/or RDX.
1034 // Here we assume that no earlier prologue instruction changes RCX and/or
1035 // RDX, so checking the block live-ins is enough.
1036 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
1037 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
1038 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
1039 // Assign the initial slot to both registers, then change RDX's slot if both
1040 // need to be spilled.
1041 if (IsRCXLiveIn)
1042 RCXShadowSlot = InitSlot;
1043 if (IsRDXLiveIn)
1044 RDXShadowSlot = InitSlot;
1045 if (IsRDXLiveIn && IsRCXLiveIn)
1046 RDXShadowSlot += 8;
1047 // Emit the saves if needed.
1048 if (IsRCXLiveIn)
1049 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1050 RCXShadowSlot)
1051 .addReg(X86::RCX);
1052 if (IsRDXLiveIn)
1053 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1054 RDXShadowSlot)
1055 .addReg(X86::RDX);
1056 } else {
1057 // Not in the prolog. Copy RAX to a virtual reg.
1058 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
1059 }
1060
1061 // Add code to MBB to check for overflow and set the new target stack pointer
1062 // to zero if so.
1063 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
1064 .addReg(ZeroReg, RegState::Undef)
1065 .addReg(ZeroReg, RegState::Undef);
1066 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
1067 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
1068 .addReg(CopyReg)
1069 .addReg(SizeReg);
1070 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
1071 .addReg(TestReg)
1072 .addReg(ZeroReg)
1074
1075 // FinalReg now holds final stack pointer value, or zero if
1076 // allocation would overflow. Compare against the current stack
1077 // limit from the thread environment block. Note this limit is the
1078 // lowest touched page on the stack, not the point at which the OS
1079 // will cause an overflow exception, so this is just an optimization
1080 // to avoid unnecessarily touching pages that are below the current
1081 // SP but already committed to the stack by the OS.
1082 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
1083 .addReg(0)
1084 .addImm(1)
1085 .addReg(0)
1086 .addImm(ThreadEnvironmentStackLimit)
1087 .addReg(X86::GS);
1088 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
1089 // Jump if the desired stack pointer is at or above the stack limit.
1090 BuildMI(&MBB, DL, TII.get(X86::JCC_1))
1091 .addMBB(ContinueMBB)
1093
1094 // Add code to roundMBB to round the final stack pointer to a page boundary.
1095 if (InProlog)
1096 RoundMBB->addLiveIn(FinalReg);
1097 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
1098 .addReg(FinalReg)
1099 .addImm(PageMask);
1100 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
1101
1102 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1103 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1104 // and probe until we reach RoundedReg.
1105 if (!InProlog) {
1106 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
1107 .addReg(LimitReg)
1108 .addMBB(RoundMBB)
1109 .addReg(ProbeReg)
1110 .addMBB(LoopMBB);
1111 }
1112
1113 if (InProlog)
1114 LoopMBB->addLiveIn(JoinReg);
1115 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
1116 false, -PageSize);
1117
1118 // Probe by storing a byte onto the stack.
1119 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
1120 .addReg(ProbeReg)
1121 .addImm(1)
1122 .addReg(0)
1123 .addImm(0)
1124 .addReg(0)
1125 .addImm(0);
1126
1127 if (InProlog)
1128 LoopMBB->addLiveIn(RoundedReg);
1129 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
1130 .addReg(RoundedReg)
1131 .addReg(ProbeReg);
1132 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
1133 .addMBB(LoopMBB)
1135
1136 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1137
1138 // If in prolog, restore RDX and RCX.
1139 if (InProlog) {
1140 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1141 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1142 TII.get(X86::MOV64rm), X86::RCX),
1143 X86::RSP, false, RCXShadowSlot);
1144 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1145 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1146 TII.get(X86::MOV64rm), X86::RDX),
1147 X86::RSP, false, RDXShadowSlot);
1148 }
1149
1150 // Now that the probing is done, add code to continueMBB to update
1151 // the stack pointer for real.
1152 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
1153 .addReg(X86::RSP)
1154 .addReg(SizeReg);
1155
1156 // Add the control flow edges we need.
1157 MBB.addSuccessor(ContinueMBB);
1158 MBB.addSuccessor(RoundMBB);
1159 RoundMBB->addSuccessor(LoopMBB);
1160 LoopMBB->addSuccessor(ContinueMBB);
1161 LoopMBB->addSuccessor(LoopMBB);
1162
1163 if (InProlog) {
1164 LivePhysRegs LiveRegs;
1165 computeAndAddLiveIns(LiveRegs, *ContinueMBB);
1166 }
1167
1168 // Mark all the instructions added to the prolog as frame setup.
1169 if (InProlog) {
1170 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1171 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1172 }
1173 for (MachineInstr &MI : *RoundMBB) {
1175 }
1176 for (MachineInstr &MI : *LoopMBB) {
1178 }
1179 for (MachineInstr &MI :
1180 llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
1182 }
1183 }
1184}
1185
// Emit a call to the target's stack-probe helper (__chkstk and friends) to
// probe the stack for an allocation whose byte count the caller has placed
// in (R|E)AX; on targets whose helper does not adjust SP itself, a SUB of
// AX from SP is emitted after the call.
// NOTE(review): several interior lines are missing from this excerpt
// (internal numbering skips 1187, 1198, 1207, 1209, 1214, 1218, 1222,
// 1227-1231, 1242); comments below describe only the visible code.
1186void X86FrameLowering::emitStackProbeCall(
1188    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1189    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1190  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1191
1192  // FIXME: Add indirect thunk support and remove this.
1193  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1194    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
1195                       "code model and indirect thunks not yet implemented.");
1196
  // The expansion below clobbers EFLAGS, so EFLAGS must not be live here.
1197  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1199         "Stack probe calls will clobber live EFLAGS.");
1200
  // Pick the call opcode: the large code model cannot use a pcrel32 call.
1201  unsigned CallOp;
1202  if (Is64Bit)
1203    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1204  else
1205    CallOp = X86::CALLpcrel32;
1206
1208
  // Remember the instruction just before the expansion so every inserted
  // instruction can be flagged as FrameSetup at the end (when InProlog).
1210  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
1211
1212  // All current stack probes take AX and SP as input, clobber flags, and
1213  // preserve all registers. x86_64 probes leave RSP unmodified.
1215    // For the large code model, we have to call through a register. Use R11,
1216    // as it is scratch in all supported calling conventions.
1217    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
1219    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
1220  } else {
1221    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
1223  }
1224
1225  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1226  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1232
  // ModInst is the instruction that defines the final SP value; it is the
  // call itself unless we emit an explicit SP adjustment below.
1233  MachineInstr *ModInst = CI;
1234  if (STI.isTargetWin64() || !STI.isOSWindows()) {
1235    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1236    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1237    // themselves. They also do not clobber %rax so we can reuse it when
1238    // adjusting %rsp.
1239    // All other platforms do not specify a particular ABI for the stack probe
1240    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1241    ModInst =
1243            .addReg(SP)
1244            .addReg(AX);
1245  }
1246
1247  // DebugInfo variable locations -- if there's an instruction number for the
1248  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1249  // modifies SP.
1250  if (InstrNum) {
1251    if (STI.isTargetWin64() || !STI.isOSWindows()) {
1252      // Label destination operand of the subtract.
1253      MF.makeDebugValueSubstitution(*InstrNum,
1254                                    {ModInst->getDebugInstrNum(), 0});
1255    } else {
1256      // Label the call. The operand number is the penultimate operand, zero
1257      // based.
1258      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1260          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1261    }
1262  }
1263
1264  if (InProlog) {
1265    // Apply the frame setup flag to all inserted instrs.
1266    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1267      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1268  }
1269}
1270
/// Compute the frame-pointer offset to encode in UWOP_SET_FPREG for a stack
/// adjustment of \p SPAdjust bytes.
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Although the Win64 ABI permits offsets up to 240, capping at 128 works
  // equally well and might require smaller successive adjustments.
  constexpr uint64_t Win64MaxSEHOffset = 128;
  const uint64_t Clamped =
      SPAdjust < Win64MaxSEHOffset ? SPAdjust : Win64MaxSEHOffset;
  // The UWOP_SET_FPREG opcode requires 16-byte alignment, so round down.
  return static_cast<unsigned>(Clamped & ~uint64_t(15));
}
1279
1280// If we're forcing a stack realignment we can't rely on just the frame
1281// info, we need to know the ABI stack alignment as well in case we
1282// have a call out. Otherwise just make sure we have some alignment - we'll
1283// go with the minimum SlotSize.
// Returns the maximum stack alignment (in bytes) this frame must provide.
// NOTE(review): the return-type line (original 1284) is missing from this
// excerpt.
1285X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1286  const MachineFrameInfo &MFI = MF.getFrameInfo();
1287  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1288  Align StackAlign = getStackAlign();
1289  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
1290  if (HasRealign) {
    // With forced realignment and outgoing calls, honor the ABI alignment;
    // otherwise guarantee at least one slot's worth of alignment.
1291    if (MFI.hasCalls())
1292      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1293    else if (MaxAlign < SlotSize)
1294      MaxAlign = Align(SlotSize);
1295  }
1296
  // NOTE(review): the condition opening this block (original line 1297) is
  // missing from this excerpt; it presumably guards the case that requires
  // clamping the alignment up to 16 — confirm against the full file.
1298    if (HasRealign)
1299      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
1300    else
1301      MaxAlign = Align(16);
1302  }
1303  return MaxAlign.value();
1304}
1305
// Align the register \p Reg down to \p MaxAlign with an AND of -MaxAlign.
// When Reg is the stack pointer, inline stack probing is requested, and the
// alignment gap may exceed StackProbeSize, emit an entry/head/body/foot
// probe loop instead of a bare AND so every page in the gap is touched.
// NOTE(review): a number of interior lines are missing from this excerpt
// (internal numbering skips e.g. 1307, 1314, 1326-1334, 1349-1350, 1355,
// 1363-1368, 1378-1392, 1401-1423, 1432-1437, 1447); comments describe only
// the visible code.
1306void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1308                                          const DebugLoc &DL, Register Reg,
1309                                          uint64_t MaxAlign) const {
  // -MaxAlign is the AND mask that rounds Reg down to the alignment.
1310  uint64_t Val = -MaxAlign;
1311  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
1312
1313  MachineFunction &MF = *MBB.getParent();
1315  const X86TargetLowering &TLI = *STI.getTargetLowering();
1316  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1317  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1318
1319  // We want to make sure that (in worst case) less than StackProbeSize bytes
1320  // are not probed after the AND. This assumption is used in
1321  // emitStackProbeInlineGeneric.
1322  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1323    {
1324      NumFrameLoopProbe++;
      // Create the four blocks of the probe loop; their initializer
      // expressions (original lines 1326-1332) are missing in this excerpt.
1325      MachineBasicBlock *entryMBB =
1327      MachineBasicBlock *headMBB =
1329      MachineBasicBlock *bodyMBB =
1331      MachineBasicBlock *footMBB =
1333
1335      MF.insert(MBBIter, entryMBB);
1336      MF.insert(MBBIter, headMBB);
1337      MF.insert(MBBIter, bodyMBB);
1338      MF.insert(MBBIter, footMBB);
1339      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      // Scratch register holding the final (aligned) SP target.
1340      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1341                                  : Is64Bit         ? X86::R11D
1342                                                    : X86::EAX;
1343
1344      // Setup entry block: compute the aligned target and skip the loop
1344      // entirely when SP is already aligned.
1345      {
1346
1347        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
1348        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
1351        MachineInstr *MI =
1352            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
1353                .addReg(FinalStackProbed)
1354                .addImm(Val)
1356
1357        // The EFLAGS implicit def is dead.
1358        MI->getOperand(3).setIsDead();
1359
1360        BuildMI(entryMBB, DL,
1361                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1362            .addReg(FinalStackProbed)
1365        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
1366            .addMBB(&MBB)
1369        entryMBB->addSuccessor(headMBB);
1370        entryMBB->addSuccessor(&MBB);
1371      }
1372
1373      // Loop entry block: step SP down one probe interval and decide
1373      // whether any full-interval probes remain.
1374
1375      {
1376        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1377        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
1379            .addImm(StackProbeSize)
1381
1382        BuildMI(headMBB, DL,
1383                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1385            .addReg(FinalStackProbed)
1387
1388        // jump to the footer if StackPtr < FinalStackProbed
1389        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
1390            .addMBB(footMBB)
1393
1394        headMBB->addSuccessor(bodyMBB);
1395        headMBB->addSuccessor(footMBB);
1396      }
1397
1398      // setup loop body: touch the current page, then step down another
1398      // probe interval and loop while above the target.
1399      {
1400        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
1402                     StackPtr, false, 0)
1403            .addImm(0)
1405
1406        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1407        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
1409            .addImm(StackProbeSize)
1411
1412        // cmp with stack pointer bound
1413        BuildMI(bodyMBB, DL,
1414                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1415            .addReg(FinalStackProbed)
1418
1419        // jump back while FinalStackProbed < StackPtr
1420        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
1421            .addMBB(bodyMBB)
1424        bodyMBB->addSuccessor(bodyMBB);
1425        bodyMBB->addSuccessor(footMBB);
1426      }
1427
1428      // setup loop footer: snap SP to the exact aligned target and touch
1428      // the final page.
1429      {
1430        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
1431            .addReg(FinalStackProbed)
1433        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
1435                     StackPtr, false, 0)
1436            .addImm(0)
1438        footMBB->addSuccessor(&MBB);
1439      }
1440
1441      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
1442    }
1443  } else {
    // Simple case: no probing needed, a single AND suffices.
1444    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
1445                           .addReg(Reg)
1446                           .addImm(Val)
1448
1449    // The EFLAGS implicit def is dead.
1450    MI->getOperand(3).setIsDead();
1451  }
1452}
1453
// Returns whether this function may rely on the x86-64 128-byte red zone.
// NOTE(review): the signature line (original 1454) is missing from this
// excerpt — confirm the exact prototype against the full file.
1455  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1456  // clobbered by any interrupt handler.
1457  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1458         "MF used frame lowering for wrong subtarget");
1459  const Function &Fn = MF.getFunction();
  // Win64 calling conventions have no red zone, and the NoRedZone attribute
  // disables it explicitly.
1460  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
1461  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
1462}
1463
1464/// Return true if we need to use the restricted Windows x64 prologue and
1465/// epilogue code patterns that can be described with WinCFI (.seh_*
1466/// directives).
1467bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1468 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1469}
1470
1471bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1472 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1473}
1474
1475/// Return true if an opcode is part of the REP group of instructions
1476static bool isOpcodeRep(unsigned Opcode) {
1477 switch (Opcode) {
1478 case X86::REPNE_PREFIX:
1479 case X86::REP_MOVSB_32:
1480 case X86::REP_MOVSB_64:
1481 case X86::REP_MOVSD_32:
1482 case X86::REP_MOVSD_64:
1483 case X86::REP_MOVSQ_32:
1484 case X86::REP_MOVSQ_64:
1485 case X86::REP_MOVSW_32:
1486 case X86::REP_MOVSW_64:
1487 case X86::REP_PREFIX:
1488 case X86::REP_STOSB_32:
1489 case X86::REP_STOSB_64:
1490 case X86::REP_STOSD_32:
1491 case X86::REP_STOSD_64:
1492 case X86::REP_STOSQ_32:
1493 case X86::REP_STOSQ_64:
1494 case X86::REP_STOSW_32:
1495 case X86::REP_STOSW_64:
1496 return true;
1497 default:
1498 break;
1499 }
1500 return false;
1501}
1502
1503/// emitPrologue - Push callee-saved registers onto the stack, which
1504/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
1505/// space for local variables. Also emit labels used by the exception handler to
1506/// generate the exception handling frames.
1507
1508/*
1509 Here's a gist of what gets emitted:
1510
1511 ; Establish frame pointer, if needed
1512 [if needs FP]
1513 push %rbp
1514 .cfi_def_cfa_offset 16
1515 .cfi_offset %rbp, -16
1516    .seh_pushreg %rbp
1517 mov %rsp, %rbp
1518 .cfi_def_cfa_register %rbp
1519
1520 ; Spill general-purpose registers
1521 [for all callee-saved GPRs]
1522 pushq %<reg>
1523 [if not needs FP]
1524 .cfi_def_cfa_offset (offset from RETADDR)
1525 .seh_pushreg %<reg>
1526
1527 ; If the required stack alignment > default stack alignment
1528 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1529 ; of unknown size in the stack frame.
1530 [if stack needs re-alignment]
1531 and $MASK, %rsp
1532
1533 ; Allocate space for locals
1534 [if target is Windows and allocated space > 4096 bytes]
1535 ; Windows needs special care for allocations larger
1536 ; than one page.
1537 mov $NNN, %rax
1538 call ___chkstk_ms/___chkstk
1539 sub %rax, %rsp
1540 [else]
1541 sub $NNN, %rsp
1542
1543 [if needs FP]
1544 .seh_stackalloc (size of XMM spill slots)
1545 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1546 [else]
1547 .seh_stackalloc NNN
1548
1549 ; Spill XMMs
1550 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
1551 ; they may get spilled on any platform, if the current function
1552 ; calls @llvm.eh.unwind.init
1553 [if needs FP]
1554 [for all callee-saved XMM registers]
1555 movaps %<xmm reg>, -MMM(%rbp)
1556 [for all callee-saved XMM registers]
1557 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1558 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1559 [else]
1560 [for all callee-saved XMM registers]
1561 movaps %<xmm reg>, KKK(%rsp)
1562 [for all callee-saved XMM registers]
1563 .seh_savexmm %<xmm reg>, KKK
1564
1565 .seh_endprologue
1566
1567 [if needs base pointer]
1568 mov %rsp, %rbx
1569 [if needs to restore base pointer]
1570 mov %rsp, -MMM(%rbp)
1571
1572 ; Emit CFI info
1573 [if needs FP]
1574 [for all callee-saved registers]
1575 .cfi_offset %<reg>, (offset from %rbp)
1576 [else]
1577 .cfi_def_cfa_offset (offset from RETADDR)
1578 [for all callee-saved registers]
1579 .cfi_offset %<reg>, (offset from %rsp)
1580
1581 Notes:
1582 - .seh directives are emitted only for Windows 64 ABI
1583 - .cv_fpo directives are emitted on win32 when emitting CodeView
1584 - .cfi directives are emitted for all other ABIs
1585 - for 32-bit code, substitute %e?? registers for %r??
1586*/
1587
1589 MachineBasicBlock &MBB) const {
1590 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1591 "MF used frame lowering for wrong subtarget");
1593 MachineFrameInfo &MFI = MF.getFrameInfo();
1594 const Function &Fn = MF.getFunction();
1596 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1597 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1598 bool IsFunclet = MBB.isEHFuncletEntry();
1600 if (Fn.hasPersonalityFn())
1601 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1602 bool FnHasClrFunclet =
1603 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1604 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1605 bool HasFP = hasFP(MF);
1606 bool IsWin64Prologue = isWin64Prologue(MF);
1607 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1608 // FIXME: Emit FPO data for EH funclets.
1609 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1611 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1612 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1614 const Register MachineFramePtr =
1616 : FramePtr;
1617 Register BasePtr = TRI->getBaseRegister();
1618 bool HasWinCFI = false;
1619
1620 // Debug location must be unknown since the first debug location is used
1621 // to determine the end of the prologue.
1622 DebugLoc DL;
1623 Register ArgBaseReg;
1624
1625 // Emit extra prolog for argument stack slot reference.
1626 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1627    // MI is the LEA instruction created in X86ArgumentStackSlotPass.
1628    // Create an extra prolog for stack realignment.
1629 ArgBaseReg = MI->getOperand(0).getReg();
1630 // leal 4(%esp), %basereg
1631 // .cfi_def_cfa %basereg, 0
1632 // andl $-128, %esp
1633 // pushl -4(%basereg)
1634 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1635 ArgBaseReg)
1637 .addImm(1)
1638 .addUse(X86::NoRegister)
1640 .addUse(X86::NoRegister)
1642 if (NeedsDwarfCFI) {
1643 // .cfi_def_cfa %basereg, 0
1644 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1645 BuildCFI(MBB, MBBI, DL,
1646 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1648 }
1649 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1650 int64_t Offset = -(int64_t)SlotSize;
1651 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1652 .addReg(ArgBaseReg)
1653 .addImm(1)
1654 .addReg(X86::NoRegister)
1655 .addImm(Offset)
1656 .addReg(X86::NoRegister)
1658 }
1659
1660 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1661 // tail call.
1662 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1663 if (TailCallArgReserveSize && IsWin64Prologue)
1664 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1665
1666 const bool EmitStackProbeCall =
1668 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1669
1670 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1674 // The special symbol below is absolute and has a *value* suitable to be
1675 // combined with the frame pointer directly.
1676 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1677 .addUse(MachineFramePtr)
1678 .addUse(X86::RIP)
1679 .addImm(1)
1680 .addUse(X86::NoRegister)
1681 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1683 .addUse(X86::NoRegister);
1684 break;
1685 }
1686 [[fallthrough]];
1687
1689 assert(
1690 !IsWin64Prologue &&
1691 "win64 prologue does not set the bit 60 in the saved frame pointer");
1692 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1693 .addUse(MachineFramePtr)
1694 .addImm(60)
1696 break;
1697
1699 break;
1700 }
1701 }
1702
1703 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1704 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1705 // stack alignment.
1707 Fn.arg_size() == 2) {
1708 StackSize += 8;
1709 MFI.setStackSize(StackSize);
1710
1711 // Update the stack pointer by pushing a register. This is the instruction
1712 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1713 // Hard-coding the update to a push avoids emitting a second
1714 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1715 // probing isn't needed anyways for an 8-byte update.
1716 // Pushing a register leaves us in a similar situation to a regular
1717 // function call where we know that the address at (rsp-8) is writeable.
1718 // That way we avoid any off-by-ones with stack probing for additional
1719 // stack pointer updates later on.
1720 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1721 .addReg(X86::RAX, RegState::Undef)
1723 }
1724
1725 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1726 // function, and use up to 128 bytes of stack space, don't have a frame
1727 // pointer, calls, or dynamic alloca then we do not need to adjust the
1728 // stack pointer (we fit in the Red Zone). We also check that we don't
1729 // push and pop from the stack.
1730 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1731 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1732 !MFI.adjustsStack() && // No calls.
1733 !EmitStackProbeCall && // No stack probes.
1734 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1735 !MF.shouldSplitStack()) { // Regular stack
1736 uint64_t MinSize =
1738 if (HasFP)
1739 MinSize += SlotSize;
1740 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1741 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1742 MFI.setStackSize(StackSize);
1743 }
1744
1745 // Insert stack pointer adjustment for later moving of return addr. Only
1746 // applies to tail call optimized functions where the callee argument stack
1747 // size is bigger than the callers.
1748 if (TailCallArgReserveSize != 0) {
1749 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1750 /*InEpilogue=*/false)
1752 }
1753
1754 // Mapping for machine moves:
1755 //
1756 // DST: VirtualFP AND
1757 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1758 // ELSE => DW_CFA_def_cfa
1759 //
1760 // SRC: VirtualFP AND
1761 // DST: Register => DW_CFA_def_cfa_register
1762 //
1763 // ELSE
1764 // OFFSET < 0 => DW_CFA_offset_extended_sf
1765 // REG < 64 => DW_CFA_offset + Reg
1766 // ELSE => DW_CFA_offset_extended
1767
1768 uint64_t NumBytes = 0;
1769 int stackGrowth = -SlotSize;
1770
1771 // Find the funclet establisher parameter
1772 MCRegister Establisher;
1773 if (IsClrFunclet)
1774 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1775 else if (IsFunclet)
1776 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1777
1778 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1779 // Immediately spill establisher into the home slot.
1780 // The runtime cares about this.
1781 // MOV64mr %rdx, 16(%rsp)
1782 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1783 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1784 .addReg(Establisher)
1786 MBB.addLiveIn(Establisher);
1787 }
1788
1789 if (HasFP) {
1790 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1791
1792 // Calculate required stack adjustment.
1793 uint64_t FrameSize = StackSize - SlotSize;
1794 NumBytes =
1795 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1796
1797 // Callee-saved registers are pushed on stack before the stack is realigned.
1798 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1799 NumBytes = alignTo(NumBytes, MaxAlign);
1800
1801 // Save EBP/RBP into the appropriate stack slot.
1802 BuildMI(MBB, MBBI, DL,
1804 .addReg(MachineFramePtr, RegState::Kill)
1806
1807 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1808 // Mark the place where EBP/RBP was saved.
1809 // Define the current CFA rule to use the provided offset.
1810 assert(StackSize);
1811 BuildCFI(MBB, MBBI, DL,
1813 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1815
1816 // Change the rule for the FramePtr to be an "offset" rule.
1817 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1818 BuildCFI(MBB, MBBI, DL,
1819 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1820 2 * stackGrowth -
1821 (int)TailCallArgReserveSize),
1823 }
1824
1825 if (NeedsWinCFI) {
1826 HasWinCFI = true;
1827 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1830 }
1831
1832 if (!IsFunclet) {
1833 if (X86FI->hasSwiftAsyncContext()) {
1834 assert(!IsWin64Prologue &&
1835 "win64 prologue does not store async context right below rbp");
1836 const auto &Attrs = MF.getFunction().getAttributes();
1837
1838 // Before we update the live frame pointer we have to ensure there's a
1839 // valid (or null) asynchronous context in its slot just before FP in
1840 // the frame record, so store it now.
1841 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1842 // We have an initial context in r14, store it just before the frame
1843 // pointer.
1844 MBB.addLiveIn(X86::R14);
1845 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1846 .addReg(X86::R14)
1848 } else {
1849 // No initial context, store null so that there's no pointer that
1850 // could be misused.
1851 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1852 .addImm(0)
1854 }
1855
1856 if (NeedsWinCFI) {
1857 HasWinCFI = true;
1858 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1859 .addImm(X86::R14)
1861 }
1862
1863 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1864 .addUse(X86::RSP)
1865 .addImm(1)
1866 .addUse(X86::NoRegister)
1867 .addImm(8)
1868 .addUse(X86::NoRegister)
1870 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1871 .addUse(X86::RSP)
1872 .addImm(8)
1874 }
1875
1876 if (!IsWin64Prologue && !IsFunclet) {
1877 // Update EBP with the new base value.
1878 if (!X86FI->hasSwiftAsyncContext())
1879 BuildMI(MBB, MBBI, DL,
1880 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1881 FramePtr)
1884
1885 if (NeedsDwarfCFI) {
1886 if (ArgBaseReg.isValid()) {
1887 SmallString<64> CfaExpr;
1888 CfaExpr.push_back(dwarf::DW_CFA_expression);
1889 uint8_t buffer[16];
1890 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1891 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1892 CfaExpr.push_back(2);
1893 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1894 CfaExpr.push_back(0);
1895 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1896 BuildCFI(MBB, MBBI, DL,
1897 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1899 } else {
1900 // Mark effective beginning of when frame pointer becomes valid.
1901 // Define the current CFA to use the EBP/RBP register.
1902 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1903 BuildCFI(
1904 MBB, MBBI, DL,
1905 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1907 }
1908 }
1909
1910 if (NeedsWinFPO) {
1911 // .cv_fpo_setframe $FramePtr
1912 HasWinCFI = true;
1913 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1915 .addImm(0)
1917 }
1918 }
1919 }
1920 } else {
1921 assert(!IsFunclet && "funclets without FPs not yet implemented");
1922 NumBytes =
1923 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1924 }
1925
1926 // Update the offset adjustment, which is mainly used by codeview to translate
1927 // from ESP to VFRAME relative local variable offsets.
1928 if (!IsFunclet) {
1929 if (HasFP && TRI->hasStackRealignment(MF))
1930 MFI.setOffsetAdjustment(-NumBytes);
1931 else
1932 MFI.setOffsetAdjustment(-StackSize);
1933 }
1934
1935 // For EH funclets, only allocate enough space for outgoing calls. Save the
1936 // NumBytes value that we would've used for the parent frame.
1937 unsigned ParentFrameNumBytes = NumBytes;
1938 if (IsFunclet)
1939 NumBytes = getWinEHFuncletFrameSize(MF);
1940
1941 // Skip the callee-saved push instructions.
1942 bool PushedRegs = false;
1943 int StackOffset = 2 * stackGrowth;
1945 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1946 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1947 return false;
1948 unsigned Opc = MBBI->getOpcode();
1949 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1950 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1951 };
1952
1953 while (IsCSPush(MBBI)) {
1954 PushedRegs = true;
1955 Register Reg = MBBI->getOperand(0).getReg();
1956 LastCSPush = MBBI;
1957 ++MBBI;
1958 unsigned Opc = LastCSPush->getOpcode();
1959
1960 if (!HasFP && NeedsDwarfCFI) {
1961 // Mark callee-saved push instruction.
1962 // Define the current CFA rule to use the provided offset.
1963 assert(StackSize);
1964 // Compared to push, push2 introduces more stack offset (one more
1965 // register).
1966 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1967 StackOffset += stackGrowth;
1968 BuildCFI(MBB, MBBI, DL,
1971 StackOffset += stackGrowth;
1972 }
1973
1974 if (NeedsWinCFI) {
1975 HasWinCFI = true;
1976 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1977 .addImm(Reg)
1979 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1980 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1981 .addImm(LastCSPush->getOperand(1).getReg())
1983 }
1984 }
1985
1986 // Realign stack after we pushed callee-saved registers (so that we'll be
1987 // able to calculate their offsets from the frame pointer).
1988 // Don't do this for Win64, it needs to realign the stack after the prologue.
1989 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1990 !ArgBaseReg.isValid()) {
1991 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1992 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1993
1994 if (NeedsWinCFI) {
1995 HasWinCFI = true;
1996 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1997 .addImm(MaxAlign)
1999 }
2000 }
2001
2002 // If there is an SUB32ri of ESP immediately before this instruction, merge
2003 // the two. This can be the case when tail call elimination is enabled and
2004 // the callee has more arguments than the caller.
2005 NumBytes = mergeSPUpdates(
2006 MBB, MBBI, [NumBytes](int64_t Offset) { return NumBytes - Offset; },
2007 true);
2008
2009 // Adjust stack pointer: ESP -= numbytes.
2010
2011 // Windows and cygwin/mingw require a prologue helper routine when allocating
2012 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
2013 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
2014 // stack and adjust the stack pointer in one go. The 64-bit version of
2015 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
2016 // responsible for adjusting the stack pointer. Touching the stack at 4K
2017 // increments is necessary to ensure that the guard pages used by the OS
2018 // virtual memory manager are allocated in correct sequence.
2019 uint64_t AlignedNumBytes = NumBytes;
2020 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
2021 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
2022 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
2023 assert(!X86FI->getUsesRedZone() &&
2024 "The Red Zone is not accounted for in stack probes");
2025
2026 // Check whether EAX is livein for this block.
2027 bool isEAXAlive = isEAXLiveIn(MBB);
2028
2029 if (isEAXAlive) {
2030 if (Is64Bit) {
2031 // Save RAX
2032 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
2033 .addReg(X86::RAX, RegState::Kill)
2035 } else {
2036 // Save EAX
2037 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2038 .addReg(X86::EAX, RegState::Kill)
2040 }
2041 }
2042
2043 if (Is64Bit) {
2044 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2045 // Function prologue is responsible for adjusting the stack pointer.
2046 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2047 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
2048 .addImm(Alloc)
2050 } else {
2051 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2052 // We'll also use 4 already allocated bytes for EAX.
2053 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2054 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2056 }
2057
2058 // Call __chkstk, __chkstk_ms, or __alloca.
2059 emitStackProbe(MF, MBB, MBBI, DL, true);
2060
2061 if (isEAXAlive) {
2062 // Restore RAX/EAX
2064 if (Is64Bit)
2065 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2066 StackPtr, false, NumBytes - 8);
2067 else
2068 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2069 StackPtr, false, NumBytes - 4);
2070 MI->setFlag(MachineInstr::FrameSetup);
2071 MBB.insert(MBBI, MI);
2072 }
2073 } else if (NumBytes) {
2074 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2075 }
2076
2077 if (NeedsWinCFI && NumBytes) {
2078 HasWinCFI = true;
2079 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2080 .addImm(NumBytes)
2082 }
2083
2084 int SEHFrameOffset = 0;
2085 Register SPOrEstablisher;
2086 if (IsFunclet) {
2087 if (IsClrFunclet) {
2088 // The establisher parameter passed to a CLR funclet is actually a pointer
2089 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2090 // to find the root function establisher frame by loading the PSPSym from
2091 // the intermediate frame.
2092 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2093 MachinePointerInfo NoInfo;
2094 MBB.addLiveIn(Establisher);
2095 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2096 Establisher, false, PSPSlotOffset)
2099 ;
2100 // Save the root establisher back into the current funclet's (mostly
2101 // empty) frame, in case a sub-funclet or the GC needs it.
2102 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2103 false, PSPSlotOffset)
2104 .addReg(Establisher)
2106 NoInfo,
2109 }
2110 SPOrEstablisher = Establisher;
2111 } else {
2112 SPOrEstablisher = StackPtr;
2113 }
2114
2115 if (IsWin64Prologue && HasFP) {
2116 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2117 // this calculation on the incoming establisher, which holds the value of
2118 // RSP from the parent frame at the end of the prologue.
2119 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2120 if (SEHFrameOffset)
2121 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2122 SPOrEstablisher, false, SEHFrameOffset);
2123 else
2124 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2125 .addReg(SPOrEstablisher);
2126
2127 // If this is not a funclet, emit the CFI describing our frame pointer.
2128 if (NeedsWinCFI && !IsFunclet) {
2129 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2130 HasWinCFI = true;
2131 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2133 .addImm(SEHFrameOffset)
2135 if (isAsynchronousEHPersonality(Personality))
2136 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2137 }
2138 } else if (IsFunclet && STI.is32Bit()) {
2139 // Reset EBP / ESI to something good for funclets.
2141 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2142 // into the registration node so that the runtime will restore it for us.
2143 if (!MBB.isCleanupFuncletEntry()) {
2144 assert(Personality == EHPersonality::MSVC_CXX);
2145 Register FrameReg;
2147 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2148 // ESP is the first field, so no extra displacement is needed.
2149 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2150 false, EHRegOffset)
2151 .addReg(X86::ESP);
2152 }
2153 }
2154
2155 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2156 const MachineInstr &FrameInstr = *MBBI;
2157 ++MBBI;
2158
2159 if (NeedsWinCFI) {
2160 int FI;
2161 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2162 if (X86::FR64RegClass.contains(Reg)) {
2163 int Offset;
2164 Register IgnoredFrameReg;
2165 if (IsWin64Prologue && IsFunclet)
2166 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2167 else
2168 Offset =
2169 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2170 SEHFrameOffset;
2171
2172 HasWinCFI = true;
2173 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2174 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2175 .addImm(Reg)
2176 .addImm(Offset)
2178 }
2179 }
2180 }
2181 }
2182
2183 if (NeedsWinCFI && HasWinCFI)
2184 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2186
2187 if (FnHasClrFunclet && !IsFunclet) {
2188 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2189 // immediately after the prolog) into the PSPSlot so that funclets
2190 // and the GC can recover it.
2191 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2192 auto PSPInfo = MachinePointerInfo::getFixedStack(
2194 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2195 PSPSlotOffset)
2200 }
2201
2202 // Realign stack after we spilled callee-saved registers (so that we'll be
2203 // able to calculate their offsets from the frame pointer).
2204 // Win64 requires aligning the stack after the prologue.
2205 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2206 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2207 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2208 }
2209
2210 // We already dealt with stack realignment and funclets above.
2211 if (IsFunclet && STI.is32Bit())
2212 return;
2213
2214 // If we need a base pointer, set it up here. It's whatever the value
2215 // of the stack pointer is at this point. Any variable size objects
2216 // will be allocated after this, so we can still use the base pointer
2217 // to reference locals.
2218 if (TRI->hasBasePointer(MF)) {
2219 // Update the base pointer with the current stack pointer.
2220 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2221 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2222 .addReg(SPOrEstablisher)
2224 if (X86FI->getRestoreBasePointer()) {
2225 // Stash value of base pointer. Saving RSP instead of EBP shortens
2226 // dependence chain. Used by SjLj EH.
2227 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2228 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2230 .addReg(SPOrEstablisher)
2232 }
2233
2234 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2235 // Stash the value of the frame pointer relative to the base pointer for
2236 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2237 // it recovers the frame pointer from the base pointer rather than the
2238 // other way around.
2239 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2240 Register UsedReg;
2241 int Offset =
2242 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2243 .getFixed();
2244 assert(UsedReg == BasePtr);
2245 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2248 }
2249 }
2250 if (ArgBaseReg.isValid()) {
2251 // Save argument base pointer.
2252 auto *MI = X86FI->getStackPtrSaveMI();
2253 int FI = MI->getOperand(1).getIndex();
2254 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2255 // movl %basereg, offset(%ebp)
2256 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2257 .addReg(ArgBaseReg)
2259 }
2260
2261 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2262 // Mark end of stack pointer adjustment.
2263 if (!HasFP && NumBytes) {
2264 // Define the current CFA rule to use the provided offset.
2265 assert(StackSize);
2266 BuildCFI(
2267 MBB, MBBI, DL,
2268 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2270 }
2271
2272 // Emit DWARF info specifying the offsets of the callee-saved registers.
2274 }
2275
2276 // X86 Interrupt handling function cannot assume anything about the direction
2277 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2278 // in each prologue of interrupt handler function.
2279 //
2280 // Create "cld" instruction only in these cases:
2281 // 1. The interrupt handling function uses any of the "rep" instructions.
2282 // 2. Interrupt handling function calls another function.
2283 // 3. If there are any inline asm blocks, as we do not know what they do
2284 //
2285 // TODO: We should also emit cld if we detect the use of std, but as of now,
2286 // the compiler does not even emit that instruction or even define it, so in
2287 // practice, this would only happen with inline asm, which we cover anyway.
2289 bool NeedsCLD = false;
2290
2291 for (const MachineBasicBlock &B : MF) {
2292 for (const MachineInstr &MI : B) {
2293 if (MI.isCall()) {
2294 NeedsCLD = true;
2295 break;
2296 }
2297
2298 if (isOpcodeRep(MI.getOpcode())) {
2299 NeedsCLD = true;
2300 break;
2301 }
2302
2303 if (MI.isInlineAsm()) {
2304 // TODO: Parse asm for rep instructions or call sites?
2305 // For now, let's play it safe and emit a cld instruction
2306 // just in case.
2307 NeedsCLD = true;
2308 break;
2309 }
2310 }
2311 }
2312
2313 if (NeedsCLD) {
2314 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2316 }
2317 }
2318
2319 // At this point we know if the function has WinCFI or not.
2320 MF.setHasWinCFI(HasWinCFI);
2321}
2322
// NOTE(review): the opening declaration line of this member function
// (original line 2323) is missing from this extract; from the body it is the
// predicate deciding whether LEA may be used to restore SP in the epilogue
// (presumably X86FrameLowering::canUseLEAForSPInEpilogue) -- confirm against
// the full source.
2324 const MachineFunction &MF) const {
2325 // We can't use LEA instructions for adjusting the stack pointer if we don't
2326 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2327 // to deallocate the stack.
2328 // This means that we can use LEA for SP in two situations:
2329 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2330 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2331 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2332 }
2333
// Predicate: is this instruction a funclet-return pseudo?
// Only the CATCHRET and CLEANUPRET pseudo-instructions terminate an EH
// funclet; every other opcode is a normal instruction.
// NOTE(review): the function signature line (original line 2334) is missing
// from this extract.
2335 switch (MI.getOpcode()) {
2336 case X86::CATCHRET:
2337 case X86::CLEANUPRET:
2338 return true;
2339 default:
2340 return false;
2341 }
// Not reachable: every path through the switch above returns. Kept as a
// defensive marker so a fall-through is loudly diagnosed.
2342 llvm_unreachable("impossible");
2343 }
2344
2345// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2346// stack. It holds a pointer to the bottom of the root function frame. The
2347// establisher frame pointer passed to a nested funclet may point to the
2348// (mostly empty) frame of its parent funclet, but it will need to find
2349// the frame of the root function to access locals. To facilitate this,
2350// every funclet copies the pointer to the bottom of the root function
2351// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2352// same offset for the PSPSym in the root function frame that's used in the
2353// funclets' frames allows each funclet to dynamically accept any ancestor
2354// frame as its establisher argument (the runtime doesn't guarantee the
2355// immediate parent for some reason lost to history), and also allows the GC,
2356// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2357// frame with only a single offset reported for the entire method.
// Returns the SP-relative offset (non-negative, measured from SP immediately
// after the prologue) of the CLR PSPSym slot; see the comment block above for
// why every CLR funclet keeps this slot at the same offset.
// NOTE(review): original line 2361 is missing from this extract; it
// presumably declares the SPReg local (`Register SPReg;`) that is written by
// the call below and validated by the assert.
2358 unsigned
2359 X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2360 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2362 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2363 /*IgnoreSPUpdates*/ true)
2364 .getFixed();
// The PSPSym lives below SP (positive offset) and must be addressed off the
// stack register itself, never a frame/base pointer.
2365 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2366 return static_cast<unsigned>(Offset);
2367 }
2368
// Computes the number of bytes a Windows EH funclet must allocate in its
// prologue: space for outgoing call args (or the PSPSym region for CoreCLR),
// aligned to the stack alignment together with the pushed CSRs, plus the XMM
// spill area, minus the CSR bytes the pushes themselves already allocated.
// NOTE(review): original lines 2371 and 2381 are missing from this extract;
// they presumably initialize X86FI (MF.getInfo<X86MachineFunctionInfo>())
// and complete the Personality initializer (classifyEHPersonality of the
// function's personality) -- confirm against the full source.
2369 unsigned
2370 X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2372 // This is the size of the pushed CSRs.
2373 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2374 // This is the size of callee saved XMMs.
2375 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2376 unsigned XMMSize =
2377 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2378 // This is the amount of stack a funclet needs to allocate.
2379 unsigned UsedSize;
2380 EHPersonality Personality =
2382 if (Personality == EHPersonality::CoreCLR) {
2383 // CLR funclets need to hold enough space to include the PSPSym, at the
2384 // same offset from the stack pointer (immediately after the prolog) as it
2385 // resides at in the main function.
2386 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2387 } else {
2388 // Other funclets just need enough stack for outgoing call arguments.
2389 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2390 }
2391 // RBP is not included in the callee saved register block. After pushing RBP,
2392 // everything is 16 byte aligned. Everything we allocate before an outgoing
2393 // call must also be 16 byte aligned.
2394 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2395 // Subtract out the size of the callee saved registers. This is how much stack
2396 // each funclet will allocate.
2397 return FrameSizeMinusRBP + XMMSize - CSSize;
2398 }
2399
2400static bool isTailCallOpcode(unsigned Opc) {
2401 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2402 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2403 Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
2404 Opc == X86::TCRETURNmi64;
2405}
2406
// X86FrameLowering::emitEpilogue: undoes the prologue before the terminator
// in MBB -- restores SP (via ADD/LEA or merged SP updates), pops the frame
// pointer and callee-saved registers, and emits the matching DWARF CFI and
// Win64 SEH epilogue annotations.
// NOTE(review): this extract is missing the opening signature line (original
// line 2407) and numerous continuation lines (e.g. builder .setMIFlag(...)
// calls, X86FI/Terminator initializers at 2410-2411, the AfterPop declaration
// at 2486, and instruction-building lines at 2495/2589-2590/2595/2620/
// 2631-2632/2655) -- consult the full source before editing.
2408 MachineBasicBlock &MBB) const {
2409 const MachineFrameInfo &MFI = MF.getFrameInfo();
2412 MachineBasicBlock::iterator MBBI = Terminator;
2413 DebugLoc DL;
2414 if (MBBI != MBB.end())
2415 DL = MBBI->getDebugLoc();
2416 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
2417 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2419 Register MachineFramePtr =
2420 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2421
2422 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2423 bool NeedsWin64CFI =
2424 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2425 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2426
2427 // Get the number of bytes to allocate from the FrameInfo.
2428 uint64_t StackSize = MFI.getStackSize();
2429 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2430 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
// TCReturnAddrDelta is <= 0, so negating it yields the (non-negative) size
// of the reserved tail-call argument area.
2431 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2432 bool HasFP = hasFP(MF);
2433 uint64_t NumBytes = 0;
2434
2435 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2437 !MF.getTarget().getTargetTriple().isUEFI()) &&
2438 MF.needsFrameMoves();
2439
// If the prologue stashed an argument base pointer (see getStackPtrSaveMI),
// recompute SP from it with an LEA and restore the CFA rule.
2440 Register ArgBaseReg;
2441 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2442 unsigned Opc = X86::LEA32r;
2443 Register StackReg = X86::ESP;
2444 ArgBaseReg = MI->getOperand(0).getReg();
2445 if (STI.is64Bit()) {
2446 Opc = X86::LEA64r;
2447 StackReg = X86::RSP;
2448 }
2449 // leal -4(%basereg), %esp
2450 // .cfi_def_cfa %esp, 4
2451 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2452 .addUse(ArgBaseReg)
2453 .addImm(1)
2454 .addUse(X86::NoRegister)
2455 .addImm(-(int64_t)SlotSize)
2456 .addUse(X86::NoRegister)
2458 if (NeedsDwarfCFI) {
2459 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2460 BuildCFI(MBB, MBBI, DL,
2461 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2463 --MBBI;
2464 }
2465 --MBBI;
2466 }
2467
// Determine how many bytes of locals to deallocate; funclets use the special
// funclet frame size, FP-based frames subtract the saved-FP slot and CSRs.
2468 if (IsFunclet) {
2469 assert(HasFP && "EH funclets without FP not yet implemented");
2470 NumBytes = getWinEHFuncletFrameSize(MF);
2471 } else if (HasFP) {
2472 // Calculate required stack adjustment.
2473 uint64_t FrameSize = StackSize - SlotSize;
2474 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2475
2476 // Callee-saved registers were pushed on stack before the stack was
2477 // realigned.
2478 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2479 NumBytes = alignTo(FrameSize, MaxAlign);
2480 } else {
2481 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2482 }
2483 uint64_t SEHStackAllocAmt = NumBytes;
2484
2485 // AfterPop is the position to insert .cfi_restore.
2487 if (HasFP) {
2488 if (X86FI->hasSwiftAsyncContext()) {
2489 // Discard the context.
2490 int64_t Offset = mergeSPAdd(MBB, MBBI, 16, true);
2491 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2492 }
2493 // Pop EBP.
2494 BuildMI(MBB, MBBI, DL,
2496 MachineFramePtr)
2498
2499 // We need to reset FP to its untagged state on return. Bit 60 is currently
2500 // used to show the presence of an extended frame.
2501 if (X86FI->hasSwiftAsyncContext()) {
2502 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2503 .addUse(MachineFramePtr)
2504 .addImm(60)
2506 }
2507
2508 if (NeedsDwarfCFI) {
2509 if (!ArgBaseReg.isValid()) {
2510 unsigned DwarfStackPtr =
2511 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2512 BuildCFI(MBB, MBBI, DL,
2513 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2515 }
2516 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2517 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2518 BuildCFI(MBB, AfterPop, DL,
2519 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2521 --MBBI;
2522 --AfterPop;
2523 }
2524 --MBBI;
2525 }
2526 }
2527
// Walk backwards over the FrameDestroy pop/adjust instructions so later SP
// adjustments are inserted before them; FirstCSPop remembers the first one.
2528 MachineBasicBlock::iterator FirstCSPop = MBBI;
2529 // Skip the callee-saved pop instructions.
2530 while (MBBI != MBB.begin()) {
2531 MachineBasicBlock::iterator PI = std::prev(MBBI);
2532 unsigned Opc = PI->getOpcode();
2533
2534 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2535 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2536 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2537 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2538 Opc != X86::POP2P && Opc != X86::LEA64r))
2539 break;
2540 FirstCSPop = PI;
2541 }
2542
2543 --MBBI;
2544 }
2545 if (ArgBaseReg.isValid()) {
2546 // Restore argument base pointer.
2547 auto *MI = X86FI->getStackPtrSaveMI();
2548 int FI = MI->getOperand(1).getIndex();
2549 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2550 // movl offset(%ebp), %basereg
2551 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2553 }
2554 MBBI = FirstCSPop;
2555
2556 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2557 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2558
2559 if (MBBI != MBB.end())
2560 DL = MBBI->getDebugLoc();
2561 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2562 // instruction, merge the two instructions.
2563 if (NumBytes || MFI.hasVarSizedObjects())
2564 NumBytes = mergeSPAdd(MBB, MBBI, NumBytes, true);
2565
2566 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2567 // slot before popping them off! Same applies for the case, when stack was
2568 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2569 // will not do realignment or dynamic stack allocation.
2570 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2571 !IsFunclet) {
2572 if (TRI->hasStackRealignment(MF))
2573 MBBI = FirstCSPop;
2574 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2575 uint64_t LEAAmount =
2576 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2577
2578 if (X86FI->hasSwiftAsyncContext())
2579 LEAAmount -= 16;
2580
2581 // There are only two legal forms of epilogue:
2582 // - add SEHAllocationSize, %rsp
2583 // - lea SEHAllocationSize(%FramePtr), %rsp
2584 //
2585 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2586 // However, we may use this sequence if we have a frame pointer because the
2587 // effects of the prologue can safely be undone.
2588 if (LEAAmount != 0) {
2591 false, LEAAmount);
2592 --MBBI;
2593 } else {
2594 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2596 --MBBI;
2597 }
2598 } else if (NumBytes) {
2599 // Adjust stack pointer back: ESP += numbytes.
2600 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2601 if (!HasFP && NeedsDwarfCFI) {
2602 // Define the current CFA rule to use the provided offset.
2603 BuildCFI(MBB, MBBI, DL,
2605 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2607 }
2608 --MBBI;
2609 }
2610
2611 if (NeedsWin64CFI && MF.hasWinCFI())
2612 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_BeginEpilogue));
2613
// Without a frame pointer the CFA tracks SP, so each CSR pop needs its own
// .cfi_def_cfa_offset; pop2 variants restore two registers per instruction.
2614 if (!HasFP && NeedsDwarfCFI) {
2615 MBBI = FirstCSPop;
2616 int64_t Offset = -(int64_t)CSSize - SlotSize;
2617 // Mark callee-saved pop instruction.
2618 // Define the current CFA rule to use the provided offset.
2619 while (MBBI != MBB.end()) {
2621 unsigned Opc = PI->getOpcode();
2622 ++MBBI;
2623 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2624 Opc == X86::POP2 || Opc == X86::POP2P) {
2625 Offset += SlotSize;
2626 // Compared to pop, pop2 introduces more stack offset (one more
2627 // register).
2628 if (Opc == X86::POP2 || Opc == X86::POP2P)
2629 Offset += SlotSize;
2630 BuildCFI(MBB, MBBI, DL,
2633 }
2634 }
2635 }
2636
2637 // Emit DWARF info specifying the restores of the callee-saved registers.
2638 // For epilogue with return inside or being other block without successor,
2639 // no need to generate .cfi_restore for callee-saved registers.
2640 if (NeedsDwarfCFI && !MBB.succ_empty())
2641 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2642
2643 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2644 // Add the return addr area delta back since we are not tail calling.
2645 int64_t Delta = X86FI->getTCReturnAddrDelta();
2646 assert(Delta <= 0 && "TCDelta should never be positive");
2647 if (Delta) {
2648 // Check for possible merge with preceding ADD instruction.
2649 int64_t Offset = mergeSPAdd(MBB, Terminator, -Delta, true);
2650 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2651 }
2652 }
2653
2654 // Emit tilerelease for AMX kernel.
2656 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2657
2658 if (NeedsWin64CFI && MF.hasWinCFI())
2659 BuildMI(MBB, Terminator, DL, TII.get(X86::SEH_EndEpilogue));
2660 }
2661
// X86FrameLowering::getFrameIndexReference: resolves frame index FI to a
// register (written to FrameReg) plus a fixed byte offset, accounting for
// base-pointer/realignment register selection, the Win64 restricted prologue
// (FPDelta), the saved-FP slot, and the tail-call RETADDR move area.
// NOTE(review): this extract is missing the opening signature line (original
// line 2662) and lines 2683 (X86FI initializer), 2693/2695 (the x86-interrupt
// condition and the Offset adjustment it guards), and 2734 (the FP-relative
// return statement) -- consult the full source before editing.
2663 int FI,
2664 Register &FrameReg) const {
2665 const MachineFrameInfo &MFI = MF.getFrameInfo();
2666
2667 bool IsFixed = MFI.isFixedObjectIndex(FI);
2668 // We can't calculate offset from frame pointer if the stack is realigned,
2669 // so enforce usage of stack/base pointer. The base pointer is used when we
2670 // have dynamic allocas in addition to dynamic realignment.
2671 if (TRI->hasBasePointer(MF))
2672 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2673 else if (TRI->hasStackRealignment(MF))
2674 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2675 else
2676 FrameReg = TRI->getFrameRegister(MF);
2677
2678 // Offset will hold the offset from the stack pointer at function entry to the
2679 // object.
2680 // We need to factor in additional offsets applied during the prologue to the
2681 // frame, base, and stack pointer depending on which is used.
2682 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2684 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2685 uint64_t StackSize = MFI.getStackSize();
2686 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2687 int64_t FPDelta = 0;
2688
2689 // In an x86 interrupt, remove the offset we added to account for the return
2690 // address from any stack object allocated in the caller's frame. Interrupts
2691 // do not have a standard return address. Fixed objects in the current frame,
2692 // such as SSE register spills, should not get this treatment.
2694 Offset >= 0) {
2696 }
2697
2698 if (IsWin64Prologue) {
2699 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2700
2701 // Calculate required stack adjustment.
2702 uint64_t FrameSize = StackSize - SlotSize;
2703 // If required, include space for extra hidden slot for stashing base
2704 // pointer.
2705 if (X86FI->getRestoreBasePointer())
2706 FrameSize += SlotSize;
2707 uint64_t NumBytes = FrameSize - CSSize;
2708
2709 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2710 if (FI && FI == X86FI->getFAIndex())
2711 return StackOffset::getFixed(-SEHFrameOffset);
2712
2713 // FPDelta is the offset from the "traditional" FP location of the old base
2714 // pointer followed by return address and the location required by the
2715 // restricted Win64 prologue.
2716 // Add FPDelta to all offsets below that go through the frame pointer.
2717 FPDelta = FrameSize - SEHFrameOffset;
2718 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2719 "FPDelta isn't aligned per the Win64 ABI!");
2720 }
2721
2722 if (FrameReg == TRI->getFramePtr()) {
2723 // Skip saved EBP/RBP
2724 Offset += SlotSize;
2725
2726 // Account for restricted Windows prologue.
2727 Offset += FPDelta;
2728
2729 // Skip the RETADDR move area
2730 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2731 if (TailCallReturnAddrDelta < 0)
2732 Offset -= TailCallReturnAddrDelta;
2733
2735 }
2736
2737 // FrameReg is either the stack pointer or a base pointer. But the base is
2738 // located at the end of the statically known StackSize so the distinction
2739 // doesn't really matter.
2740 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2741 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2742 return StackOffset::getFixed(Offset + StackSize);
2743 }
2744
// Resolves a frame index for a Win64 EH XMM spill slot: if FI has an entry in
// the WinEHXMMSlotInfo map the result is SP-relative (FrameReg is set to the
// stack register); otherwise it falls back to the generic
// getFrameIndexReference.
// NOTE(review): this extract is missing the opening signature line (original
// line 2745), line 2748 (X86FI initializer), and line 2756 (the start of the
// final return expression whose trailing operand `it->second` appears below)
// -- consult the full source before editing.
2746 Register &FrameReg) const {
2747 const MachineFrameInfo &MFI = MF.getFrameInfo();
2749 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2750 const auto it = WinEHXMMSlotInfo.find(FI);
2751
2752 if (it == WinEHXMMSlotInfo.end())
2753 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2754
2755 FrameReg = TRI->getStackRegister();
2757 it->second;
2758 }
2759
// SP-relative frame index resolution: sets FrameReg to the stack register and
// returns the object's offset from the incoming SP plus the caller-supplied
// Adjustment (e.g. the static stack size).
// NOTE(review): the opening signature lines (original lines 2760-2761,
// presumably `StackOffset X86FrameLowering::getFrameIndexReferenceSP(const
// MachineFunction &MF, int FI, ...)`) are missing from this extract.
2762 Register &FrameReg,
2763 int Adjustment) const {
2764 const MachineFrameInfo &MFI = MF.getFrameInfo();
2765 FrameReg = TRI->getStackRegister();
2766 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2767 getOffsetOfLocalArea() + Adjustment);
2768 }
2769
// X86FrameLowering::getFrameIndexReferencePreferSP: like
// getFrameIndexReference, but prefers answering relative to the
// post-prologue SP when that is statically valid; falls back to the generic
// resolution for realigned fixed objects (non-Win64) and for functions whose
// SP moves in the body (unless IgnoreSPUpdates).
// NOTE(review): this extract is missing the opening signature lines (original
// lines 2770-2771) and line 2822 (the start of the no-tail-calls assert whose
// message string appears below) -- consult the full source before editing.
2772 int FI, Register &FrameReg,
2773 bool IgnoreSPUpdates) const {
2774
2775 const MachineFrameInfo &MFI = MF.getFrameInfo();
2776 // Does not include any dynamic realign.
2777 const uint64_t StackSize = MFI.getStackSize();
2778 // LLVM arranges the stack as follows:
2779 // ...
2780 // ARG2
2781 // ARG1
2782 // RETADDR
2783 // PUSH RBP <-- RBP points here
2784 // PUSH CSRs
2785 // ~~~~~~~ <-- possible stack realignment (non-win64)
2786 // ...
2787 // STACK OBJECTS
2788 // ... <-- RSP after prologue points here
2789 // ~~~~~~~ <-- possible stack realignment (win64)
2790 //
2791 // if (hasVarSizedObjects()):
2792 // ... <-- "base pointer" (ESI/RBX) points here
2793 // DYNAMIC ALLOCAS
2794 // ... <-- RSP points here
2795 //
2796 // Case 1: In the simple case of no stack realignment and no dynamic
2797 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2798 // with fixed offsets from RSP.
2799 //
2800 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2801 // stack objects are addressed with RBP and regular stack objects with RSP.
2802 //
2803 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2804 // to address stack arguments for outgoing calls and nothing else. The "base
2805 // pointer" points to local variables, and RBP points to fixed objects.
2806 //
2807 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2808 // answer we give is relative to the SP after the prologue, and not the
2809 // SP in the middle of the function.
2810
2811 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2812 !STI.isTargetWin64())
2813 return getFrameIndexReference(MF, FI, FrameReg);
2814
2815 // If !hasReservedCallFrame the function might have SP adjustement in the
2816 // body. So, even though the offset is statically known, it depends on where
2817 // we are in the function.
2818 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2819 return getFrameIndexReference(MF, FI, FrameReg);
2820
2821 // We don't handle tail calls, and shouldn't be seeing them either.
2823 "we don't handle this case!");
2824
2825 // This is how the math works out:
2826 //
2827 // %rsp grows (i.e. gets lower) left to right. Each box below is
2828 // one word (eight bytes). Obj0 is the stack slot we're trying to
2829 // get to.
2830 //
2831 // ----------------------------------
2832 // | BP | Obj0 | Obj1 | ... | ObjN |
2833 // ----------------------------------
2834 // ^ ^ ^ ^
2835 // A B C E
2836 //
2837 // A is the incoming stack pointer.
2838 // (B - A) is the local area offset (-8 for x86-64) [1]
2839 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2840 //
2841 // |(E - B)| is the StackSize (absolute value, positive). For a
2842 // stack that grown down, this works out to be (B - E). [3]
2843 //
2844 // E is also the value of %rsp after stack has been set up, and we
2845 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2846 // (C - E) == (C - A) - (B - A) + (B - E)
2847 // { Using [1], [2] and [3] above }
2848 // == getObjectOffset - LocalAreaOffset + StackSize
2849
2850 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2851 }
2852
// assignCalleeSavedSpillSlots - lay out fixed stack slots for the registers
// in CSI, walking downward from the local-area offset (adjusted for any
// tail-call return-address delta).  GPRs receive push/pop slots, optionally
// paired for push2/pop2 on capable subtargets; XMM and mask registers get
// aligned spill slots below the GPR area.  Returns true so generic spill-slot
// assignment is skipped.
// NOTE(review): this listing elides a few lines (e.g. the X86FI and FPReg
// declarations), so some locals used below are declared off-screen.
2855 std::vector<CalleeSavedInfo> &CSI) const {
2856 MachineFrameInfo &MFI = MF.getFrameInfo();
2858
2859 unsigned CalleeSavedFrameSize = 0;
2860 unsigned XMMCalleeSavedFrameSize = 0;
2861 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
// Slot offsets grow downward from here; negative values are below the
// incoming stack pointer.
2862 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2863
2864 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2865
2866 if (TailCallReturnAddrDelta < 0) {
2867 // create RETURNADDR area
2868 // arg
2869 // arg
2870 // RETADDR
2871 // { ...
2872 // RETADDR area
2873 // ...
2874 // }
2875 // [EBP]
2876 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2877 TailCallReturnAddrDelta - SlotSize, true);
2878 }
2879
2880 // Spill the BasePtr if it's used.
2881 if (this->TRI->hasBasePointer(MF)) {
2882 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2883 if (MF.hasEHFunclets()) {
2885 X86FI->setHasSEHFramePtrSave(true);
2886 X86FI->setSEHFramePtrSaveIndex(FI);
2887 }
2888 }
2889
2890 if (hasFP(MF)) {
2891 // emitPrologue always spills frame register the first thing.
2892 SpillSlotOffset -= SlotSize;
2893 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2894
2895 // The async context lives directly before the frame pointer, and we
2896 // allocate a second slot to preserve stack alignment.
2897 if (X86FI->hasSwiftAsyncContext()) {
2898 SpillSlotOffset -= SlotSize;
2899 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2900 SpillSlotOffset -= SlotSize;
2901 }
2902
2903 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2904 // the frame register, we can delete it from CSI list and not have to worry
2905 // about avoiding it later.
2907 for (unsigned i = 0; i < CSI.size(); ++i) {
2908 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2909 CSI.erase(CSI.begin() + i);
2910 break;
2911 }
2912 }
2913 }
2914
2915 // Strategy:
2916 // 1. Use push2 when
2917 //       a) number of CSR > 1 if no need padding
2918 //       b) number of CSR > 2 if need padding
2919 //       c) stack alignment >= 16 bytes
2920 // 2. When the number of CSR push is odd
2921 //    a. Start to use push2 from the 1st push if stack is 16B aligned.
2922 //    b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2923 // 3. When the number of CSR push is even, start to use push2 from the 1st
2924 //    push and make the stack 16B aligned before the push
2925 unsigned NumRegsForPush2 = 0;
2926 if (STI.hasPush2Pop2() && getStackAlignment() >= 16) {
// Only 64-bit GPRs are eligible for pairing with push2/pop2.
2927 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2928 return X86::GR64RegClass.contains(I.getReg());
2929 });
2930 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2931 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2932 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2933 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
// Reserve the padding slot now so the first push2 lands on a 16B boundary.
2934 if (X86FI->padForPush2Pop2()) {
2935 SpillSlotOffset -= SlotSize;
2936 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2937 }
2938 }
2939
2940 // Assign slots for GPRs. It increases frame size.
2941 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2942 MCRegister Reg = I.getReg();
2943
2944 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2945 continue;
2946
2947 // A CSR is a candidate for push2/pop2 when it's slot offset is 16B aligned
2948 // or only an odd number of registers in the candidates.
2949 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2950 (SpillSlotOffset % 16 == 0 ||
2951 X86FI->getNumCandidatesForPush2Pop2() % 2))
2952 X86FI->addCandidateForPush2Pop2(Reg);
2953
2954 SpillSlotOffset -= SlotSize;
2955 CalleeSavedFrameSize += SlotSize;
2956
2957 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2958 I.setFrameIdx(SlotIndex);
2959 }
2960
2961 // Adjust the offset of spill slot as we know the accurate callee saved frame
2962 // size.
2963 if (X86FI->getRestoreBasePointer()) {
2964 SpillSlotOffset -= SlotSize;
2965 CalleeSavedFrameSize += SlotSize;
2966
2967 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2968 // TODO: saving the slot index is better?
2969 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2970 }
// push2/pop2 always transfers register pairs, so the candidate count must
// come out even by construction.
2971 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2972 "Expect even candidates for push2/pop2");
2973 if (X86FI->getNumCandidatesForPush2Pop2())
2974 ++NumFunctionUsingPush2Pop2;
2975 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2976 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2977
2978 // Assign slots for XMMs.
2979 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2980 MCRegister Reg = I.getReg();
2981 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2982 continue;
2983
2984 // If this is k-register make sure we lookup via the largest legal type.
2985 MVT VT = MVT::Other;
2986 if (X86::VK16RegClass.contains(Reg))
2987 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2988
2989 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2990 unsigned Size = TRI->getSpillSize(*RC);
2991 Align Alignment = TRI->getSpillAlign(*RC);
2992 // ensure alignment
2993 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2994 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2995
2996 // spill into slot
2997 SpillSlotOffset -= Size;
2998 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2999 I.setFrameIdx(SlotIndex);
3000 MFI.ensureMaxAlignment(Alignment);
3001
3002 // Save the start offset and size of XMM in stack frame for funclets.
3003 if (X86::VR128RegClass.contains(Reg)) {
3004 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
3005 XMMCalleeSavedFrameSize += Size;
3006 }
3007 }
3008
3009 return true;
3010}
3011
// spillCalleeSavedRegisters - emit the prologue-side saves for the registers
// in CSI at insertion point MI: GPRs are pushed (paired with push2 where
// assignCalleeSavedSpillSlots marked candidates), everything else is spilled
// to the stack slots assigned earlier.  Returns true to indicate spilling was
// handled here.
// NOTE(review): the rendered listing drops some lines (the signature and a
// few BuildMI flag/trailer operands), so parts of the builder chains below
// appear truncated.
3016
3017 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
3018 // for us, and there are no XMM CSRs on Win32.
3019 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
3020 return true;
3021
3022 // Push GPRs. It increases frame size.
3023 const MachineFunction &MF = *MBB.getParent();
// Emit a dead push of RAX purely to restore 16-byte alignment before the
// first push2 (slot reserved in assignCalleeSavedSpillSlots).
3025 if (X86FI->padForPush2Pop2()) {
3026 assert(SlotSize == 8 && "Unexpected slot size for padding!");
3027 BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
3028 .addReg(X86::RAX, RegState::Undef)
3030 }
3031
3032 // Update LiveIn of the basic block and decide whether we can add a kill flag
3033 // to the use.
3034 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
3035 const MachineRegisterInfo &MRI = MF.getRegInfo();
3036 // Do not set a kill flag on values that are also marked as live-in. This
3037 // happens with the @llvm-returnaddress intrinsic and with arguments
3038 // passed in callee saved registers.
3039 // Omitting the kill flags is conservatively correct even if the live-in
3040 // is not used after all.
3041 if (MRI.isLiveIn(Reg))
3042 return false;
3043 MBB.addLiveIn(Reg);
3044 // Check if any subregister is live-in
3045 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3046 if (MRI.isLiveIn(*AReg))
3047 return false;
3048 return true;
3049 };
3050 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3051 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3052 };
3053
// Iterate CSI in reverse so pushes occur in the order the slots were laid
// out (top of frame first).
3054 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3055 MCRegister Reg = RI->getReg();
3056 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3057 continue;
3058
// A push2 candidate consumes its pair partner from the list as well.
3059 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3060 MCRegister Reg2 = (++RI)->getReg();
3062 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3063 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3065 } else {
3066 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3067 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3069 }
3070 }
3071
// An extra push of the base pointer so stack-realigned functions can
// recover it (see restoreCalleeSavedRegisters for the matching pop).
3072 if (X86FI->getRestoreBasePointer()) {
3073 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3074 Register BaseReg = this->TRI->getBaseRegister();
3075 BuildMI(MBB, MI, DL, TII.get(Opc))
3076 .addReg(BaseReg, getKillRegState(true))
3078 }
3079
3080 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
3081 // It can be done by spilling XMMs to stack frame.
3082 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3083 MCRegister Reg = I.getReg();
3084 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3085 continue;
3086
3087 // If this is k-register make sure we lookup via the largest legal type.
3088 MVT VT = MVT::Other;
3089 if (X86::VK16RegClass.contains(Reg))
3090 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3091
3092 // Add the callee-saved register as live-in. It's killed at the spill.
3093 MBB.addLiveIn(Reg);
3094 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3095
3096 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3098 }
3099
3100 return true;
3101}
3102
// emitCatchRetReturnValue - materialize the CATCHRET target block's address
// into EAX/RAX before MBBI, which is how the WinEH runtime learns where the
// funclet should resume.  Also marks the target block as address-taken.
3103void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3105 MachineInstr *CatchRet) const {
3106 // SEH shouldn't use catchret.
3109 "SEH should not use CATCHRET");
3110 const DebugLoc &DL = CatchRet->getDebugLoc();
3111 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3112
3113 // Fill EAX/RAX with the address of the target block.
3114 if (STI.is64Bit()) {
3115 // LEA64r CatchRetTarget(%rip), %rax
3116 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3117 .addReg(X86::RIP)
3118 .addImm(0)
3119 .addReg(0)
3120 .addMBB(CatchRetTarget)
3121 .addReg(0);
3122 } else {
3123 // MOV32ri $CatchRetTarget, %eax
3124 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3125 .addMBB(CatchRetTarget);
3126 }
3127
3128 // Record that we've taken the address of CatchRetTarget and no longer just
3129 // reference it in a terminator.
3130 CatchRetTarget->setMachineBlockAddressTaken();
3131}
3132
// restoreCalleeSavedRegisters - epilogue-side counterpart of
// spillCalleeSavedRegisters: reload XMM/mask registers from their slots,
// pop the saved base pointer if one was pushed, then pop GPRs (with pop2 for
// paired candidates) and undo any push2 alignment padding.  Returns true to
// indicate restoring was handled here.
// NOTE(review): the signature line and a few BuildMI trailer operands are
// elided in this rendering.
3136 if (CSI.empty())
3137 return false;
3138
3139 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3140 // Don't restore CSRs in 32-bit EH funclets. Matches
3141 // spillCalleeSavedRegisters.
3142 if (STI.is32Bit())
3143 return true;
3144 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3145 // funclets. emitEpilogue transforms these to normal jumps.
3146 if (MI->getOpcode() == X86::CATCHRET) {
3147 const Function &F = MBB.getParent()->getFunction();
3148 bool IsSEH = isAsynchronousEHPersonality(
3149 classifyEHPersonality(F.getPersonalityFn()));
3150 if (IsSEH)
3151 return true;
3152 }
3153 }
3154
3156
3157 // Reload XMMs from stack frame.
3158 for (const CalleeSavedInfo &I : CSI) {
3159 MCRegister Reg = I.getReg();
3160 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3161 continue;
3162
3163 // If this is k-register make sure we lookup via the largest legal type.
3164 MVT VT = MVT::Other;
3165 if (X86::VK16RegClass.contains(Reg))
3166 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3167
3168 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3169 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3170 Register());
3171 }
3172
3173 // Clear the stack slot for spill base pointer register.
3174 MachineFunction &MF = *MBB.getParent();
3176 if (X86FI->getRestoreBasePointer()) {
3177 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3178 Register BaseReg = this->TRI->getBaseRegister();
3179 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3181 }
3182
3183 // POP GPRs.
// Forward order here mirrors the reverse order used when pushing.
3184 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3185 MCRegister Reg = I->getReg();
3186 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3187 continue;
3188
3189 if (X86FI->isCandidateForPush2Pop2(Reg))
3190 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3193 else
3194 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3196 }
// Undo the alignment slot pushed before the first push2 in the prologue.
3197 if (X86FI->padForPush2Pop2())
3198 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3199
3200 return true;
3201}
3202
// determineCalleeSaves - mark additional registers that must be saved beyond
// the default callee-saved set.  Here that is only the base pointer, when
// one is in use for this function.
3204 BitVector &SavedRegs,
3205 RegScavenger *RS) const {
3207
3208 // Spill the BasePtr if it's used.
3209 if (TRI->hasBasePointer(MF)) {
3210 Register BasePtr = TRI->getBaseRegister();
// On x32 (64-bit ILP32) save the full 64-bit super-register of the
// 32-bit base pointer.
3211 if (STI.isTarget64BitILP32())
3212 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3213 SavedRegs.set(BasePtr);
3214 }
3215}
3216
3217static bool HasNestArgument(const MachineFunction *MF) {
3218 const Function &F = MF->getFunction();
3219 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3220 I++) {
3221 if (I->hasNestAttr() && !I->use_empty())
3222 return true;
3223 }
3224 return false;
3225}
3226
3227/// GetScratchRegister - Get a temp register for performing work in the
3228/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3229/// and the properties of the function either one or two registers will be
3230/// needed. Set primary to true for the first register, false for the second.
3231static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3232 const MachineFunction &MF, bool Primary) {
3233 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3234
3235 // Erlang stuff.
3236 if (CallingConvention == CallingConv::HiPE) {
3237 if (Is64Bit)
3238 return Primary ? X86::R14 : X86::R13;
3239 else
3240 return Primary ? X86::EBX : X86::EDI;
3241 }
3242
3243 if (Is64Bit) {
3244 if (IsLP64)
3245 return Primary ? X86::R11 : X86::R12;
3246 else
3247 return Primary ? X86::R11D : X86::R12D;
3248 }
3249
3250 bool IsNested = HasNestArgument(&MF);
3251
3252 if (CallingConvention == CallingConv::X86_FastCall ||
3253 CallingConvention == CallingConv::Fast ||
3254 CallingConvention == CallingConv::Tail) {
3255 if (IsNested)
3256 report_fatal_error("Segmented stacks does not support fastcall with "
3257 "nested function.");
3258 return Primary ? X86::EAX : X86::ECX;
3259 }
3260 if (IsNested)
3261 return Primary ? X86::EDX : X86::EAX;
3262 return Primary ? X86::ECX : X86::EAX;
3263}
3264
3265// The stack limit in the TCB is set to this many bytes above the actual stack
3266// limit.
3268
// adjustForSegmentedStacks - prepend a split-stack prologue: checkMBB compares
// (SP - frame size) against the stacklet limit stored at a per-OS TLS offset;
// on failure, allocMBB passes the frame and argument sizes to libgcc's
// __morestack and returns into the normal prologue.
// NOTE(review): this rendering elides the signature line and the lines that
// create checkMBB/allocMBB, plus a few BuildMI operands.
3270 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3271 MachineFrameInfo &MFI = MF.getFrameInfo();
3272 uint64_t StackSize;
3273 unsigned TlsReg, TlsOffset;
3274 DebugLoc DL;
3275
3276 // To support shrink-wrapping we would need to insert the new blocks
3277 // at the right place and update the branches to PrologueMBB.
3278 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3279
3280 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3281 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3282 "Scratch register is live-in");
3283
3284 if (MF.getFunction().isVarArg())
3285 report_fatal_error("Segmented stacks do not support vararg functions.");
3286 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3289 report_fatal_error("Segmented stacks not supported on this platform.");
3290
3291 // Eventually StackSize will be calculated by a link-time pass; which will
3292 // also decide whether checking code needs to be injected into this particular
3293 // prologue.
3294 StackSize = MFI.getStackSize();
3295
3296 if (!MFI.needsSplitStackProlog())
3297 return;
3298
3302 bool IsNested = false;
3303
3304 // We need to know if the function has a nest argument only in 64 bit mode.
3305 if (Is64Bit)
3306 IsNested = HasNestArgument(&MF);
3307
3308 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3309 // allocMBB needs to be last (terminating) instruction.
3310
3311 for (const auto &LI : PrologueMBB.liveins()) {
3312 allocMBB->addLiveIn(LI);
3313 checkMBB->addLiveIn(LI);
3314 }
3315
3316 if (IsNested)
3317 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3318
3319 MF.push_front(allocMBB);
3320 MF.push_front(checkMBB);
3321
3322 // When the frame size is less than 256 we just compare the stack
3323 // boundary directly to the value of the stack pointer, per gcc.
3324 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3325
3326 // Read the limit off the current stacklet off the stack_guard location.
3327 if (Is64Bit) {
// Each supported OS stores the stacklet limit at a different TLS offset.
3328 if (STI.isTargetLinux()) {
3329 TlsReg = X86::FS;
3330 TlsOffset = IsLP64 ? 0x70 : 0x40;
3331 } else if (STI.isTargetDarwin()) {
3332 TlsReg = X86::GS;
3333 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3334 } else if (STI.isTargetWin64()) {
3335 TlsReg = X86::GS;
3336 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3337 } else if (STI.isTargetFreeBSD()) {
3338 TlsReg = X86::FS;
3339 TlsOffset = 0x18;
3340 } else if (STI.isTargetDragonFly()) {
3341 TlsReg = X86::FS;
3342 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3343 } else {
3344 report_fatal_error("Segmented stacks not supported on this platform.");
3345 }
3346
3347 if (CompareStackPointer)
3348 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3349 else
3350 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3351 ScratchReg)
3352 .addReg(X86::RSP)
3353 .addImm(1)
3354 .addReg(0)
3355 .addImm(-StackSize)
3356 .addReg(0);
3357
3358 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3359 .addReg(ScratchReg)
3360 .addReg(0)
3361 .addImm(1)
3362 .addReg(0)
3363 .addImm(TlsOffset)
3364 .addReg(TlsReg);
3365 } else {
3366 if (STI.isTargetLinux()) {
3367 TlsReg = X86::GS;
3368 TlsOffset = 0x30;
3369 } else if (STI.isTargetDarwin()) {
3370 TlsReg = X86::GS;
3371 TlsOffset = 0x48 + 90 * 4;
3372 } else if (STI.isTargetWin32()) {
3373 TlsReg = X86::FS;
3374 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3375 } else if (STI.isTargetDragonFly()) {
3376 TlsReg = X86::FS;
3377 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3378 } else if (STI.isTargetFreeBSD()) {
3379 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3380 } else {
3381 report_fatal_error("Segmented stacks not supported on this platform.");
3382 }
3383
3384 if (CompareStackPointer)
3385 ScratchReg = X86::ESP;
3386 else
3387 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3388 .addReg(X86::ESP)
3389 .addImm(1)
3390 .addReg(0)
3391 .addImm(-StackSize)
3392 .addReg(0);
3393
3396 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3397 .addReg(ScratchReg)
3398 .addReg(0)
3399 .addImm(0)
3400 .addReg(0)
3401 .addImm(TlsOffset)
3402 .addReg(TlsReg);
3403 } else if (STI.isTargetDarwin()) {
3404
3405 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3406 unsigned ScratchReg2;
3407 bool SaveScratch2;
3408 if (CompareStackPointer) {
3409 // The primary scratch register is available for holding the TLS offset.
3410 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3411 SaveScratch2 = false;
3412 } else {
3413 // Need to use a second register to hold the TLS offset
3414 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3415
3416 // Unfortunately, with fastcc the second scratch register may hold an
3417 // argument.
3418 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3419 }
3420
3421 // If Scratch2 is live-in then it needs to be saved.
3422 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3423 "Scratch register is live-in and not saved");
3424
3425 if (SaveScratch2)
3426 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3427 .addReg(ScratchReg2, RegState::Kill);
3428
3429 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3430 .addImm(TlsOffset);
3431 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3432 .addReg(ScratchReg)
3433 .addReg(ScratchReg2)
3434 .addImm(1)
3435 .addReg(0)
3436 .addImm(0)
3437 .addReg(TlsReg);
3438
3439 if (SaveScratch2)
3440 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3441 }
3442 }
3443
3444 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3445 // It jumps to normal execution of the function body.
3446 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3447 .addMBB(&PrologueMBB)
3449
3450 // On 32 bit we first push the arguments size and then the frame size. On 64
3451 // bit, we pass the stack frame size in r10 and the argument size in r11.
3452 if (Is64Bit) {
3453 // Functions with nested arguments use R10, so it needs to be saved across
3454 // the call to _morestack
3455
3456 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3457 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3458 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3459 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3460
3461 if (IsNested)
3462 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3463
3464 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3465 .addImm(StackSize);
3466 BuildMI(allocMBB, DL,
3468 Reg11)
3469 .addImm(X86FI->getArgumentStackSize());
3470 } else {
3471 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3472 .addImm(X86FI->getArgumentStackSize());
3473 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3474 }
3475
3476 // __morestack is in libgcc
3478 // Under the large code model, we cannot assume that __morestack lives
3479 // within 2^31 bytes of the call site, so we cannot use pc-relative
3480 // addressing. We cannot perform the call via a temporary register,
3481 // as the rax register may be used to store the static chain, and all
3482 // other suitable registers may be either callee-save or used for
3483 // parameter passing. We cannot use the stack at this point either
3484 // because __morestack manipulates the stack directly.
3485 //
3486 // To avoid these issues, perform an indirect call via a read-only memory
3487 // location containing the address.
3488 //
3489 // This solution is not perfect, as it assumes that the .rodata section
3490 // is laid out within 2^31 bytes of each function body, but this seems
3491 // to be sufficient for JIT.
3492 // FIXME: Add retpoline support and remove the error here..
3494 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3495 "code model and thunks not yet implemented.");
3496 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3497 .addReg(X86::RIP)
3498 .addImm(0)
3499 .addReg(0)
3500 .addExternalSymbol("__morestack_addr")
3501 .addReg(0);
3502 } else {
3503 if (Is64Bit)
3504 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3505 .addExternalSymbol("__morestack");
3506 else
3507 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3508 .addExternalSymbol("__morestack");
3509 }
3510
// __morestack returns directly past the call site, so allocMBB ends with a
// pseudo RET (restoring R10 first when the static chain lives there).
3511 if (IsNested)
3512 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3513 else
3514 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3515
3516 allocMBB->addSuccessor(&PrologueMBB);
3517
3518 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3519 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3520
3521#ifdef EXPENSIVE_CHECKS
3522 MF.verify();
3523#endif
3524}
3525
3526/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3527/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3528/// to fields it needs, through a named metadata node "hipe.literals" containing
3529/// name-value pairs.
3530static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3531 const StringRef LiteralName) {
3532 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3533 MDNode *Node = HiPELiteralsMD->getOperand(i);
3534 if (Node->getNumOperands() != 2)
3535 continue;
3536 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3537 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3538 if (!NodeName || !NodeVal)
3539 continue;
3540 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3541 if (ValConst && NodeName->getString() == LiteralName) {
3542 return ValConst->getZExtValue();
3543 }
3544 }
3545
3546 report_fatal_error("HiPE literal " + LiteralName +
3547 " required but not provided");
3548}
3549
3550// Return true if there are no non-ehpad successors to MBB and there are no
3551// non-meta instructions between MBBI and MBB.end().
// Used below (eliminateCallFramePseudoInstr) to suppress dead SP adjustments
// at the end of noreturn paths.
3554 return llvm::all_of(
3555 MBB.successors(),
3556 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3557 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3558 return MI.isMetaInstruction();
3559 });
3560}
3561
3562/// Erlang programs may need a special prologue to handle the stack size they
3563/// might need at runtime. That is because Erlang/OTP does not implement a C
3564/// stack but uses a custom implementation of hybrid stack/heap architecture.
3565/// (for more information see Eric Stenman's Ph.D. thesis:
3566/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3567///
3568/// CheckStack:
3569/// temp0 = sp - MaxStack
3570/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3571/// OldStart:
3572/// ...
3573/// IncStack:
3574/// call inc_stack # doubles the stack space
3575/// temp0 = sp - MaxStack
3576/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3578 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3579 MachineFrameInfo &MFI = MF.getFrameInfo();
3580 DebugLoc DL;
3581
3582 // To support shrink-wrapping we would need to insert the new blocks
3583 // at the right place and update the branches to PrologueMBB.
3584 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3585
3586 // HiPE-specific values
3587 NamedMDNode *HiPELiteralsMD =
3588 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3589 if (!HiPELiteralsMD)
3591 "Can't generate HiPE prologue without runtime parameters");
3592 const unsigned HipeLeafWords = getHiPELiteral(
3593 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3594 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3595 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3596 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3597 ? MF.getFunction().arg_size() - CCRegisteredArgs
3598 : 0;
3599 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3600
3602 "HiPE prologue is only supported on Linux operating systems.");
3603
3604 // Compute the largest caller's frame that is needed to fit the callees'
3605 // frames. This 'MaxStack' is computed from:
3606 //
3607 // a) the fixed frame size, which is the space needed for all spilled temps,
3608 // b) outgoing on-stack parameter areas, and
3609 // c) the minimum stack space this function needs to make available for the
3610 // functions it calls (a tunable ABI property).
3611 if (MFI.hasCalls()) {
3612 unsigned MoreStackForCalls = 0;
3613
3614 for (auto &MBB : MF) {
3615 for (auto &MI : MBB) {
3616 if (!MI.isCall())
3617 continue;
3618
3619 // Get callee operand.
3620 const MachineOperand &MO = MI.getOperand(0);
3621
3622 // Only take account of global function calls (no closures etc.).
3623 if (!MO.isGlobal())
3624 continue;
3625
3626 const Function *F = dyn_cast<Function>(MO.getGlobal());
3627 if (!F)
3628 continue;
3629
3630 // Do not update 'MaxStack' for primitive and built-in functions
3631 // (encoded with names either starting with "erlang."/"bif_" or not
3632 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3633 // "_", such as the BIF "suspend_0") as they are executed on another
3634 // stack.
3635 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3636 F->getName().find_first_of("._") == StringRef::npos)
3637 continue;
3638
3639 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3640 ? F->arg_size() - CCRegisteredArgs
3641 : 0;
3642 if (HipeLeafWords - 1 > CalleeStkArity)
3643 MoreStackForCalls =
3644 std::max(MoreStackForCalls,
3645 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3646 }
3647 }
3648 MaxStack += MoreStackForCalls;
3649 }
3650
3651 // If the stack frame needed is larger than the guaranteed then runtime checks
3652 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
3653 if (MaxStack > Guaranteed) {
3654 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3655 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3656
3657 for (const auto &LI : PrologueMBB.liveins()) {
3658 stackCheckMBB->addLiveIn(LI);
3659 incStackMBB->addLiveIn(LI);
3660 }
3661
3662 MF.push_front(incStackMBB);
3663 MF.push_front(stackCheckMBB);
3664
3665 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3666 unsigned LEAop, CMPop, CALLop;
3667 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3668 if (Is64Bit) {
3669 SPReg = X86::RSP;
3670 PReg = X86::RBP;
3671 LEAop = X86::LEA64r;
3672 CMPop = X86::CMP64rm;
3673 CALLop = X86::CALL64pcrel32;
3674 } else {
3675 SPReg = X86::ESP;
3676 PReg = X86::EBP;
3677 LEAop = X86::LEA32r;
3678 CMPop = X86::CMP32rm;
3679 CALLop = X86::CALLpcrel32;
3680 }
3681
3682 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3683 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3684 "HiPE prologue scratch register is live-in");
3685
3686 // Create new MBB for StackCheck:
3687 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3688 false, -MaxStack);
3689 // SPLimitOffset is in a fixed heap location (pointed by BP).
3690 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3691 PReg, false, SPLimitOffset);
3692 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3693 .addMBB(&PrologueMBB)
3695
3696 // Create new MBB for IncStack:
3697 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3698 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3699 false, -MaxStack);
3700 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3701 PReg, false, SPLimitOffset);
3702 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3703 .addMBB(incStackMBB)
3705
3706 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3707 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3708 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3709 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3710 }
3711#ifdef EXPENSIVE_CHECKS
3712 MF.verify();
3713#endif
3714}
3715
// adjustStackWithPops - try to replace a small post-call stack adjustment
// (Offset bytes, at most two stack slots) with pops into registers the
// preceding call clobbers, which is smaller/cheaper than an add to SP.
// Returns true if the pops were emitted, false if the pattern didn't apply.
3716bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3718 const DebugLoc &DL,
3719 int Offset) const {
3720 if (Offset <= 0)
3721 return false;
3722
3723 if (Offset % SlotSize)
3724 return false;
3725
3726 int NumPops = Offset / SlotSize;
3727 // This is only worth it if we have at most 2 pops.
3728 if (NumPops != 1 && NumPops != 2)
3729 return false;
3730
3731 // Handle only the trivial case where the adjustment directly follows
3732 // a call. This is the most common one, anyway.
3733 if (MBBI == MBB.begin())
3734 return false;
3735 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3736 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3737 return false;
3738
3739 unsigned Regs[2];
3740 unsigned FoundRegs = 0;
3741
3743 const MachineOperand &RegMask = Prev->getOperand(1);
3744
3745 auto &RegClass =
3746 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3747 // Try to find up to NumPops free registers.
3748 for (auto Candidate : RegClass) {
3749 // Poor man's liveness:
3750 // Since we're immediately after a call, any register that is clobbered
3751 // by the call and not defined by it can be considered dead.
3752 if (!RegMask.clobbersPhysReg(Candidate))
3753 continue;
3754
3755 // Don't clobber reserved registers
3756 if (MRI.isReserved(Candidate))
3757 continue;
3758
// Registers defined by the call (e.g. its return value) are live, so
// they cannot be used as pop destinations.
3759 bool IsDef = false;
3760 for (const MachineOperand &MO : Prev->implicit_operands()) {
3761 if (MO.isReg() && MO.isDef() &&
3762 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3763 IsDef = true;
3764 break;
3765 }
3766 }
3767
3768 if (IsDef)
3769 continue;
3770
3771 Regs[FoundRegs++] = Candidate;
3772 if (FoundRegs == (unsigned)NumPops)
3773 break;
3774 }
3775
3776 if (FoundRegs == 0)
3777 return false;
3778
3779 // If we found only one free register, but need two, reuse the same one twice.
3780 while (FoundRegs < (unsigned)NumPops)
3781 Regs[FoundRegs++] = Regs[0];
3782
3783 for (int i = 0; i < NumPops; ++i)
3784 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3785 Regs[i]);
3786
3787 return true;
3788}
3789
// NOTE(review): the opening of this definition (original lines 3790-3792) is
// missing from this extract; from the body it is the call-frame pseudo
// elimination hook (presumably X86FrameLowering::eliminateCallFramePseudoInstr
// — confirm against upstream). It erases the ADJCALLSTACKDOWN/ADJCALLSTACKUP
// pseudo at I and, when the call frame is not reserved, materializes the
// corresponding SP adjustment plus the CFI needed to keep the CFA correct.
3793 bool reserveCallFrame = hasReservedCallFrame(MF);
3794 unsigned Opcode = I->getOpcode();
// "destroy" = the call-frame-destroy pseudo; otherwise this is frame setup.
3795 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3796 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3797 uint64_t Amount = TII.getFrameSize(*I);
3798 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3799 I = MBB.erase(I);
3800 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3801
3802 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3803 // typically because the function is marked noreturn (abort, throw,
3804 // assert_fail, etc).
3805 if (isDestroy && blockEndIsUnreachable(MBB, I))
3806 return I;
3807
3808 if (!reserveCallFrame) {
3809 // If the stack pointer can be changed after prologue, turn the
3810 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3811 // adjcallstackdown instruction into 'add ESP, <amt>'
3812
3813 // We need to keep the stack aligned properly. To do this, we round the
3814 // amount of space needed for the outgoing arguments up to the next
3815 // alignment boundary.
3816 Amount = alignTo(Amount, getStackAlign());
3817
3818 const Function &F = MF.getFunction();
3819 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3820 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3821
3822 // If we have any exception handlers in this function, and we adjust
3823 // the SP before calls, we may need to indicate this to the unwinder
3824 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3825 // Amount == 0, because the preceding function may have set a non-0
3826 // GNU_ARGS_SIZE.
3827 // TODO: We don't need to reset this between subsequent functions,
3828 // if it didn't change.
3829 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3830
// NOTE(review): original line 3832 (the rest of this condition) is missing
// from this extract.
3831 if (HasDwarfEHHandlers && !isDestroy &&
3833 BuildCFI(MBB, InsertPos, DL,
3834 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3835
3836 if (Amount == 0)
3837 return I;
3838
3839 // Factor out the amount that gets handled inside the sequence
3840 // (Pushes of argument for frame setup, callee pops for frame destroy)
3841 Amount -= InternalAmt;
3842
3843 // TODO: This is needed only if we require precise CFA.
3844 // If this is a callee-pop calling convention, emit a CFA adjust for
3845 // the amount the callee popped.
3846 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3847 BuildCFI(MBB, InsertPos, DL,
3848 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3849
3850 // Add Amount to SP to destroy a frame, or subtract to setup.
3851 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3852 int64_t CfaAdjustment = StackAdjustment;
3853
3854 if (StackAdjustment) {
3855 // Merge with any previous or following adjustment instruction. Note: the
3856 // instructions merged with here do not have CFI, so their stack
3857 // adjustments do not feed into CfaAdjustment
3858
// The two callbacks let mergeSPUpdates fold neighboring SP updates into
// this one while accumulating the total into CfaAdjustment.
3859 auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI,
3860 int64_t Offset) {
3861 CfaAdjustment += Offset;
3862 };
3863 auto CalcNewOffset = [&StackAdjustment](int64_t Offset) {
3864 return StackAdjustment + Offset;
3865 };
// Merge in both directions: instructions before and after InsertPos.
3866 StackAdjustment =
3867 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, true);
3868 StackAdjustment =
3869 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, false);
3870
3871 if (StackAdjustment) {
// Under minsize, try to realize the adjustment with POP instructions
// (shorter encoding); otherwise fall back to an explicit ADD/SUB/LEA.
3872 if (!(F.hasMinSize() &&
3873 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3874 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3875 /*InEpilogue=*/false);
3876 }
3877 }
3878
3879 if (DwarfCFI && !hasFP(MF) && CfaAdjustment) {
3880 // If we don't have FP, but need to generate unwind information,
3881 // we need to set the correct CFA offset after the stack adjustment.
3882 // How much we adjust the CFA offset depends on whether we're emitting
3883 // CFI only for EH purposes or for debugging. EH only requires the CFA
3884 // offset to be correct at each call site, while for debugging we want
3885 // it to be more precise.
3886
3887 // TODO: When not using precise CFA, we also need to adjust for the
3888 // InternalAmt here.
3889 BuildCFI(
3890 MBB, InsertPos, DL,
3891 MCCFIInstruction::createAdjustCfaOffset(nullptr, -CfaAdjustment));
3892 }
3893
3894 return I;
3895 }
3896
// Reserved call frame: only compensate for the callee-popped bytes,
// inserting the adjustment right after the preceding call.
// NOTE(review): original lines 3898-3899 (the declarations of CI and B)
// are missing from this extract.
3897 if (InternalAmt) {
3900 while (CI != B && !std::prev(CI)->isCall())
3901 --CI;
3902 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3903 }
3904
3905 return I;
3906}
3907
// NOTE(review): the signature (original line 3908) is missing from this
// extract; from the body this is the EFLAGS-safety query for prologue
// placement (presumably X86FrameLowering::canUseAsPrologue — confirm
// upstream). Returns true when emitting the prologue at the top of MBB
// cannot clobber a live-in EFLAGS.
3909 assert(MBB.getParent() && "Block is not attached to a function!");
3910 const MachineFunction &MF = *MBB.getParent();
// Nothing to preserve if EFLAGS is not live into this block.
3911 if (!MBB.isLiveIn(X86::EFLAGS))
3912 return true;
3913
3914 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3915 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
// NOTE(review): original line 3916 (a local declaration) is missing here.
3917 const X86TargetLowering &TLI = *STI.getTargetLowering();
3918 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3919 return false;
3920
// NOTE(review): original line 3921 (the X86FI declaration) is missing here.
// Stack realignment and Swift async-context setup also emit instructions
// that write EFLAGS, so bail out in those cases too.
3922 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3923}
3924
// NOTE(review): the signature (original line 3925) is missing from this
// extract; from the body this is the epilogue-placement query (presumably
// X86FrameLowering::canUseAsEpilogue — confirm upstream).
3926 assert(MBB.getParent() && "Block is not attached to a function!");
3927
3928 // Win64 has strict requirements in terms of epilogue and we are
3929 // not taking a chance at messing with them.
3930 // I.e., unless this block is already an exit block, we can't use
3931 // it as an epilogue.
3932 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3933 return false;
3934
3935 // Swift async context epilogue has a BTR instruction that clobbers parts of
3936 // EFLAGS.
3937 const MachineFunction &MF = *MBB.getParent();
// NOTE(review): original lines 3938-3939 (the Swift async-context check)
// are missing from this extract.
3940
// NOTE(review): original line 3941 is missing; this early return belongs to
// the condition started above.
3942 return true;
3943
3944 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3945 // clobbers the EFLAGS. Check that we do not need to preserve it,
3946 // otherwise, conservatively assume this is not
3947 // safe to insert the epilogue here.
// NOTE(review): original line 3948 (the final return expression) is missing
// from this extract.
3949}
3950
// NOTE(review): the signature (original line 3951) is missing from this
// extract; from the body this is the shrink-wrapping enable predicate
// (presumably X86FrameLowering::enableShrinkWrapping — confirm upstream).
3952 // If we may need to emit frameless compact unwind information, give
3953 // up as this is currently broken: PR25614.
// NOTE(review): original line 3955 (the initializer of CompactUnwind) is
// missing from this extract.
3954 bool CompactUnwind =
3956 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3957 !CompactUnwind) &&
3958 // The lowering of segmented stack and HiPE only support entry
3959 // blocks as prologue blocks: PR26107. This limitation may be
3960 // lifted if we fix:
3961 // - adjustForSegmentedStacks
3962 // - adjustForHiPEPrologue
// NOTE(review): original line 3963 (a conjunct of this return expression,
// presumably the HiPE calling-convention check) is missing here.
3964 !MF.shouldSplitStack();
3965}
3966
// NOTE(review): the first lines of this signature (original lines 3967-3968)
// are missing from this extract; the asserts below identify it as the
// win32 WinEH EBP/ESI restoration helper (presumably
// X86FrameLowering::restoreWin32EHStackPointers — confirm upstream).
3969 const DebugLoc &DL, bool RestoreSP) const {
3970 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3971 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3972 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3973 "restoring EBP/ESI on non-32-bit target");
3974
3975 MachineFunction &MF = *MBB.getParent();
// NOTE(review): original lines 3976 and 3979 (two local declarations) are
// missing from this extract.
3977 Register BasePtr = TRI->getBaseRegister();
3978 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3980 MachineFrameInfo &MFI = MF.getFrameInfo();
3981
3982 // FIXME: Don't set FrameSetup flag in catchret case.
3983
3984 int FI = FuncInfo.EHRegNodeFrameIndex;
3985 int EHRegSize = MFI.getObjectSize(FI);
3986
3987 if (RestoreSP) {
3988 // MOV32rm -EHRegSize(%ebp), %esp
3989 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3990 X86::EBP, true, -EHRegSize)
// NOTE(review): original line 3991 (the trailing flag-setter of this
// builder chain) is missing from this extract.
3992 }
3993
3994 Register UsedReg;
3995 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3996 int EndOffset = -EHRegOffset - EHRegSize;
3997 FuncInfo.EHRegNodeEndOffset = EndOffset;
3998
3999 if (UsedReg == FramePtr) {
4000 // ADD $offset, %ebp
4001 unsigned ADDri = getADDriOpcode(false);
4002 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
// NOTE(review): original lines 4003 and 4005 of this builder chain are
// missing from this extract.
4004 .addImm(EndOffset)
4006 ->getOperand(3)
4007 .setIsDead();
4008 assert(EndOffset >= 0 &&
4009 "end of registration object above normal EBP position!");
4010 } else if (UsedReg == BasePtr) {
4011 // LEA offset(%ebp), %esi
4012 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
4013 FramePtr, false, EndOffset)
// NOTE(review): original line 4014 (the trailing flag-setter) is missing.
4015 // MOV32rm SavedEBPOffset(%esi), %ebp
4016 assert(X86FI->getHasSEHFramePtrSave());
4017 int Offset =
4018 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
4019 .getFixed();
4020 assert(UsedReg == BasePtr);
4021 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
4022 UsedReg, true, Offset)
// NOTE(review): original line 4023 (the trailing flag-setter) is missing.
4024 } else {
4025 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
4026 }
4027 return MBBI;
4028}
4029
// NOTE(review): the signature (original line 4030) is missing from this
// extract; the body returns the target slot size via TRI — presumably the
// initial-CFA-offset accessor. Confirm against upstream.
4031 return TRI->getSlotSize();
4032}
4033
// NOTE(review): the signature (original lines 4034-4035) is missing from
// this extract; the body returns the cached stack pointer register —
// presumably the initial-CFA-register accessor. Confirm against upstream.
4036 return StackPtr;
4037}
4038
// NOTE(review): the signature (original lines 4039-4041) is missing from
// this extract; from the body this computes the DWARF frame base
// (presumably X86FrameLowering::getDwarfFrameBase — confirm upstream).
// Prefers a CFA-relative frame base when the frame register matches the
// initial CFA register; otherwise falls back to a register-based frame base.
4042 Register FrameRegister = RI->getFrameRegister(MF);
// NOTE(review): original line 4044 (the second conjunct of this condition)
// is missing from this extract.
4043 if (getInitialCFARegister(MF) == FrameRegister &&
4045 DwarfFrameBase FrameBase;
4046 FrameBase.Kind = DwarfFrameBase::CFA;
// NOTE(review): original line 4048 (the offset expression) is missing here.
4047 FrameBase.Location.Offset =
4049 return FrameBase;
4050 }
4051
4052 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4053}
4054
4055namespace {
4056// Struct used by orderFrameObjects to help sort the stack objects.
4057struct X86FrameSortingObject {
4058 bool IsValid = false; // true if we care about this Object.
4059 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4060 unsigned ObjectSize = 0; // Size of Object in bytes.
4061 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4062 unsigned ObjectNumUses = 0; // Object static number of uses.
4063};
4064
4065// The comparison function we use for std::sort to order our local
4066// stack symbols. The current algorithm is to use an estimated
4067// "density". This takes into consideration the size and number of
4068// uses each object has in order to roughly minimize code size.
4069// So, for example, an object of size 16B that is referenced 5 times
4070// will get higher priority than 4 4B objects referenced 1 time each.
4071// It's not perfect and we may be able to squeeze a few more bytes out of
4072// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4073// fringe end can have special consideration, given their size is less
4074// important, etc.), but the algorithmic complexity grows too much to be
4075// worth the extra gains we get. This gets us pretty close.
4076// The final order leaves us with objects with highest priority going
4077// at the end of our list.
4078struct X86FrameSortingComparator {
4079 inline bool operator()(const X86FrameSortingObject &A,
4080 const X86FrameSortingObject &B) const {
4081 uint64_t DensityAScaled, DensityBScaled;
4082
4083 // For consistency in our comparison, all invalid objects are placed
4084 // at the end. This also allows us to stop walking when we hit the
4085 // first invalid item after it's all sorted.
4086 if (!A.IsValid)
4087 return false;
4088 if (!B.IsValid)
4089 return true;
4090
4091 // The density is calculated by doing :
4092 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4093 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4094 // Since this approach may cause inconsistencies in
4095 // the floating point <, >, == comparisons, depending on the floating
4096 // point model with which the compiler was built, we're going
4097 // to scale both sides by multiplying with
4098 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4099 // the division and, with it, the need for any floating point
4100 // arithmetic.
4101 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4102 static_cast<uint64_t>(B.ObjectSize);
4103 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4104 static_cast<uint64_t>(A.ObjectSize);
4105
4106 // If the two densities are equal, prioritize highest alignment
4107 // objects. This allows for similar alignment objects
4108 // to be packed together (given the same density).
4109 // There's room for improvement here, also, since we can pack
4110 // similar alignment (different density) objects next to each
4111 // other to save padding. This will also require further
4112 // complexity/iterations, and the overall gain isn't worth it,
4113 // in general. Something to keep in mind, though.
4114 if (DensityAScaled == DensityBScaled)
4115 return A.ObjectAlignment < B.ObjectAlignment;
4116
4117 return DensityAScaled < DensityBScaled;
4118 }
4119};
4120} // namespace
4121
4122// Order the symbols in the local stack.
4123// We want to place the local stack objects in some sort of sensible order.
4124// The heuristic we use is to try and pack them according to static number
4125// of uses and size of object in order to minimize code size.
// NOTE(review): the opening of this signature (original line 4126) is
// missing from this extract; presumably
// `void X86FrameLowering::orderFrameObjects(` — confirm upstream.
4127 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4128 const MachineFrameInfo &MFI = MF.getFrameInfo();
4129
4130 // Don't waste time if there's nothing to do.
4131 if (ObjectsToAllocate.empty())
4132 return;
4133
4134 // Create an array of all MFI objects. We won't need all of these
4135 // objects, but we're going to create a full array of them to make
4136 // it easier to index into when we're counting "uses" down below.
4137 // We want to be able to easily/cheaply access an object by simply
4138 // indexing into it, instead of having to search for it every time.
4139 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4140
4141 // Walk the objects we care about and mark them as such in our working
4142 // struct.
4143 for (auto &Obj : ObjectsToAllocate) {
4144 SortingObjects[Obj].IsValid = true;
4145 SortingObjects[Obj].ObjectIndex = Obj;
4146 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4147 // Set the size.
4148 int ObjectSize = MFI.getObjectSize(Obj);
4149 if (ObjectSize == 0)
4150 // Variable size. Just use 4.
4151 SortingObjects[Obj].ObjectSize = 4;
4152 else
4153 SortingObjects[Obj].ObjectSize = ObjectSize;
4154 }
4155
4156 // Count the number of uses for each object.
4157 for (auto &MBB : MF) {
4158 for (auto &MI : MBB) {
// Debug instructions must not influence codegen decisions.
4159 if (MI.isDebugInstr())
4160 continue;
4161 for (const MachineOperand &MO : MI.operands()) {
4162 // Check to see if it's a local stack symbol.
4163 if (!MO.isFI())
4164 continue;
4165 int Index = MO.getIndex();
4166 // Check to see if it falls within our range, and is tagged
4167 // to require ordering.
4168 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4169 SortingObjects[Index].IsValid)
4170 SortingObjects[Index].ObjectNumUses++;
4171 }
4172 }
4173 }
4174
4175 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4176 // info).
4177 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4178
4179 // Now modify the original list to represent the final order that
4180 // we want. The order will depend on whether we're going to access them
4181 // from the stack pointer or the frame pointer. For SP, the list should
4182 // end up with the END containing objects that we want with smaller offsets.
4183 // For FP, it should be flipped.
4184 int i = 0;
4185 for (auto &Obj : SortingObjects) {
4186 // All invalid items are sorted at the end, so it's safe to stop.
4187 if (!Obj.IsValid)
4188 break;
4189 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4190 }
4191
4192 // Flip it if we're accessing off of the FP.
4193 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4194 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4195}
4196
// Computes the offset from the parent function's frame pointer spill area,
// used by WinEH funclets to find the parent frame.
4197unsigned
// NOTE(review): original line 4198 (the qualified function name and
// parameter list of this definition) is missing from this extract.
4199 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4200 unsigned Offset = 16;
4201 // RBP is immediately pushed.
4202 Offset += SlotSize;
4203 // All callee-saved registers are then pushed.
4204 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4205 // Every funclet allocates enough stack space for the largest outgoing call.
4206 Offset += getWinEHFuncletFrameSize(MF);
4207 return Offset;
4208}
4209
// NOTE(review): the first line of this signature (original line 4210) is
// missing from this extract; presumably
// `void X86FrameLowering::processFunctionBeforeFrameFinalized(` — confirm
// upstream. Runs after register allocation, before frame offsets are final.
4211 MachineFunction &MF, RegScavenger *RS) const {
4212 // Mark the function as not having WinCFI. We will set it back to true in
4213 // emitPrologue if it gets called and emits CFI.
4214 MF.setHasWinCFI(false);
4215
4216 MachineFrameInfo &MFI = MF.getFrameInfo();
4217 // If the frame is big enough that we might need to scavenge a register to
4218 // handle huge offsets, reserve a stack slot for that now.
4219 if (!isInt<32>(MFI.estimateStackSize(MF))) {
4220 int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false);
// NOTE(review): original line 4221 (registering FI as a scavenging slot)
// is missing from this extract.
4222 }
4223
4224 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4225 // aligned. The format doesn't support misaligned stack adjustments.
// NOTE(review): original lines 4226-4227 (the WinCFI alignment statement)
// are missing from this extract.
4228
4229 // If this function isn't doing Win64-style C++ EH, we don't need to do
4230 // anything.
// NOTE(review): original lines 4232-4233 (the remaining conjuncts of this
// condition) are missing from this extract.
4231 if (STI.is64Bit() && MF.hasEHFunclets() &&
4234 adjustFrameForMsvcCxxEh(MF);
4235 }
4236}
4237
// Lays out the WinEH UnwindHelp slot and any catch objects at fixed,
// RSP-relative offsets for Win64 MSVC-style C++ EH, then stores the initial
// sentinel value (-2) into UnwindHelp at function entry.
4238void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4239 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4240 // relative to RSP after the prologue. Find the offset of the last fixed
4241 // object, so that we can allocate a slot immediately following it. If there
4242 // were no fixed objects, use offset -SlotSize, which is immediately after the
4243 // return address. Fixed objects have negative frame indices.
4244 MachineFrameInfo &MFI = MF.getFrameInfo();
4245 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4246 int64_t MinFixedObjOffset = -SlotSize;
4247 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4248 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4249
// Place each handler's catch object (offset 0 means "not yet placed")
// below all currently placed fixed objects.
4250 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4251 for (WinEHHandlerType &H : TBME.HandlerArray) {
4252 int FrameIndex = H.CatchObj.FrameIndex;
4253 if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(FrameIndex) == 0) {
4254 // Ensure alignment.
4255 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4256 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4257 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4258 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4259 }
4260 }
4261 }
4262
4263 // Ensure alignment.
4264 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4265 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4266 int UnwindHelpFI =
4267 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4268 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4269
4270 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4271 // other frame setup instructions.
4272 MachineBasicBlock &MBB = MF.front();
4273 auto MBBI = MBB.begin();
4274 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4275 ++MBBI;
4276
// NOTE(review): original line 4277 (the DebugLoc declaration used below)
// is missing from this extract.
4278 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4279 UnwindHelpFI)
4280 .addImm(-2);
4281}
4282
// NOTE(review): the first line of this signature (original line 4283) is
// missing from this extract; confirm the exact hook name against upstream.
// Cleans up the no-longer-needed stack-pointer-save instruction once
// prologue/epilogue have been emitted.
4284 MachineFunction &MF, RegScavenger *RS) const {
4285 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4286
// NOTE(review): original line 4288 (the statement guarded by this `if`) is
// missing from this extract.
4287 if (STI.is32Bit() && MF.hasEHFunclets())
4289 // We have emitted prolog and epilog. Don't need stack pointer saving
4290 // instruction any more.
4291 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4292 MI->eraseFromParent();
4293 X86FI->setStackPtrSaveMI(nullptr);
4294 }
4295}
4296
// NOTE(review): the first line of this signature (original line 4297) is
// missing from this extract; presumably
// `void X86FrameLowering::restoreWinEHStackPointersInParent(` — confirm
// upstream. Emits SP restoration into every EH pad that is not a funclet
// entry.
4298 MachineFunction &MF) const {
4299 // 32-bit functions have to restore stack pointers when control is transferred
4300 // back to the parent function. These blocks are identified as eh pads that
4301 // are not funclet entries.
// NOTE(review): original line 4303 (the argument to
// isAsynchronousEHPersonality) is missing from this extract.
4302 bool IsSEH = isAsynchronousEHPersonality(
4304 for (MachineBasicBlock &MBB : MF) {
4305 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
// NOTE(review): original line 4307 (the restore call this guards) is
// missing from this extract.
4306 if (NeedsRestore)
4308 /*RestoreSP=*/IsSEH);
4309 }
4310}
4311
4312// Compute the alignment gap between current SP after spilling FP/BP and the
4313// next properly aligned stack offset.
// NOTE(review): the first line of this signature (original line 4314,
// presumably `static int computeFPBPAlignmentGap(MachineFunction &MF,`) is
// missing from this extract.
4315 const TargetRegisterClass *RC,
4316 unsigned NumSpilledRegs) {
// NOTE(review): original line 4317 (the TRI declaration used below) is
// missing from this extract.
4318 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4319 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
// Pad bytes needed so SP lands back on a stack-alignment boundary.
4320 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4321 return AlignedSize - AllocSize;
4322}
4323
// Pushes FP and/or BP onto the stack immediately before BeforeMI, pads SP
// to keep alignment, and (when DWARF CFI is needed) emits a
// .cfi_remember_state plus a DW_CFA_def_cfa_expression that locates the CFA
// through the spilled frame pointer.
4324void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
// NOTE(review): original line 4325 (the BeforeMI iterator parameter) is
// missing from this extract.
4326 Register FP, Register BP,
4327 int SPAdjust) const {
4328 assert(FP.isValid() || BP.isValid());
4329
4330 MachineBasicBlock *MBB = BeforeMI->getParent();
4331 DebugLoc DL = BeforeMI->getDebugLoc();
4332
4333 // Spill FP.
// NOTE(review): original line 4336 (the PUSH opcode operand of this
// BuildMI call) is missing from this extract.
4334 if (FP.isValid()) {
4335 BuildMI(*MBB, BeforeMI, DL,
4337 .addReg(FP);
4338 }
4339
4340 // Spill BP.
// NOTE(review): original line 4343 (the PUSH opcode operand of this
// BuildMI call) is missing from this extract.
4341 if (BP.isValid()) {
4342 BuildMI(*MBB, BeforeMI, DL,
4344 .addReg(BP);
4345 }
4346
4347 // Make sure SP is aligned.
4348 if (SPAdjust)
4349 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4350
4351 // Emit unwinding information.
4352 if (FP.isValid() && needsDwarfCFI(MF)) {
4353 // Emit .cfi_remember_state to remember old frame.
// NOTE(review): original line 4355 (the initializer of CFIIndex) is
// missing from this extract.
4354 unsigned CFIIndex =
4356 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4357 .addCFIIndex(CFIIndex);
4358
4359 // Setup new CFA value with DW_CFA_def_cfa_expression:
4360 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4361 SmallString<64> CfaExpr;
4362 uint8_t buffer[16];
4363 int Offset = SPAdjust;
4364 if (BP.isValid())
4365 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4366 // If BeforeMI is a frame setup instruction, we need to adjust the position
4367 // and offset of the new cfi instruction.
4368 if (TII.isFrameSetup(*BeforeMI)) {
4369 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4370 BeforeMI = std::next(BeforeMI);
4371 }
// NOTE(review): original lines 4372 and 4374 (the StackPtr declaration and
// its ILP32 adjustment) are missing from this extract.
4373 if (STI.isTarget64BitILP32())
4375 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
// Build the expression bytes by hand: breg<SP>+Offset, deref, consts, plus.
4376 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4377 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4378 CfaExpr.push_back(dwarf::DW_OP_deref);
4379 CfaExpr.push_back(dwarf::DW_OP_consts);
4380 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4381 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4382
4383 SmallString<64> DefCfaExpr;
4384 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4385 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4386 DefCfaExpr.append(CfaExpr.str());
// NOTE(review): original line 4389 (the trailing MIFlag argument of this
// BuildCFI call) is missing from this extract.
4387 BuildCFI(*MBB, BeforeMI, DL,
4388 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4390 }
4391}
4392
// Mirror of spillFPBPUsingSP: after AfterMI, undoes the alignment padding,
// pops BP then FP (reverse push order), and restores the remembered CFI
// state when DWARF CFI is needed.
4393void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
// NOTE(review): original line 4394 (the AfterMI iterator parameter) is
// missing from this extract.
4395 Register FP, Register BP,
4396 int SPAdjust) const {
4397 assert(FP.isValid() || BP.isValid());
4398
4399 // Adjust SP so it points to spilled FP or BP.
4400 MachineBasicBlock *MBB = AfterMI->getParent();
4401 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4402 DebugLoc DL = AfterMI->getDebugLoc();
4403 if (SPAdjust)
4404 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4405
4406 // Restore BP.
4407 if (BP.isValid()) {
4408 BuildMI(*MBB, Pos, DL,
4409 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4410 }
4411
4412 // Restore FP.
// NOTE(review): original line 4415 (the POP opcode/register operands of
// this BuildMI call) is missing from this extract.
4413 if (FP.isValid()) {
4414 BuildMI(*MBB, Pos, DL,
4416
4417 // Emit unwinding information.
4418 if (needsDwarfCFI(MF)) {
4419 // Restore original frame with .cfi_restore_state.
// NOTE(review): original line 4421 (the initializer of CFIIndex) is
// missing from this extract.
4420 unsigned CFIIndex =
4422 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4423 .addCFIIndex(CFIIndex);
4424 }
4425 }
4426}
4427
// Determines which of FP/BP to protect (promoting to 64-bit super-registers
// on x32), computes the SP alignment gap, then delegates to
// spillFPBPUsingSP/restoreFPBPUsingSP around the [BeforeMI, AfterMI] range.
4428void X86FrameLowering::saveAndRestoreFPBPUsingSP(
// NOTE(review): original line 4429 (the MF and BeforeMI parameters) is
// missing from this extract.
4430 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4431 assert(SpillFP || SpillBP);
4432
4433 Register FP, BP;
4434 const TargetRegisterClass *RC;
4435 unsigned NumRegs = 0;
4436
// NOTE(review): original line 4440 (the FP 64-bit promotion guarded by this
// ILP32 check) is missing from this extract.
4437 if (SpillFP) {
4438 FP = TRI->getFrameRegister(MF);
4439 if (STI.isTarget64BitILP32())
4441 RC = TRI->getMinimalPhysRegClass(FP);
4442 ++NumRegs;
4443 }
4444 if (SpillBP) {
4445 BP = TRI->getBaseRegister();
// x32: push/pop operate on the 64-bit super-register.
4446 if (STI.isTarget64BitILP32())
4447 BP = Register(getX86SubSuperRegister(BP, 64));
4448 RC = TRI->getMinimalPhysRegClass(BP);
4449 ++NumRegs;
4450 }
4451 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4452
4453 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4454 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4455}
4456
// Returns true (and advances MI past the sequence) for instruction patterns
// the FP/BP spill scan must not instrument, currently the
// LCMPXCHG16B_SAVE_RBX save/restore sequence.
4457bool X86FrameLowering::skipSpillFPBP(
// NOTE(review): original line 4458 (the parameter list of this definition)
// is missing from this extract.
4459 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4460 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4461 // SaveRbx = COPY RBX
4462 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4463 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4464 // We should skip this instruction sequence.
4465 int FI;
4466 Register Reg;
// Walk (in the caller's iteration order) until the instruction that
// captures RBX — either the COPY from RBX or a store of RBX to a slot.
4467 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4468 MI->getOperand(1).getReg() == X86::RBX) &&
4469 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4470 ++MI;
4471 return true;
4472 }
4473 return false;
4474}
4475
// NOTE(review): the first line of this signature (original line 4476,
// presumably `static bool isFPBPAccess(const MachineInstr &MI, Register FP,
// Register BP,`) is missing from this extract.
// Reports (via AccessFP/AccessBP) whether MI reads or writes the frame
// and/or base pointer; returns true if either is touched.
4477 const TargetRegisterInfo *TRI, bool &AccessFP,
4478 bool &AccessBP) {
4479 AccessFP = AccessBP = false;
4480 if (FP) {
// A use or a (possibly implicit) def of FP both count as an access.
4481 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4482 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4483 AccessFP = true;
4484 }
4485 if (BP) {
4486 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4487 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4488 AccessBP = true;
4489 }
4490 return AccessFP || AccessBP;
4491}
4492
4493// Invoke instruction has been lowered to normal function call. We try to figure
4494// out if MI comes from Invoke.
4495// Do we have any better method?
// Heuristic: a call is "invoke-like" if it sits inside EH labels, or if it
// is the last call in a block that has an EH-pad successor.
4496static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4497 if (!MI.isCall())
4498 return false;
4499 if (InsideEHLabels)
4500 return true;
4501
4502 const MachineBasicBlock *MBB = MI.getParent();
4503 if (!MBB->hasEHPadSuccessor())
4504 return false;
4505
4506 // Check if there is another call instruction from MI to the end of MBB.
// NOTE(review): original line 4507 (the declarations of MBBI and ME used
// below) is missing from this extract.
4508 for (++MBBI; MBBI != ME; ++MBBI)
4509 if (MBBI->isCall())
4510 return false;
4511 return true;
4512}
4513
4514/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4515/// interfered stack access in the range, usually generated by register spill.
4516void X86FrameLowering::checkInterferedAccess(
// NOTE(review): original line 4517 (the MF and DefMI parameters) is missing
// from this extract.
4518 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4519 bool SpillBP) const {
4520 if (DefMI == KillMI)
4521 return;
// Only the pointer actually used for frame-object addressing matters:
// BP when the function has a base pointer, FP otherwise.
4522 if (TRI->hasBasePointer(MF)) {
4523 if (!SpillBP)
4524 return;
4525 } else {
4526 if (!SpillFP)
4527 return;
4528 }
4529
// Any frame-index operand inside the spill range would be addressed
// through the pointer we are about to clobber — a hard error.
// NOTE(review): original line 4534 (the start of the error-reporting call
// whose message continues below) is missing from this extract.
4530 auto MI = KillMI;
4531 while (MI != DefMI) {
4532 if (any_of(MI->operands(),
4533 [](const MachineOperand &MO) { return MO.isFI(); }))
4535 "Interference usage of base pointer/frame "
4536 "pointer.");
4537 MI++;
4538 }
4539}
4540
4541/// If a function uses base pointer and the base pointer is clobbered by inline
4542/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
4543/// contains garbage value.
4544/// For example if a 32b x86 function uses base pointer esi, and esi is
4545/// clobbered by following inline asm
4546/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4547/// We need to save esi before the asm and restore it after the asm.
4548///
4549/// The problem can also occur to frame pointer if there is a function call, and
4550/// the callee uses a different calling convention and clobbers the fp.
4551///
4552/// Because normal frame objects (spill slots) are accessed through fp/bp
4553/// register, so we can't spill fp/bp to normal spill slots.
4554///
4555/// FIXME: There are 2 possible enhancements:
4556/// 1. In many cases there are different physical registers not clobbered by
4557/// inline asm, we can use one of them as base pointer. Or use a virtual
4558/// register as base pointer and let RA allocate a physical register to it.
4559/// 2. If there is no other instructions access stack with fp/bp from the
4560/// inline asm to the epilog, and no cfi requirement for a correct fp, we can
4561/// skip the save and restore operations.
// NOTE(review): original line 4562 (this definition's signature, presumably
// `void X86FrameLowering::spillFPBP(MachineFunction &MF) const {`) and line
// 4564 (the TFI declaration used below) are missing from this extract.
4563 Register FP, BP;
4565 if (TFI.hasFP(MF))
4566 FP = TRI->getFrameRegister(MF);
4567 if (TRI->hasBasePointer(MF))
4568 BP = TRI->getBaseRegister();
4569
4570 // Currently only inline asm and function call can clobbers fp/bp. So we can
4571 // do some quick test and return early.
// NOTE(review): original line 4573 (the X86FI declaration used below) is
// missing from this extract.
4572 if (!MF.hasInlineAsm()) {
4574 if (!X86FI->getFPClobberedByCall())
4575 FP = 0;
4576 if (!X86FI->getBPClobberedByCall())
4577 BP = 0;
4578 }
4579 if (!FP && !BP)
4580 return;
4581
// Scan each block backwards from its first terminator.
4582 for (MachineBasicBlock &MBB : MF) {
4583 bool InsideEHLabels = false;
4584 auto MI = MBB.rbegin(), ME = MBB.rend();
4585 auto TermMI = MBB.getFirstTerminator();
4586 if (TermMI == MBB.begin())
4587 continue;
4588 MI = *(std::prev(TermMI));
4589
4590 while (MI != ME) {
4591 // Skip frame setup/destroy instructions.
4592 // Skip Invoke (call inside try block) instructions.
4593 // Skip instructions handled by target.
// NOTE(review): original line 4595 (another disjunct of this skip
// condition) is missing from this extract.
4594 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4596 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4597 ++MI;
4598 continue;
4599 }
4600
// EH_LABELs delimit invoke regions; toggle the flag on each one.
4601 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4602 InsideEHLabels = !InsideEHLabels;
4603 ++MI;
4604 continue;
4605 }
4606
4607 bool AccessFP, AccessBP;
4608 // Check if fp or bp is used in MI.
4609 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4610 ++MI;
4611 continue;
4612 }
4613
4614 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4615 // used.
4616 bool FPLive = false, BPLive = false;
4617 bool SpillFP = false, SpillBP = false;
4618 auto DefMI = MI, KillMI = MI;
4619 do {
4620 SpillFP |= AccessFP;
4621 SpillBP |= AccessBP;
4622
4623 // Maintain FPLive and BPLive.
// (Reverse iteration: a def seen here ends the backward live range,
// a use starts it.)
4624 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4625 FPLive = false;
4626 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4627 FPLive = true;
4628 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4629 BPLive = false;
4630 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4631 BPLive = true;
4632
4633 DefMI = MI++;
4634 } while ((MI != ME) &&
4635 (FPLive || BPLive ||
4636 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4637
4638 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4639 if (FPLive && !SpillBP)
4640 continue;
4641
4642 // If the bp is clobbered by a call, we should save and restore outside of
4643 // the frame setup instructions.
4644 if (KillMI->isCall() && DefMI != ME) {
4645 auto FrameSetup = std::next(DefMI);
4646 // Look for frame setup instruction toward the start of the BB.
4647 // If we reach another call instruction, it means no frame setup
4648 // instruction for the current call instruction.
4649 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4650 !FrameSetup->isCall())
4651 ++FrameSetup;
4652 // If a frame setup instruction is found, we need to find out the
4653 // corresponding frame destroy instruction.
4654 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4655 (TII.getFrameSize(*FrameSetup) ||
4656 TII.getFrameAdjustment(*FrameSetup))) {
4657 while (!TII.isFrameInstr(*KillMI))
4658 --KillMI;
4659 DefMI = FrameSetup;
4660 MI = DefMI;
4661 ++MI;
4662 }
4663 }
4664
4665 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4666
4667 // Call target function to spill and restore FP and BP registers.
4668 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4669 }
4670 }
4671}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isFuncletReturnInstr(const MachineInstr &MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
constexpr int64_t MaxSPChunk
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:139
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:142
reverse_iterator rbegin() const
Definition: ArrayRef.h:138
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
A debug info location.
Definition: DebugLoc.h:124
unsigned size() const
Definition: DenseMap.h:120
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:270
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:903
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1036
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:352
size_t arg_size() const
Definition: Function.h:899
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:681
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:652
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:592
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:703
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:666
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:627
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition: MCDwarf.h:686
OpType getOperation() const
Definition: MCDwarf.h:720
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:697
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:608
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition: MCDwarf.h:691
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1115
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1077
A single uniqued string.
Definition: Metadata.h:720
LLVM_ABI StringRef getString() const
Definition: Metadata.cpp:617
Machine Value Type.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI bool hasEHPadSuccessor() const
bool isEHPad() const
Returns true if the block is a landing pad.
reverse_iterator rend()
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
LLVM_ABI uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
LLVM_ABI int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:590
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:295
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:595
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
iterator end() const
Definition: ArrayRef.h:348
iterator begin() const
Definition: ArrayRef.h:347
A tuple of MDNodes.
Definition: Metadata.h:1753
LLVM_ABI MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1465
LLVM_ABI unsigned getNumOperands() const
Definition: Metadata.cpp:1461
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:107
Represents a location in source code.
Definition: SMLoc.h:23
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:66
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:34
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:50
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:43
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
static constexpr size_t npos
Definition: StringRef.h:57
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
bool isUEFI() const
Tests whether the OS is UEFI.
Definition: Triple.h:671
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:676
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition: Triple.h:608
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:457
Value * getValue() const
Definition: Metadata.h:497
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int64_t mergeSPAdd(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int64_t AddOffset, bool doMergeWithPrevious) const
Equivalent to: mergeSPUpdates(MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value i.e.
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:262
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" ...
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:325
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:118
bool isTargetDragonFly() const
Definition: X86Subtarget.h:286
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:301
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:284
bool isTargetWin64() const
Definition: X86Subtarget.h:329
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:176
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:392
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:305
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:342
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:285
bool isTargetWin32() const
Definition: X86Subtarget.h:331
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:214
bool isTargetLinux() const
Definition: X86Subtarget.h:294
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:200
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
void stable_sort(R &&Range)
Definition: STLExtras.h:2077
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:551
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:198
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:24
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1980
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:81
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, Register Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:225
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
union llvm::TargetFrameLowering::DwarfFrameBase::@249 Location
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76