//===-- X86FrameLowering.cpp - X86 Frame Information ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86_64 uses 64-bit frame/stack pointers; x32 uses 32-bit ones.
  Uses64BitFramePtr = STI.isTarget64BitLP64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function? Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the name - it resolves call frame setup/destroy pseudos
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFPImpl - Return true if the specified function should have a dedicated
/// frame pointer register. This is true if the function has variable sized
/// allocas or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}

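// For example, getMOVriOpcode(/*Use64BitReg=*/true, 0x80000000) picks
// X86::MOV32ri64 (the immediate fits in 32 unsigned bits and zero-extends),
// INT32_MIN picks X86::MOV64ri32 (sign-extended 32-bit immediate), and
// anything wider needs the 10-byte X86::MOV64ri (movabsq).
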
// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching
// PUSH and POP instructions, without going through memory or through the
// training loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more
// efficient memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}

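// A prologue/epilogue built with these helpers on a PPX-enabled 64-bit target
// therefore comes out balanced, roughly (operand syntax abbreviated):
//   pushp %rbp / push2p r15, r14   ...   pop2p r14, r15 / popp %rbp
// Each POP(2)P reload pairs with the PUSH(2)P spill of the same registers,
// satisfying the balancing rule described above.
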
static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    MCRegister Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve
      // it. However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out; that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

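// For example, if a compare sets EFLAGS above the terminator region and the
// first terminator is a conditional branch reading it, EFLAGS is live-in to
// that region and this returns true, so any stack adjustment inserted before
// the terminators must avoid EFLAGS-clobbering ADD/SUB (see
// BuildStackAdjustment below).
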
constexpr int64_t MaxSPChunk = (1LL << 31) - 1;

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  if (!Uses64BitFramePtr && !isUInt<32>(Offset)) {
    // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more.
    // This might be unreachable code, so don't complain now; just trap if
    // it's reached at runtime.
    BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
    return;
  }

  uint64_t Chunk = MaxSPChunk;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = getX86SubSuperRegister(TRI->findDeadCallerSavedReg(MBB, MBBI),
                                   Uses64BitFramePtr ? 64 : 32);

    unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
                                 : getADDrrOpcode(Uses64BitFramePtr);
    if (Reg) {
      BuildMI(MBB, MBBI, DL,
              TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL,
              TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}

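// As an illustration, a requested 8 GiB decrement on x86-64 (no inline
// probing) exceeds MaxSPChunk, so instead of a long series of chunked SUBs
// this materializes the size and does one subtract:
//   movabsq $0x200000000, %rax
//   subq    %rax, %rsp
// falling back to a different dead caller-saved register when (E)AX is
// live-in to the block.
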
MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // a ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

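// The two shapes this can produce for Offset = -40 on x86-64 are:
//   leaq -40(%rsp), %rsp   ; UseLEA: EFLAGS stays intact (Atom, or flags live)
//   subq $40, %rsp         ; otherwise, with the EFLAGS def marked dead
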
template <typename FoundT, typename CalcT>
int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI,
                                         FoundT FoundStackAdjust,
                                         CalcT CalcNewOffset,
                                         bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return CalcNewOffset(0);

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that an ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  int64_t Offset = 0;
  for (;;) {
    unsigned Opc = PI->getOpcode();

    if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
        PI->getOperand(0).getReg() == StackPtr) {
      assert(PI->getOperand(1).getReg() == StackPtr);
      Offset = PI->getOperand(2).getImm();
    } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
               PI->getOperand(0).getReg() == StackPtr &&
               PI->getOperand(1).getReg() == StackPtr &&
               PI->getOperand(2).getImm() == 1 &&
               PI->getOperand(3).getReg() == X86::NoRegister &&
               PI->getOperand(5).getReg() == X86::NoRegister) {
      // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
      Offset = PI->getOperand(4).getImm();
    } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
               PI->getOperand(0).getReg() == StackPtr) {
      assert(PI->getOperand(1).getReg() == StackPtr);
      Offset = -PI->getOperand(2).getImm();
    } else
      return CalcNewOffset(0);

    FoundStackAdjust(PI, Offset);
    if (std::abs((int64_t)CalcNewOffset(Offset)) < MaxSPChunk)
      break;

    if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
      return CalcNewOffset(0);

    PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
  }

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return CalcNewOffset(Offset);
}

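// For example, with MBBI just past "subq $16, %rsp" and an additional
// allocation of 32 bytes requested, merging with the previous instruction
// matches the SUB (Offset = -16), erases it together with its def_cfa_offset
// CFI note (if present), and returns -48, so the caller can emit one
// "subq $48, %rsp".
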
int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI,
                                         int64_t AddOffset,
                                         bool doMergeWithPrevious) const {
  return mergeSPUpdates(
      MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },
      doMergeWithPrevious);
}

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    MCRegister Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // | retaddr |
        // | ebp     |
        // |         |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes
  // are left between the unaligned rsp and the current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small that it fits within a page, there's nothing to
  // do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes, but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust the Dwarf CFA offset here; the last position of the
  // stack has been defined.
}

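// For a 12 KiB allocation with 4 KiB pages (and AlignOffset == 0), the
// unrolled expansion above looks like:
//   subq $4096, %rsp ; movq $0, (%rsp)   ; allocate + probe page 1
//   subq $4096, %rsp ; movq $0, (%rsp)   ; allocate + probe page 2
//   subq $4096, %rsp                     ; tail, smaller than a page, no probe
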
void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop.
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  // Save the loop bound.
  {
    const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);

    // Can we calculate the loop bound using SUB with a 32-bit immediate?
    // Note that the immediate gets sign-extended when used with a 64-bit
    // register, so in that case we only have 31 bits to work with.
    bool canUseSub =
        Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);

    if (canUseSub) {
      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addImm(BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
    } else if (Uses64BitFramePtr) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
          .addImm(-BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      llvm_unreachable("Offset too large for 32-bit stack pointer");
    }

    // While in the loop, use the loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop.
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d; we must use r11 instead.
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // Allocate a page.
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // Touch the page.
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // Compare with the stack pointer bound.
  BuildMI(testMBB, DL,
          TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // Jump.
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management.
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // Handle the tail.
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // After the loop, switch back to the stack pointer for CFI.
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp; we must use rsp instead.
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update live-in information.
  fullyRecomputeLiveIns({tailMBB, testMBB});
}

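// Schematically, for a large 64-bit frame with 4 KiB probes this emits:
//   movq %rsp, %r11
//   subq $BoundOffset, %r11      ; loop bound (Offset rounded down to a page)
// testMBB:
//   subq $4096, %rsp             ; allocate a page
//   movq $0, (%rsp)              ; touch it
//   cmpq %r11, %rsp
//   jne  testMBB
// tailMBB:
//   subq $TailOffset, %rsp       ; remainder, smaller than a page
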
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks.
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants.
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if
    // both need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  if (InProlog)
    RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg the page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  if (InProlog)
    LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  if (InProlog)
    LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  if (InProlog) {
    LivePhysRegs LiveRegs;
    computeAndAddLiveIns(LiveRegs, *ContinueMBB);
  }

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}

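// For example, calculateSetFPREG(0x210) clamps 0x210 to the 128-byte window
// and returns 128, while calculateSetFPREG(40) returns 40 & -16 == 32; both
// results satisfy the 16-byte alignment that UWOP_SET_FPREG requires.
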
// If we're forcing a stack realignment we can't rely on just the frame
// info; we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, Register Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in the worst case) less than StackProbeSize
  // bytes are not probed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block.
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block.

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // Jump to the footer if StackPtr < FinalStackProbed.
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // Setup loop body.
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // Compare with the stack pointer bound.
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // Jump back while FinalStackProbed < StackPtr.
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // Setup loop footer.
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}

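// This red zone is what lets a small SysV x86-64 leaf function address its
// locals below %rsp with no prologue at all, e.g.:
//   movl %edi, -4(%rsp)   ; store into the red zone, no sub/add of %rsp
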
/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions.
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &Fn = MF.getFunction();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
  uint64_t StackSize = MFI.getStackSize();        // Number of bytes to allocate.
  bool IsFunclet = MBB.isEHFuncletEntry();
  EHPersonality Personality = EHPersonality::Unknown;
  if (Fn.hasPersonalityFn())
    Personality = classifyEHPersonality(Fn.getPersonalityFn());
  bool FnHasClrFunclet =
      MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
  bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
  bool HasFP = hasFP(MF);
  bool IsWin64Prologue = isWin64Prologue(MF);
  bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
  // FIXME: Emit FPO data for EH funclets.
  bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
                     MF.getFunction().getParent()->getCodeViewFlag();
  bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
  bool NeedsDwarfCFI = needsDwarfCFI(MF);
  Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  Register BasePtr = TRI->getBaseRegister();
  bool HasWinCFI = false;

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;
  Register ArgBaseReg;

  // Emit extra prolog for argument stack slot reference.
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    // MI is the LEA instruction created in X86ArgumentStackSlotPass.
    // Create the extra prolog for stack realignment.
    ArgBaseReg = MI->getOperand(0).getReg();
    // leal 4(%esp), %basereg
    // .cfi_def_cfa %basereg, 0
    // andl $-128, %esp
    // pushl -4(%basereg)
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
            ArgBaseReg)
        .addUse(StackPtr)
        .addImm(1)
        .addUse(X86::NoRegister)
        .addImm(SlotSize)
        .addUse(X86::NoRegister)
        .setMIFlag(MachineInstr::FrameSetup);
    if (NeedsDwarfCFI) {
      // .cfi_def_cfa %basereg, 0
      unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
               MachineInstr::FrameSetup);
    }
    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
    int64_t Offset = -(int64_t)SlotSize;
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
        .addReg(ArgBaseReg)
        .addImm(1)
        .addReg(X86::NoRegister)
        .addImm(Offset)
        .addReg(X86::NoRegister)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Space reserved for stack-based arguments when making a (ABI-guaranteed)
  // tail call.
  unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
  if (TailCallArgReserveSize && IsWin64Prologue)
    report_fatal_error("Can't handle guaranteed tail call under win64 yet");

  const bool EmitStackProbeCall =
      STI.getTargetLowering()->hasStackProbeSymbol(MF);
  unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);

  if (HasFP && X86FI->hasSwiftAsyncContext()) {
    switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
    case SwiftAsyncFramePointerMode::DeploymentBased:
      if (STI.swiftAsyncContextIsDynamicallySet()) {
        // The special symbol below is absolute and has a *value* suitable to
        // be combined with the frame pointer directly.
        BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
            .addUse(MachineFramePtr)
            .addUse(X86::RIP)
            .addImm(1)
            .addUse(X86::NoRegister)
            .addExternalSymbol("swift_async_extendedFramePointerFlags",
                               X86II::MO_GOTPCREL)
            .addUse(X86::NoRegister);
        break;
      }
      [[fallthrough]];

    case SwiftAsyncFramePointerMode::Always:
      assert(
          !IsWin64Prologue &&
          "win64 prologue does not set the bit 60 in the saved frame pointer");
      BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
          .addUse(MachineFramePtr)
          .addImm(60)
          .setMIFlag(MachineInstr::FrameSetup);
      break;

    case SwiftAsyncFramePointerMode::Never:
      break;
    }
  }

1703 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1704 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1705 // stack alignment.
1707 Fn.arg_size() == 2) {
1708 StackSize += 8;
1709 MFI.setStackSize(StackSize);
1710
1711 // Update the stack pointer by pushing a register. This is the instruction
1712 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1713 // Hard-coding the update to a push avoids emitting a second
1714 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1715 // probing isn't needed anyways for an 8-byte update.
1716 // Pushing a register leaves us in a similar situation to a regular
1717 // function call where we know that the address at (rsp-8) is writeable.
1718 // That way we avoid any off-by-ones with stack probing for additional
1719 // stack pointer updates later on.
1720 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1721 .addReg(X86::RAX, RegState::Undef)
1722 .setMIFlag(MachineInstr::FrameSetup);
1723 }
1724
1725 // On x86-64, if the Red Zone is not disabled, the function is a leaf, uses
1726 // at most 128 bytes of stack space, and has no frame pointer, calls, or
1727 // dynamic allocas, then we do not need to adjust the stack pointer (we fit
1728 // in the Red Zone). We also check that the function doesn't push and pop
1729 // from the stack.
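// Illustration with assumed numbers (not from any particular function): a
// leaf with no CSRs, no FP, and StackSize = 120 passes the checks below, so
// MinSize = 0 and the adjusted StackSize becomes max(0, 0) = 0; no SP
// adjustment is emitted and locals live below RSP in the Red Zone. With
// StackSize = 200 the adjusted size would be max(0, 200 - 128) = 72.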
1730 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1731 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1732 !MFI.adjustsStack() && // No calls.
1733 !EmitStackProbeCall && // No stack probes.
1734 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1735 !MF.shouldSplitStack()) { // Regular stack
1736 uint64_t MinSize =
1737 X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
1738 if (HasFP)
1739 MinSize += SlotSize;
1740 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1741 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1742 MFI.setStackSize(StackSize);
1743 }
1744
1745 // Insert stack pointer adjustment for later moving of return addr. Only
1746 // applies to tail call optimized functions where the callee argument stack
1747 // size is bigger than the caller's.
1748 if (TailCallArgReserveSize != 0) {
1749 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1750 /*InEpilogue=*/false)
1751 .setMIFlag(MachineInstr::FrameSetup);
1752 }
1753
1754 // Mapping for machine moves:
1755 //
1756 // DST: VirtualFP AND
1757 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1758 // ELSE => DW_CFA_def_cfa
1759 //
1760 // SRC: VirtualFP AND
1761 // DST: Register => DW_CFA_def_cfa_register
1762 //
1763 // ELSE
1764 // OFFSET < 0 => DW_CFA_offset_extended_sf
1765 // REG < 64 => DW_CFA_offset + Reg
1766 // ELSE => DW_CFA_offset_extended
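// For a typical x86-64 prologue, this mapping yields the familiar sequence
// (illustrative, assuming SlotSize == 8 and no tail-call reserve):
//   pushq %rbp          ->  .cfi_def_cfa_offset 16
//                           .cfi_offset %rbp, -16
//   movq  %rsp, %rbp    ->  .cfi_def_cfa_register %rbp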
1767
1768 uint64_t NumBytes = 0;
1769 int stackGrowth = -SlotSize;
1770
1771 // Find the funclet establisher parameter
1772 MCRegister Establisher;
1773 if (IsClrFunclet)
1774 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1775 else if (IsFunclet)
1776 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1777
1778 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1779 // Immediately spill establisher into the home slot.
1780 // The runtime cares about this.
1781 // MOV64mr %rdx, 16(%rsp)
1782 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1783 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1784 .addReg(Establisher)
1785 .setMIFlag(MachineInstr::FrameSetup);
1786 MBB.addLiveIn(Establisher);
1787 }
1788
1789 if (HasFP) {
1790 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1791
1792 // Calculate required stack adjustment.
1793 uint64_t FrameSize = StackSize - SlotSize;
1794 NumBytes =
1795 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1796
1797 // Callee-saved registers are pushed on the stack before it is realigned.
1798 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1799 NumBytes = alignTo(NumBytes, MaxAlign);
1800
1801 // Save EBP/RBP into the appropriate stack slot.
1802 BuildMI(MBB, MBBI, DL,
1803 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
1804 .addReg(MachineFramePtr, RegState::Kill)
1805 .setMIFlag(MachineInstr::FrameSetup);
1806
1807 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1808 // Mark the place where EBP/RBP was saved.
1809 // Define the current CFA rule to use the provided offset.
1810 assert(StackSize);
1811 BuildCFI(MBB, MBBI, DL,
1812 MCCFIInstruction::cfiDefCfaOffset(
1813 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1814 MachineInstr::FrameSetup);
1815
1816 // Change the rule for the FramePtr to be an "offset" rule.
1817 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1818 BuildCFI(MBB, MBBI, DL,
1819 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1820 2 * stackGrowth -
1821 (int)TailCallArgReserveSize),
1822 MachineInstr::FrameSetup);
1823 }
1824
1825 if (NeedsWinCFI) {
1826 HasWinCFI = true;
1827 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1828 .addImm(FramePtr)
1829 .setMIFlag(MachineInstr::FrameSetup);
1830 }
1831
1832 if (!IsFunclet) {
1833 if (X86FI->hasSwiftAsyncContext()) {
1834 assert(!IsWin64Prologue &&
1835 "win64 prologue does not store async context right below rbp");
1836 const auto &Attrs = MF.getFunction().getAttributes();
1837
1838 // Before we update the live frame pointer we have to ensure there's a
1839 // valid (or null) asynchronous context in its slot just before FP in
1840 // the frame record, so store it now.
1841 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1842 // We have an initial context in r14, store it just before the frame
1843 // pointer.
1844 MBB.addLiveIn(X86::R14);
1845 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1846 .addReg(X86::R14)
1847 .setMIFlag(MachineInstr::FrameSetup);
1848 } else {
1849 // No initial context, store null so that there's no pointer that
1850 // could be misused.
1851 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1852 .addImm(0)
1853 .setMIFlag(MachineInstr::FrameSetup);
1854 }
1855
1856 if (NeedsWinCFI) {
1857 HasWinCFI = true;
1858 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1859 .addImm(X86::R14)
1860 .setMIFlag(MachineInstr::FrameSetup);
1861 }
1862
1863 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1864 .addUse(X86::RSP)
1865 .addImm(1)
1866 .addUse(X86::NoRegister)
1867 .addImm(8)
1868 .addUse(X86::NoRegister)
1869 .setMIFlag(MachineInstr::FrameSetup);
1870 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1871 .addUse(X86::RSP)
1872 .addImm(8)
1873 .setMIFlag(MachineInstr::FrameSetup);
1874 }
1875
1876 if (!IsWin64Prologue && !IsFunclet) {
1877 // Update EBP with the new base value.
1878 if (!X86FI->hasSwiftAsyncContext())
1879 BuildMI(MBB, MBBI, DL,
1880 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1881 FramePtr)
1882 .addReg(StackPtr)
1883 .setMIFlag(MachineInstr::FrameSetup);
1884
1885 if (NeedsDwarfCFI) {
1886 if (ArgBaseReg.isValid()) {
1887 SmallString<64> CfaExpr;
1888 CfaExpr.push_back(dwarf::DW_CFA_expression);
1889 uint8_t buffer[16];
1890 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1891 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1892 CfaExpr.push_back(2);
1893 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1894 CfaExpr.push_back(0);
1895 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1896 BuildCFI(MBB, MBBI, DL,
1897 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1898 MachineInstr::FrameSetup);
1899 } else {
1900 // Mark effective beginning of when frame pointer becomes valid.
1901 // Define the current CFA to use the EBP/RBP register.
1902 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1903 BuildCFI(
1904 MBB, MBBI, DL,
1905 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1906 MachineInstr::FrameSetup);
1907 }
1908 }
1909
1910 if (NeedsWinFPO) {
1911 // .cv_fpo_setframe $FramePtr
1912 HasWinCFI = true;
1913 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1914 .addImm(FramePtr)
1915 .addImm(0)
1916 .setMIFlag(MachineInstr::FrameSetup);
1917 }
1918 }
1919 }
1920 } else {
1921 assert(!IsFunclet && "funclets without FPs not yet implemented");
1922 NumBytes =
1923 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1924 }
1925
1926 // Update the offset adjustment, which is mainly used by CodeView to
1927 // translate from ESP-relative to VFRAME-relative local variable offsets.
1928 if (!IsFunclet) {
1929 if (HasFP && TRI->hasStackRealignment(MF))
1930 MFI.setOffsetAdjustment(-NumBytes);
1931 else
1932 MFI.setOffsetAdjustment(-StackSize);
1933 }
1934
1935 // For EH funclets, only allocate enough space for outgoing calls. Save the
1936 // NumBytes value that we would've used for the parent frame.
1937 unsigned ParentFrameNumBytes = NumBytes;
1938 if (IsFunclet)
1939 NumBytes = getWinEHFuncletFrameSize(MF);
1940
1941 // Skip the callee-saved push instructions.
1942 bool PushedRegs = false;
1943 int StackOffset = 2 * stackGrowth;
1944 MachineBasicBlock::iterator LastCSPush = MBBI;
1945 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1946 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1947 return false;
1948 unsigned Opc = MBBI->getOpcode();
1949 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1950 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1951 };
1952
1953 while (IsCSPush(MBBI)) {
1954 PushedRegs = true;
1955 Register Reg = MBBI->getOperand(0).getReg();
1956 LastCSPush = MBBI;
1957 ++MBBI;
1958 unsigned Opc = LastCSPush->getOpcode();
1959
1960 if (!HasFP && NeedsDwarfCFI) {
1961 // Mark callee-saved push instruction.
1962 // Define the current CFA rule to use the provided offset.
1963 assert(StackSize);
1964 // Compared to push, push2 introduces more stack offset (one more
1965 // register).
1966 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1967 StackOffset += stackGrowth;
1968 BuildCFI(MBB, MBBI, DL,
1969 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1970 MachineInstr::FrameSetup);
1971 StackOffset += stackGrowth;
1972 }
1973
1974 if (NeedsWinCFI) {
1975 HasWinCFI = true;
1976 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1977 .addImm(Reg)
1978 .setMIFlag(MachineInstr::FrameSetup);
1979 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1980 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1981 .addImm(LastCSPush->getOperand(1).getReg())
1982 .setMIFlag(MachineInstr::FrameSetup);
1983 }
1984 }
1985
1986 // Realign stack after we pushed callee-saved registers (so that we'll be
1987 // able to calculate their offsets from the frame pointer).
1988 // Don't do this for Win64, it needs to realign the stack after the prologue.
1989 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1990 !ArgBaseReg.isValid()) {
1991 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1992 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1993
1994 if (NeedsWinCFI) {
1995 HasWinCFI = true;
1996 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1997 .addImm(MaxAlign)
1998 .setMIFlag(MachineInstr::FrameSetup);
1999 }
2000 }
2001
2002 // If there is a SUB32ri of ESP immediately before this instruction, merge
2003 // the two. This can be the case when tail call elimination is enabled and
2004 // the callee has more arguments than the caller.
2005 NumBytes = mergeSPUpdates(
2006 MBB, MBBI, [NumBytes](int64_t Offset) { return NumBytes - Offset; },
2007 true);
2008
2009 // Adjust stack pointer: ESP -= numbytes.
2010
2011 // Windows and cygwin/mingw require a prologue helper routine when allocating
2012 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
2013 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
2014 // stack and adjust the stack pointer in one go. The 64-bit version of
2015 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
2016 // responsible for adjusting the stack pointer. Touching the stack at 4K
2017 // increments is necessary to ensure that the guard pages used by the OS
2018 // virtual memory manager are allocated in the correct sequence.
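// Rough Win64 shape for an assumed NumBytes = 0x14000 (values illustrative):
//   movq $0x14000, %rax
//   callq __chkstk          # 64-bit __chkstk only probes the guard pages
//   subq %rax, %rsp         # the prologue performs the actual adjustment
// whereas the 32-bit helpers both probe and adjust ESP themselves.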
2019 uint64_t AlignedNumBytes = NumBytes;
2020 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
2021 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
2022 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
2023 assert(!X86FI->getUsesRedZone() &&
2024 "The Red Zone is not accounted for in stack probes");
2025
2026 // Check whether EAX is livein for this block.
2027 bool isEAXAlive = isEAXLiveIn(MBB);
2028
2029 if (isEAXAlive) {
2030 if (Is64Bit) {
2031 // Save RAX
2032 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
2033 .addReg(X86::RAX, RegState::Kill)
2034 .setMIFlag(MachineInstr::FrameSetup);
2035 } else {
2036 // Save EAX
2037 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2038 .addReg(X86::EAX, RegState::Kill)
2039 .setMIFlag(MachineInstr::FrameSetup);
2040 }
2041 }
2042
2043 if (Is64Bit) {
2044 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2045 // Function prologue is responsible for adjusting the stack pointer.
2046 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2047 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
2048 .addImm(Alloc)
2049 .setMIFlag(MachineInstr::FrameSetup);
2050 } else {
2051 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2052 // We'll also use 4 already allocated bytes for EAX.
2053 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2054 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2055 .setMIFlag(MachineInstr::FrameSetup);
2056 }
2057
2058 // Call __chkstk, __chkstk_ms, or __alloca.
2059 emitStackProbe(MF, MBB, MBBI, DL, true);
2060
2061 if (isEAXAlive) {
2062 // Restore RAX/EAX
2063 MachineInstr *MI;
2064 if (Is64Bit)
2065 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2066 StackPtr, false, NumBytes - 8);
2067 else
2068 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2069 StackPtr, false, NumBytes - 4);
2070 MI->setFlag(MachineInstr::FrameSetup);
2071 MBB.insert(MBBI, MI);
2072 }
2073 } else if (NumBytes) {
2074 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2075 }
2076
2077 if (NeedsWinCFI && NumBytes) {
2078 HasWinCFI = true;
2079 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2080 .addImm(NumBytes)
2081 .setMIFlag(MachineInstr::FrameSetup);
2082 }
2083
2084 int SEHFrameOffset = 0;
2085 Register SPOrEstablisher;
2086 if (IsFunclet) {
2087 if (IsClrFunclet) {
2088 // The establisher parameter passed to a CLR funclet is actually a pointer
2089 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2090 // to find the root function establisher frame by loading the PSPSym from
2091 // the intermediate frame.
2092 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2093 MachinePointerInfo NoInfo;
2094 MBB.addLiveIn(Establisher);
2095 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2096 Establisher, false, PSPSlotOffset)
2097 .addMemOperand(MF.getMachineMemOperand(
2098 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2099
2100 // Save the root establisher back into the current funclet's (mostly
2101 // empty) frame, in case a sub-funclet or the GC needs it.
2102 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2103 false, PSPSlotOffset)
2104 .addReg(Establisher)
2105 .addMemOperand(MF.getMachineMemOperand(
2106 NoInfo,
2107 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2108 SlotSize, Align(SlotSize)));
2109 }
2110 SPOrEstablisher = Establisher;
2111 } else {
2112 SPOrEstablisher = StackPtr;
2113 }
2114
2115 if (IsWin64Prologue && HasFP) {
2116 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2117 // this calculation on the incoming establisher, which holds the value of
2118 // RSP from the parent frame at the end of the prologue.
2119 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2120 if (SEHFrameOffset)
2121 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2122 SPOrEstablisher, false, SEHFrameOffset);
2123 else
2124 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2125 .addReg(SPOrEstablisher);
2126
2127 // If this is not a funclet, emit the CFI describing our frame pointer.
2128 if (NeedsWinCFI && !IsFunclet) {
2129 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2130 HasWinCFI = true;
2131 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2132 .addImm(FramePtr)
2133 .addImm(SEHFrameOffset)
2134 .setMIFlag(MachineInstr::FrameSetup);
2135 if (isAsynchronousEHPersonality(Personality))
2136 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2137 }
2138 } else if (IsFunclet && STI.is32Bit()) {
2139 // Reset EBP / ESI to something good for funclets.
2140 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2141 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2142 // into the registration node so that the runtime will restore it for us.
2143 if (!MBB.isCleanupFuncletEntry()) {
2144 assert(Personality == EHPersonality::MSVC_CXX);
2145 Register FrameReg;
2146 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2147 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2148 // ESP is the first field, so no extra displacement is needed.
2149 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2150 false, EHRegOffset)
2151 .addReg(X86::ESP);
2152 }
2153 }
2154
2155 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2156 const MachineInstr &FrameInstr = *MBBI;
2157 ++MBBI;
2158
2159 if (NeedsWinCFI) {
2160 int FI;
2161 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2162 if (X86::FR64RegClass.contains(Reg)) {
2163 int Offset;
2164 Register IgnoredFrameReg;
2165 if (IsWin64Prologue && IsFunclet)
2166 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2167 else
2168 Offset =
2169 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2170 SEHFrameOffset;
2171
2172 HasWinCFI = true;
2173 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2174 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2175 .addImm(Reg)
2176 .addImm(Offset)
2177 .setMIFlag(MachineInstr::FrameSetup);
2178 }
2179 }
2180 }
2181 }
2182
2183 if (NeedsWinCFI && HasWinCFI)
2184 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2185 .setMIFlag(MachineInstr::FrameSetup);
2186
2187 if (FnHasClrFunclet && !IsFunclet) {
2188 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2189 // immediately after the prolog) into the PSPSlot so that funclets
2190 // and the GC can recover it.
2191 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2192 auto PSPInfo = MachinePointerInfo::getFixedStack(
2193 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2194 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2195 PSPSlotOffset)
2196 .addReg(StackPtr)
2197 .addMemOperand(MF.getMachineMemOperand(
2198 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2199 SlotSize, Align(SlotSize)));
2200 }
2201
2202 // Realign stack after we spilled callee-saved registers (so that we'll be
2203 // able to calculate their offsets from the frame pointer).
2204 // Win64 requires aligning the stack after the prologue.
2205 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2206 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2207 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2208 }
2209
2210 // We already dealt with stack realignment and funclets above.
2211 if (IsFunclet && STI.is32Bit())
2212 return;
2213
2214 // If we need a base pointer, set it up here. It's whatever the value
2215 // of the stack pointer is at this point. Any variable size objects
2216 // will be allocated after this, so we can still use the base pointer
2217 // to reference locals.
2218 if (TRI->hasBasePointer(MF)) {
2219 // Update the base pointer with the current stack pointer.
2220 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2221 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2222 .addReg(SPOrEstablisher)
2223 .setMIFlag(MachineInstr::FrameSetup);
2224 if (X86FI->getRestoreBasePointer()) {
2225 // Stash value of base pointer. Saving RSP instead of EBP shortens
2226 // dependence chain. Used by SjLj EH.
2227 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2228 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2229 X86FI->getRestoreBasePointer())
2230 .addReg(SPOrEstablisher)
2231 .setMIFlag(MachineInstr::FrameSetup);
2232 }
2233
2234 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2235 // Stash the value of the frame pointer relative to the base pointer for
2236 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2237 // it recovers the frame pointer from the base pointer rather than the
2238 // other way around.
2239 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2240 Register UsedReg;
2241 int Offset =
2242 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2243 .getFixed();
2244 assert(UsedReg == BasePtr);
2245 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2246 .addReg(FramePtr)
2247 .setMIFlag(MachineInstr::FrameSetup);
2248 }
2249 }
2250 if (ArgBaseReg.isValid()) {
2251 // Save argument base pointer.
2252 auto *MI = X86FI->getStackPtrSaveMI();
2253 int FI = MI->getOperand(1).getIndex();
2254 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2255 // movl %basereg, offset(%ebp)
2256 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2257 .addReg(ArgBaseReg)
2258 .setMIFlag(MachineInstr::FrameSetup);
2259 }
2260
2261 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2262 // Mark end of stack pointer adjustment.
2263 if (!HasFP && NumBytes) {
2264 // Define the current CFA rule to use the provided offset.
2265 assert(StackSize);
2266 BuildCFI(
2267 MBB, MBBI, DL,
2268 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2269 MachineInstr::FrameSetup);
2270 }
2271
2272 // Emit DWARF info specifying the offsets of the callee-saved registers.
2273 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2274 }
2275
2276 // An X86 interrupt handling function cannot assume anything about the
2277 // direction flag (DF in the EFLAGS register). Clear this flag by emitting a
2278 // "cld" instruction in the prologue of each interrupt handler function.
2279 //
2280 // Emit the "cld" instruction only in these cases:
2281 // 1. The interrupt handling function uses any of the "rep" instructions.
2282 // 2. The interrupt handling function calls another function.
2283 // 3. There are inline asm blocks, as we do not know what they do.
2284 //
2285 // TODO: We should also emit cld if we detect the use of std, but as of now,
2286 // the compiler does not emit that instruction, or even define it, so in
2287 // practice, this would only happen with inline asm, which we cover anyway.
2288 if (Fn.getCallingConv() == CallingConv::X86_INTR) {
2289 bool NeedsCLD = false;
2290
2291 for (const MachineBasicBlock &B : MF) {
2292 for (const MachineInstr &MI : B) {
2293 if (MI.isCall()) {
2294 NeedsCLD = true;
2295 break;
2296 }
2297
2298 if (isOpcodeRep(MI.getOpcode())) {
2299 NeedsCLD = true;
2300 break;
2301 }
2302
2303 if (MI.isInlineAsm()) {
2304 // TODO: Parse asm for rep instructions or call sites?
2305 // For now, let's play it safe and emit a cld instruction
2306 // just in case.
2307 NeedsCLD = true;
2308 break;
2309 }
2310 }
2311 }
2312
2313 if (NeedsCLD) {
2314 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2315 .setMIFlag(MachineInstr::FrameSetup);
2316 }
2317 }
2318
2319 // At this point we know if the function has WinCFI or not.
2320 MF.setHasWinCFI(HasWinCFI);
2321}
2322
2323 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2324 const MachineFunction &MF) const {
2325 // We can't use LEA instructions for adjusting the stack pointer if we don't
2326 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2327 // to deallocate the stack.
2328 // This means that we can use LEA for SP in two situations:
2329 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2330 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2331 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2332}
2333
2334 static bool isFuncletReturnInstr(const MachineInstr &MI) {
2335 switch (MI.getOpcode()) {
2336 case X86::CATCHRET:
2337 case X86::CLEANUPRET:
2338 return true;
2339 default:
2340 return false;
2341 }
2342 llvm_unreachable("impossible");
2343}
2344
2345// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2346// stack. It holds a pointer to the bottom of the root function frame. The
2347// establisher frame pointer passed to a nested funclet may point to the
2348// (mostly empty) frame of its parent funclet, but it will need to find
2349// the frame of the root function to access locals. To facilitate this,
2350// every funclet copies the pointer to the bottom of the root function
2351// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2352// same offset for the PSPSym in the root function frame that's used in the
2353// funclets' frames allows each funclet to dynamically accept any ancestor
2354// frame as its establisher argument (the runtime doesn't guarantee the
2355// immediate parent for some reason lost to history), and also allows the GC,
2356// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2357// frame with only a single offset reported for the entire method.
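// Sketch of the resulting accesses (offsets assumed): the root prologue
// stores Initial-SP at [RSP + PSPSlotOffset]; a funclet entered with any
// ancestor's establisher in RDX can then recover and republish it:
//   movq PSPSlotOffset(%rdx), %rax    # load the root Initial-SP
//   movq %rax, PSPSlotOffset(%rsp)    # re-save it in this funclet's frame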
2358unsigned
2359X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2360 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2361 Register SPReg;
2362 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2363 /*IgnoreSPUpdates*/ true)
2364 .getFixed();
2365 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2366 return static_cast<unsigned>(Offset);
2367}
2368
2369unsigned
2370X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2371 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2372 // This is the size of the pushed CSRs.
2373 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2374 // This is the size of the callee-saved XMMs.
2375 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2376 unsigned XMMSize =
2377 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2378 // This is the amount of stack a funclet needs to allocate.
2379 unsigned UsedSize;
2380 EHPersonality Personality =
2381 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2382 if (Personality == EHPersonality::CoreCLR) {
2383 // CLR funclets need to hold enough space to include the PSPSym, at the
2384 // same offset from the stack pointer (immediately after the prolog) as it
2385 // resides at in the main function.
2386 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2387 } else {
2388 // Other funclets just need enough stack for outgoing call arguments.
2389 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2390 }
2391 // RBP is not included in the callee saved register block. After pushing RBP,
2392 // everything is 16 byte aligned. Everything we allocate before an outgoing
2393 // call must also be 16 byte aligned.
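// Worked example with assumed sizes: CSSize = 24 (three pushed GPRs),
// UsedSize = 40 of outgoing-call stack, XMMSize = 0. Then FrameSizeMinusRBP
// = alignTo(24 + 40, 16) = 64, and the funclet allocates 64 + 0 - 24 = 40
// bytes, keeping the outgoing-call area 16-byte aligned.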
2394 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2395 // Subtract out the size of the callee saved registers. This is how much stack
2396 // each funclet will allocate.
2397 return FrameSizeMinusRBP + XMMSize - CSSize;
2398}
2399
2400static bool isTailCallOpcode(unsigned Opc) {
2401 return Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
2402 Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
2403 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2404 Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
2405 Opc == X86::TCRETURNmi64;
2406}
2407
2408 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2409 MachineBasicBlock &MBB) const {
2410 const MachineFrameInfo &MFI = MF.getFrameInfo();
2411 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2412 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2413 MachineBasicBlock::iterator MBBI = Terminator;
2414 DebugLoc DL;
2415 if (MBBI != MBB.end())
2416 DL = MBBI->getDebugLoc();
2417 // Standard x86-64 uses 64-bit frame/stack pointers; x32 uses 32-bit ones.
2418 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2419 Register FramePtr = TRI->getFrameRegister(MF);
2420 Register MachineFramePtr =
2421 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2422
2423 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2424 bool NeedsWin64CFI =
2425 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2426 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2427
2428 // Get the number of bytes to allocate from the FrameInfo.
2429 uint64_t StackSize = MFI.getStackSize();
2430 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2431 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2432 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2433 bool HasFP = hasFP(MF);
2434 uint64_t NumBytes = 0;
2435
2436 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2437 !MF.getTarget().getTargetTriple().isOSWindows() &&
2438 !MF.getTarget().getTargetTriple().isUEFI()) &&
2439 MF.needsFrameMoves();
2440
2441 Register ArgBaseReg;
2442 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2443 unsigned Opc = X86::LEA32r;
2444 Register StackReg = X86::ESP;
2445 ArgBaseReg = MI->getOperand(0).getReg();
2446 if (STI.is64Bit()) {
2447 Opc = X86::LEA64r;
2448 StackReg = X86::RSP;
2449 }
2450 // leal -4(%basereg), %esp
2451 // .cfi_def_cfa %esp, 4
2452 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2453 .addUse(ArgBaseReg)
2454 .addImm(1)
2455 .addUse(X86::NoRegister)
2456 .addImm(-(int64_t)SlotSize)
2457 .addUse(X86::NoRegister)
2458 .setMIFlag(MachineInstr::FrameDestroy);
2459 if (NeedsDwarfCFI) {
2460 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2461 BuildCFI(MBB, MBBI, DL,
2462 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2463 MachineInstr::FrameDestroy);
2464 --MBBI;
2465 }
2466 --MBBI;
2467 }
2468
2469 if (IsFunclet) {
2470 assert(HasFP && "EH funclets without FP not yet implemented");
2471 NumBytes = getWinEHFuncletFrameSize(MF);
2472 } else if (HasFP) {
2473 // Calculate required stack adjustment.
2474 uint64_t FrameSize = StackSize - SlotSize;
2475 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2476
2477 // Callee-saved registers were pushed on stack before the stack was
2478 // realigned.
2479 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2480 NumBytes = alignTo(FrameSize, MaxAlign);
2481 } else {
2482 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2483 }
2484 uint64_t SEHStackAllocAmt = NumBytes;
2485
2486 // AfterPop is the position to insert .cfi_restore.
2487 MachineBasicBlock::iterator AfterPop = MBBI;
2488 if (HasFP) {
2489 if (X86FI->hasSwiftAsyncContext()) {
2490 // Discard the context.
2491 int64_t Offset = mergeSPAdd(MBB, MBBI, 16, true);
2492 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2493 }
2494 // Pop EBP.
2495 BuildMI(MBB, MBBI, DL,
2496 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2497 MachineFramePtr)
2498 .setMIFlag(MachineInstr::FrameDestroy);
2499
2500 // We need to reset FP to its untagged state on return. Bit 60 is currently
2501 // used to show the presence of an extended frame.
2502 if (X86FI->hasSwiftAsyncContext()) {
2503 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2504 .addUse(MachineFramePtr)
2505 .addImm(60)
2506 .setMIFlag(MachineInstr::FrameDestroy);
2507 }
2508
2509 if (NeedsDwarfCFI) {
2510 if (!ArgBaseReg.isValid()) {
2511 unsigned DwarfStackPtr =
2512 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2513 BuildCFI(MBB, MBBI, DL,
2514 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2515 MachineInstr::FrameDestroy);
2516 }
2517 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2518 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2519 BuildCFI(MBB, AfterPop, DL,
2520 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2521 MachineInstr::FrameDestroy);
2522 --MBBI;
2523 --AfterPop;
2524 }
2525 --MBBI;
2526 }
2527 }
2528
2529 MachineBasicBlock::iterator FirstCSPop = MBBI;
2530 // Skip the callee-saved pop instructions.
2531 while (MBBI != MBB.begin()) {
2532 MachineBasicBlock::iterator PI = std::prev(MBBI);
2533 unsigned Opc = PI->getOpcode();
2534
2535 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2536 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2537 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2538 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2539 Opc != X86::POP2P && Opc != X86::LEA64r))
2540 break;
2541 FirstCSPop = PI;
2542 }
2543
2544 --MBBI;
2545 }
2546 if (ArgBaseReg.isValid()) {
2547 // Restore argument base pointer.
2548 auto *MI = X86FI->getStackPtrSaveMI();
2549 int FI = MI->getOperand(1).getIndex();
2550 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2551 // movl offset(%ebp), %basereg
2552 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2553 .setMIFlag(MachineInstr::FrameDestroy);
2554 }
2555 MBBI = FirstCSPop;
2556
2557 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2558 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2559
2560 if (MBBI != MBB.end())
2561 DL = MBBI->getDebugLoc();
2562 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2563 // instruction, merge the two instructions.
2564 if (NumBytes || MFI.hasVarSizedObjects())
2565 NumBytes = mergeSPAdd(MBB, MBBI, NumBytes, true);
2566
2567 // If dynamic alloca is used, then reset ESP to point to the last callee-saved
2568 // slot before popping the callee-saved registers off. The same applies when
2569 // the stack was realigned. Don't do this for a funclet epilogue, since
2570 // funclets do not realign the stack or allocate dynamic stack space.
2571 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2572 !IsFunclet) {
2573 if (TRI->hasStackRealignment(MF))
2574 MBBI = FirstCSPop;
2575 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2576 uint64_t LEAAmount =
2577 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2578
2579 if (X86FI->hasSwiftAsyncContext())
2580 LEAAmount -= 16;
2581
2582 // There are only two legal forms of epilogue:
2583 // - add SEHAllocationSize, %rsp
2584 // - lea SEHAllocationSize(%FramePtr), %rsp
2585 //
2586 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2587 // However, we may use this sequence if we have a frame pointer because the
2588 // effects of the prologue can safely be undone.
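// E.g., with assumed SEHStackAllocAmt = 0x40 and SEHFrameOffset = 0x20, the
// Win64 path emits "leaq 0x20(%rbp), %rsp"; outside Win64, LEAAmount is
// -CSSize, pointing RSP back at the first callee-saved slot.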
2589 if (LEAAmount != 0) {
2590 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2591 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2592 false, LEAAmount);
2593 --MBBI;
2594 } else {
2595 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2596 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2597 --MBBI;
2598 }
2599 } else if (NumBytes) {
2600 // Adjust stack pointer back: ESP += numbytes.
2601 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2602 if (!HasFP && NeedsDwarfCFI) {
2603 // Define the current CFA rule to use the provided offset.
2604 BuildCFI(MBB, MBBI, DL,
2605 MCCFIInstruction::cfiDefCfaOffset(
2606 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2607 MachineInstr::FrameDestroy);
2608 }
2609 --MBBI;
2610 }
2611
2612 if (NeedsWin64CFI && MF.hasWinCFI())
2613 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_BeginEpilogue));
2614
2615 if (!HasFP && NeedsDwarfCFI) {
2616 MBBI = FirstCSPop;
2617 int64_t Offset = -(int64_t)CSSize - SlotSize;
2618 // Mark callee-saved pop instruction.
2619 // Define the current CFA rule to use the provided offset.
2620 while (MBBI != MBB.end()) {
2621 MachineBasicBlock::iterator PI = MBBI;
2622 unsigned Opc = PI->getOpcode();
2623 ++MBBI;
2624 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2625 Opc == X86::POP2 || Opc == X86::POP2P) {
2626 Offset += SlotSize;
2627 // Compared to pop, pop2 introduces more stack offset (one more
2628 // register).
2629 if (Opc == X86::POP2 || Opc == X86::POP2P)
2630 Offset += SlotSize;
2631 BuildCFI(MBB, MBBI, DL,
2632 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2633 MachineInstr::FrameDestroy);
2634 }
2635 }
2636 }
2637
2638 // Emit DWARF info specifying the restores of the callee-saved registers.
2639 // For an epilogue that returns, or any other block without successors,
2640 // there is no need to generate .cfi_restore for callee-saved registers.
2641 if (NeedsDwarfCFI && !MBB.succ_empty())
2642 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2643
2644 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2645 // Add the return addr area delta back since we are not tail calling.
2646 int64_t Delta = X86FI->getTCReturnAddrDelta();
2647 assert(Delta <= 0 && "TCDelta should never be positive");
2648 if (Delta) {
2649 // Check for possible merge with preceding ADD instruction.
2650 int64_t Offset = mergeSPAdd(MBB, Terminator, -Delta, true);
2651 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2652 }
2653 }
2654
2655 // Emit tilerelease for AMX kernel.
2656 if (X86FI->hasVirtualTileReg())
2657 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2658
2659 if (NeedsWin64CFI && MF.hasWinCFI())
2660 BuildMI(MBB, Terminator, DL, TII.get(X86::SEH_EndEpilogue));
2661}
2662
2663 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2664 int FI,
2665 Register &FrameReg) const {
2666 const MachineFrameInfo &MFI = MF.getFrameInfo();
2667
2668 bool IsFixed = MFI.isFixedObjectIndex(FI);
2669 // We can't calculate offset from frame pointer if the stack is realigned,
2670 // so enforce usage of stack/base pointer. The base pointer is used when we
2671 // have dynamic allocas in addition to dynamic realignment.
2672 if (TRI->hasBasePointer(MF))
2673 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2674 else if (TRI->hasStackRealignment(MF))
2675 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2676 else
2677 FrameReg = TRI->getFrameRegister(MF);
2678
2679 // Offset will hold the offset from the stack pointer at function entry to the
2680 // object.
2681 // We need to factor in additional offsets applied during the prologue to the
2682 // frame, base, and stack pointer depending on which is used.
2683 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2684 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2685 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2686 uint64_t StackSize = MFI.getStackSize();
2687 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2688 int64_t FPDelta = 0;
2689
2690 // In an x86 interrupt, remove the offset we added to account for the return
2691 // address from any stack object allocated in the caller's frame. Interrupts
2692 // do not have a standard return address. Fixed objects in the current frame,
2693 // such as SSE register spills, should not get this treatment.
2694 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2695 Offset >= 0) {
2696 Offset += getOffsetOfLocalArea();
2697 }
2698
2699 if (IsWin64Prologue) {
2700 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2701
2702 // Calculate required stack adjustment.
2703 uint64_t FrameSize = StackSize - SlotSize;
2704 // If required, include space for extra hidden slot for stashing base
2705 // pointer.
2706 if (X86FI->getRestoreBasePointer())
2707 FrameSize += SlotSize;
2708 uint64_t NumBytes = FrameSize - CSSize;
2709
2710 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2711 if (FI && FI == X86FI->getFAIndex())
2712 return StackOffset::getFixed(-SEHFrameOffset);
2713
2714 // FPDelta is the offset from the "traditional" FP location of the old base
2715 // pointer followed by return address and the location required by the
2716 // restricted Win64 prologue.
2717 // Add FPDelta to all offsets below that go through the frame pointer.
2718 FPDelta = FrameSize - SEHFrameOffset;
2719 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2720 "FPDelta isn't aligned per the Win64 ABI!");
2721 }
2722
2723 if (FrameReg == TRI->getFramePtr()) {
2724 // Skip saved EBP/RBP
2725 Offset += SlotSize;
2726
2727 // Account for restricted Windows prologue.
2728 Offset += FPDelta;
2729
2730 // Skip the RETADDR move area
2731 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2732 if (TailCallReturnAddrDelta < 0)
2733 Offset -= TailCallReturnAddrDelta;
2734
2736 }
2737
2738 // FrameReg is either the stack pointer or a base pointer. But the base is
2739 // located at the end of the statically known StackSize so the distinction
2740 // doesn't really matter.
2741 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2742 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2743 return StackOffset::getFixed(Offset + StackSize);
2744}
2745
2746 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2747 Register &FrameReg) const {
2748 const MachineFrameInfo &MFI = MF.getFrameInfo();
2749 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2750 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2751 const auto it = WinEHXMMSlotInfo.find(FI);
2752
2753 if (it == WinEHXMMSlotInfo.end())
2754 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2755
2756 FrameReg = TRI->getStackRegister();
2757 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2758 it->second;
2759}
2760
2761 StackOffset
2762 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2763 Register &FrameReg,
2764 int Adjustment) const {
2765 const MachineFrameInfo &MFI = MF.getFrameInfo();
2766 FrameReg = TRI->getStackRegister();
2767 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2768 getOffsetOfLocalArea() + Adjustment);
2769}
2770
2771 StackOffset
2772 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2773 int FI, Register &FrameReg,
2774 bool IgnoreSPUpdates) const {
2775
2776 const MachineFrameInfo &MFI = MF.getFrameInfo();
2777 // Does not include any dynamic realign.
2778 const uint64_t StackSize = MFI.getStackSize();
2779 // LLVM arranges the stack as follows:
2780 // ...
2781 // ARG2
2782 // ARG1
2783 // RETADDR
2784 // PUSH RBP <-- RBP points here
2785 // PUSH CSRs
2786 // ~~~~~~~ <-- possible stack realignment (non-win64)
2787 // ...
2788 // STACK OBJECTS
2789 // ... <-- RSP after prologue points here
2790 // ~~~~~~~ <-- possible stack realignment (win64)
2791 //
2792 // if (hasVarSizedObjects()):
2793 // ... <-- "base pointer" (ESI/RBX) points here
2794 // DYNAMIC ALLOCAS
2795 // ... <-- RSP points here
2796 //
2797 // Case 1: In the simple case of no stack realignment and no dynamic
2798 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2799 // with fixed offsets from RSP.
2800 //
2801 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2802 // stack objects are addressed with RBP and regular stack objects with RSP.
2803 //
2804 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2805 // to address stack arguments for outgoing calls and nothing else. The "base
2806 // pointer" points to local variables, and RBP points to fixed objects.
2807 //
2808 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2809 // answer we give is relative to the SP after the prologue, and not the
2810 // SP in the middle of the function.
2811
2812 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2813 !STI.isTargetWin64())
2814 return getFrameIndexReference(MF, FI, FrameReg);
2815
2816 // If !hasReservedCallFrame the function might have SP adjustment in the
2817 // body. So, even though the offset is statically known, it depends on where
2818 // we are in the function.
2819 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2820 return getFrameIndexReference(MF, FI, FrameReg);
2821
2822 // We don't handle tail calls, and shouldn't be seeing them either.
2823 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2824 "we don't handle this case!");
2825
2826 // This is how the math works out:
2827 //
2828 // %rsp grows (i.e. gets lower) left to right. Each box below is
2829 // one word (eight bytes). Obj0 is the stack slot we're trying to
2830 // get to.
2831 //
2832 // ----------------------------------
2833 // | BP | Obj0 | Obj1 | ... | ObjN |
2834 // ----------------------------------
2835 // ^ ^ ^ ^
2836 // A B C E
2837 //
2838 // A is the incoming stack pointer.
2839 // (B - A) is the local area offset (-8 for x86-64) [1]
2840 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2841 //
2842 // |(E - B)| is the StackSize (absolute value, positive). For a
2843 // stack that grows down, this works out to be (B - E). [3]
2844 //
2845 // E is also the value of %rsp after stack has been set up, and we
2846 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2847 // (C - E) == (C - A) - (B - A) + (B - E)
2848 // { Using [1], [2] and [3] above }
2849 // == getObjectOffset - LocalAreaOffset + StackSize
2850
2851 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2852}
2853
2854 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2855 MachineFunction &MF, const TargetRegisterInfo *TRI,
2856 std::vector<CalleeSavedInfo> &CSI) const {
2857 MachineFrameInfo &MFI = MF.getFrameInfo();
2858 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2859
2860 unsigned CalleeSavedFrameSize = 0;
2861 unsigned XMMCalleeSavedFrameSize = 0;
2862 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2863 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2864
2865 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2866
2867 if (TailCallReturnAddrDelta < 0) {
2868 // create RETURNADDR area
2869 // arg
2870 // arg
2871 // RETADDR
2872 // { ...
2873 // RETADDR area
2874 // ...
2875 // }
2876 // [EBP]
2877 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2878 TailCallReturnAddrDelta - SlotSize, true);
2879 }
2880
2881 // Spill the BasePtr if it's used.
2882 if (this->TRI->hasBasePointer(MF)) {
2883 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2884 if (MF.hasEHFunclets()) {
2885 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2886 X86FI->setHasSEHFramePtrSave(true);
2887 X86FI->setSEHFramePtrSaveIndex(FI);
2888 }
2889 }
2890
2891 if (hasFP(MF)) {
2892 // emitPrologue always spills frame register the first thing.
2893 SpillSlotOffset -= SlotSize;
2894 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2895
2896 // The async context lives directly before the frame pointer, and we
2897 // allocate a second slot to preserve stack alignment.
2898 if (X86FI->hasSwiftAsyncContext()) {
2899 SpillSlotOffset -= SlotSize;
2900 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2901 SpillSlotOffset -= SlotSize;
2902 }
2903
2904 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2905 // the frame register, we can delete it from CSI list and not have to worry
2906 // about avoiding it later.
2907 Register FPReg = TRI->getFrameRegister(MF);
2908 for (unsigned i = 0; i < CSI.size(); ++i) {
2909 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2910 CSI.erase(CSI.begin() + i);
2911 break;
2912 }
2913 }
2914 }
2915
2916 // Strategy:
2917 // 1. Use push2 when
2918 //    a) the number of CSRs is > 1, if no padding is needed;
2919 //    b) the number of CSRs is > 2, if padding is needed;
2920 //    c) the stack alignment is >= 16 bytes.
2921 // 2. When the number of CSR pushes is odd:
2922 //    a. Start using push2 from the 1st push if the stack is 16B aligned.
2923 //    b. Start using push2 from the 2nd push if the stack is not 16B aligned.
2924 // 3. When the number of CSR pushes is even, start using push2 from the 1st
2925 //    push and make the stack 16B aligned before the pushes.
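// E.g., under these rules (register counts assumed): with four 64-bit CSRs
// and a 16B-aligned start, both pairs use push2; with three CSRs and an
// unaligned start, the first register gets a plain push and the remaining
// two share one push2.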
2926 unsigned NumRegsForPush2 = 0;
2927 if (STI.hasPush2Pop2() && getStackAlignment() >= 16) {
2928 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2929 return X86::GR64RegClass.contains(I.getReg());
2930 });
2931 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2932 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2933 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2934 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2935 if (X86FI->padForPush2Pop2()) {
2936 SpillSlotOffset -= SlotSize;
2937 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2938 }
2939 }
2940
2941 // Assign slots for GPRs. It increases frame size.
2942 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2943 MCRegister Reg = I.getReg();
2944
2945 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2946 continue;
2947
2948 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2949 // or when there is an odd number of registers among the candidates so far.
2950 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2951 (SpillSlotOffset % 16 == 0 ||
2952 X86FI->getNumCandidatesForPush2Pop2() % 2))
2953 X86FI->addCandidateForPush2Pop2(Reg);
2954
2955 SpillSlotOffset -= SlotSize;
2956 CalleeSavedFrameSize += SlotSize;
2957
2958 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2959 I.setFrameIdx(SlotIndex);
2960 }
2961
2962 // Adjust the offset of the spill slot now that we know the exact
2963 // callee-saved frame size.
2964 if (X86FI->getRestoreBasePointer()) {
2965 SpillSlotOffset -= SlotSize;
2966 CalleeSavedFrameSize += SlotSize;
2967
2968 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2969 // TODO: would saving the slot index be better?
2970 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2971 }
2972 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2973 "Expect even candidates for push2/pop2");
2974 if (X86FI->getNumCandidatesForPush2Pop2())
2975 ++NumFunctionUsingPush2Pop2;
2976 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2977 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2978
2979 // Assign slots for XMMs.
2980 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2981 MCRegister Reg = I.getReg();
2982 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2983 continue;
2984
2985 // If this is k-register make sure we lookup via the largest legal type.
2986 MVT VT = MVT::Other;
2987 if (X86::VK16RegClass.contains(Reg))
2988 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2989
2990 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2991 unsigned Size = TRI->getSpillSize(*RC);
2992 Align Alignment = TRI->getSpillAlign(*RC);
2993 // Ensure alignment.
2994 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86");
2995 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2996
2997 // spill into slot
2998 SpillSlotOffset -= Size;
2999 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
3000 I.setFrameIdx(SlotIndex);
3001 MFI.ensureMaxAlignment(Alignment);
3002
3003 // Save the start offset and size of XMM in stack frame for funclets.
3004 if (X86::VR128RegClass.contains(Reg)) {
3005 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
3006 XMMCalleeSavedFrameSize += Size;
3007 }
3008 }
3009
3010 return true;
3011}
3012
3013 bool X86FrameLowering::spillCalleeSavedRegisters(
3014 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3015 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3016 DebugLoc DL = MBB.findDebugLoc(MI);
3017
3018 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
3019 // for us, and there are no XMM CSRs on Win32.
3020 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
3021 return true;
3022
3023 // Push GPRs. It increases frame size.
3024 const MachineFunction &MF = *MBB.getParent();
3025 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3026 if (X86FI->padForPush2Pop2()) {
3027 assert(SlotSize == 8 && "Unexpected slot size for padding!");
3028 BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
3029 .addReg(X86::RAX, RegState::Undef)
3030 .setMIFlag(MachineInstr::FrameSetup);
3031 }
3032
3033 // Update LiveIn of the basic block and decide whether we can add a kill flag
3034 // to the use.
3035 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
3036 const MachineRegisterInfo &MRI = MF.getRegInfo();
3037 // Do not set a kill flag on values that are also marked as live-in. This
3038 // happens with the @llvm.returnaddress intrinsic and with arguments
3039 // passed in callee-saved registers.
3040 // Omitting the kill flags is conservatively correct even if the live-in
3041 // is not used after all.
3042 if (MRI.isLiveIn(Reg))
3043 return false;
3044 MBB.addLiveIn(Reg);
3045 // Check if any subregister is live-in
3046 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3047 if (MRI.isLiveIn(*AReg))
3048 return false;
3049 return true;
3050 };
3051 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3052 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3053 };
3054
3055 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3056 MCRegister Reg = RI->getReg();
3057 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3058 continue;
3059
3060 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3061 MCRegister Reg2 = (++RI)->getReg();
3062 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
3063 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3064 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3065 .setMIFlag(MachineInstr::FrameSetup);
3066 } else {
3067 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3068 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3069 .setMIFlag(MachineInstr::FrameSetup);
3070 }
3071 }
3072
3073 if (X86FI->getRestoreBasePointer()) {
3074 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3075 Register BaseReg = this->TRI->getBaseRegister();
3076 BuildMI(MBB, MI, DL, TII.get(Opc))
3077 .addReg(BaseReg, getKillRegState(true))
3078 .setMIFlag(MachineInstr::FrameSetup);
3079 }
3080
3081 // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
3082 // so spill them to the stack frame instead.
3083 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3084 MCRegister Reg = I.getReg();
3085 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3086 continue;
3087
3088 // If this is k-register make sure we lookup via the largest legal type.
3089 MVT VT = MVT::Other;
3090 if (X86::VK16RegClass.contains(Reg))
3091 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3092
3093 // Add the callee-saved register as live-in. It's killed at the spill.
3094 MBB.addLiveIn(Reg);
3095 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3096
3097 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3098 Register());
3099 }
3100
3101 return true;
3102}
3103
3104 void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3105 MachineBasicBlock::iterator MBBI,
3106 MachineInstr *CatchRet) const {
3107 // SEH shouldn't use catchret.
3108 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3109 MBB.getParent()->getFunction().getPersonalityFn())) &&
3110 "SEH should not use CATCHRET");
3111 const DebugLoc &DL = CatchRet->getDebugLoc();
3112 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3113
3114 // Fill EAX/RAX with the address of the target block.
3115 if (STI.is64Bit()) {
3116 // LEA64r CatchRetTarget(%rip), %rax
3117 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3118 .addReg(X86::RIP)
3119 .addImm(0)
3120 .addReg(0)
3121 .addMBB(CatchRetTarget)
3122 .addReg(0);
3123 } else {
3124 // MOV32ri $CatchRetTarget, %eax
3125 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3126 .addMBB(CatchRetTarget);
3127 }
3128
3129 // Record that we've taken the address of CatchRetTarget and no longer just
3130 // reference it in a terminator.
3131 CatchRetTarget->setMachineBlockAddressTaken();
3132}
3133
3134 bool X86FrameLowering::restoreCalleeSavedRegisters(
3135 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3136 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3137 if (CSI.empty())
3138 return false;
3139
3140 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3141 // Don't restore CSRs in 32-bit EH funclets. Matches
3142 // spillCalleeSavedRegisters.
3143 if (STI.is32Bit())
3144 return true;
3145 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3146 // funclets. emitEpilogue transforms these to normal jumps.
3147 if (MI->getOpcode() == X86::CATCHRET) {
3148 const Function &F = MBB.getParent()->getFunction();
3149 bool IsSEH = isAsynchronousEHPersonality(
3150 classifyEHPersonality(F.getPersonalityFn()));
3151 if (IsSEH)
3152 return true;
3153 }
3154 }
3155
3156 DebugLoc DL = MBB.findDebugLoc(MI);
3157
3158 // Reload XMMs from stack frame.
3159 for (const CalleeSavedInfo &I : CSI) {
3160 MCRegister Reg = I.getReg();
3161 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3162 continue;
3163
3164 // If this is k-register make sure we lookup via the largest legal type.
3165 MVT VT = MVT::Other;
3166 if (X86::VK16RegClass.contains(Reg))
3167 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3168
3169 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3170 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3171 Register());
3172 }
3173
3174 // Restore the base pointer register from its stack slot.
3175 MachineFunction &MF = *MBB.getParent();
3176 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3177 if (X86FI->getRestoreBasePointer()) {
3178 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3179 Register BaseReg = this->TRI->getBaseRegister();
3180 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3181 .setMIFlag(MachineInstr::FrameDestroy);
3182 }
3183
3184 // POP GPRs.
3185 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3186 MCRegister Reg = I->getReg();
3187 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3188 continue;
3189
3190 if (X86FI->isCandidateForPush2Pop2(Reg))
3191 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3192 .addReg((++I)->getReg(), RegState::Define)
3193 .setMIFlag(MachineInstr::FrameDestroy);
3194 else
3195 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3196 .setMIFlag(MachineInstr::FrameDestroy);
3197 }
3198 if (X86FI->padForPush2Pop2())
3199 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3200
3201 return true;
3202}
3203
3204 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3205 BitVector &SavedRegs,
3206 RegScavenger *RS) const {
3207 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3208
3209 // Spill the BasePtr if it's used.
3210 if (TRI->hasBasePointer(MF)) {
3211 Register BasePtr = TRI->getBaseRegister();
3212 if (STI.isTarget64BitILP32())
3213 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3214 SavedRegs.set(BasePtr);
3215 }
3216}
3217
3218static bool HasNestArgument(const MachineFunction *MF) {
3219 const Function &F = MF->getFunction();
3220 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3221 I++) {
3222 if (I->hasNestAttr() && !I->use_empty())
3223 return true;
3224 }
3225 return false;
3226}
3227
3228/// GetScratchRegister - Get a temp register for performing work in the
3229/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3230/// and the properties of the function either one or two registers will be
3231/// needed. Set primary to true for the first register, false for the second.
3232static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3233 const MachineFunction &MF, bool Primary) {
3234 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3235
3236 // Erlang stuff.
3237 if (CallingConvention == CallingConv::HiPE) {
3238 if (Is64Bit)
3239 return Primary ? X86::R14 : X86::R13;
3240 else
3241 return Primary ? X86::EBX : X86::EDI;
3242 }
3243
3244 if (Is64Bit) {
3245 if (IsLP64)
3246 return Primary ? X86::R11 : X86::R12;
3247 else
3248 return Primary ? X86::R11D : X86::R12D;
3249 }
3250
3251 bool IsNested = HasNestArgument(&MF);
3252
3253 if (CallingConvention == CallingConv::X86_FastCall ||
3254 CallingConvention == CallingConv::Fast ||
3255 CallingConvention == CallingConv::Tail) {
3256 if (IsNested)
3257 report_fatal_error("Segmented stacks does not support fastcall with "
3258 "nested function.");
3259 return Primary ? X86::EAX : X86::ECX;
3260 }
3261 if (IsNested)
3262 return Primary ? X86::EDX : X86::EAX;
3263 return Primary ? X86::ECX : X86::EAX;
3264}
3265
3266// The stack limit in the TCB is set to this many bytes above the actual stack
3267// limit.
3268 static const uint64_t kSplitStackAvailable = 256;
3269
3270 void X86FrameLowering::adjustForSegmentedStacks(
3271 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3272 MachineFrameInfo &MFI = MF.getFrameInfo();
3273 uint64_t StackSize;
3274 unsigned TlsReg, TlsOffset;
3275 DebugLoc DL;
3276
3277 // To support shrink-wrapping we would need to insert the new blocks
3278 // at the right place and update the branches to PrologueMBB.
3279 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3280
3281 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3282 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3283 "Scratch register is live-in");
3284
3285 if (MF.getFunction().isVarArg())
3286 report_fatal_error("Segmented stacks do not support vararg functions.");
3287 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3288 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3289 !STI.isTargetDragonFly())
3290 report_fatal_error("Segmented stacks not supported on this platform.");
3291
3292 // Eventually StackSize will be calculated by a link-time pass, which will
3293 // also decide whether checking code needs to be injected into this particular
3294 // prologue.
3295 StackSize = MFI.getStackSize();
3296
3297 if (!MFI.needsSplitStackProlog())
3298 return;
3299
3300 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3301 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3302 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3303 bool IsNested = false;
3304
3305 // We need to know if the function has a nest argument only in 64-bit mode.
3306 if (Is64Bit)
3307 IsNested = HasNestArgument(&MF);
3308
3309 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3310 // allocMBB needs to be the last (terminating) instruction.
3311
3312 for (const auto &LI : PrologueMBB.liveins()) {
3313 allocMBB->addLiveIn(LI);
3314 checkMBB->addLiveIn(LI);
3315 }
3316
3317 if (IsNested)
3318 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3319
3320 MF.push_front(allocMBB);
3321 MF.push_front(checkMBB);
3322
3323 // When the frame size is less than 256 we just compare the stack
3324 // boundary directly to the value of the stack pointer, per gcc.
3325 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3326
3327  // Read the limit of the current stacklet from the stack_guard location.
3328 if (Is64Bit) {
3329 if (STI.isTargetLinux()) {
3330 TlsReg = X86::FS;
3331 TlsOffset = IsLP64 ? 0x70 : 0x40;
3332 } else if (STI.isTargetDarwin()) {
3333 TlsReg = X86::GS;
3334 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3335 } else if (STI.isTargetWin64()) {
3336 TlsReg = X86::GS;
3337 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3338 } else if (STI.isTargetFreeBSD()) {
3339 TlsReg = X86::FS;
3340 TlsOffset = 0x18;
3341 } else if (STI.isTargetDragonFly()) {
3342 TlsReg = X86::FS;
3343 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3344 } else {
3345 report_fatal_error("Segmented stacks not supported on this platform.");
3346 }
3347
3348 if (CompareStackPointer)
3349 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3350 else
3351 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3352 ScratchReg)
3353 .addReg(X86::RSP)
3354 .addImm(1)
3355 .addReg(0)
3356 .addImm(-StackSize)
3357 .addReg(0);
3358
3359 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3360 .addReg(ScratchReg)
3361 .addReg(0)
3362 .addImm(1)
3363 .addReg(0)
3364 .addImm(TlsOffset)
3365 .addReg(TlsReg);
3366 } else {
3367 if (STI.isTargetLinux()) {
3368 TlsReg = X86::GS;
3369 TlsOffset = 0x30;
3370 } else if (STI.isTargetDarwin()) {
3371 TlsReg = X86::GS;
3372 TlsOffset = 0x48 + 90 * 4;
3373 } else if (STI.isTargetWin32()) {
3374 TlsReg = X86::FS;
3375 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3376 } else if (STI.isTargetDragonFly()) {
3377 TlsReg = X86::FS;
3378 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3379 } else if (STI.isTargetFreeBSD()) {
3380 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3381 } else {
3382 report_fatal_error("Segmented stacks not supported on this platform.");
3383 }
3384
3385 if (CompareStackPointer)
3386 ScratchReg = X86::ESP;
3387 else
3388 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3389 .addReg(X86::ESP)
3390 .addImm(1)
3391 .addReg(0)
3392 .addImm(-StackSize)
3393 .addReg(0);
3394
3395 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3396 STI.isTargetDragonFly()) {
3397 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3398 .addReg(ScratchReg)
3399 .addReg(0)
3400 .addImm(0)
3401 .addReg(0)
3402 .addImm(TlsOffset)
3403 .addReg(TlsReg);
3404 } else if (STI.isTargetDarwin()) {
3405
3406 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3407 unsigned ScratchReg2;
3408 bool SaveScratch2;
3409 if (CompareStackPointer) {
3410 // The primary scratch register is available for holding the TLS offset.
3411 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3412 SaveScratch2 = false;
3413 } else {
3414 // Need to use a second register to hold the TLS offset
3415 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3416
3417 // Unfortunately, with fastcc the second scratch register may hold an
3418 // argument.
3419 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3420 }
3421
3422 // If Scratch2 is live-in then it needs to be saved.
3423 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3424 "Scratch register is live-in and not saved");
3425
3426 if (SaveScratch2)
3427 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3428 .addReg(ScratchReg2, RegState::Kill);
3429
3430 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3431 .addImm(TlsOffset);
3432 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3433 .addReg(ScratchReg)
3434 .addReg(ScratchReg2)
3435 .addImm(1)
3436 .addReg(0)
3437 .addImm(0)
3438 .addReg(TlsReg);
3439
3440 if (SaveScratch2)
3441 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3442 }
3443 }
3444
3445 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3446 // It jumps to normal execution of the function body.
3447 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3448 .addMBB(&PrologueMBB)
3449      .addImm(X86::COND_AE);
3450
3451  // On 32-bit we first push the argument size and then the frame size. On
3452  // 64-bit, we pass the stack frame size in r10 and the argument size in r11.
3453 if (Is64Bit) {
3454    // Functions with nested arguments use R10, so it needs to be saved across
3455    // the call to __morestack.
3456
3457 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3458 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3459 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3460 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3461
3462 if (IsNested)
3463 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3464
3465 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3466 .addImm(StackSize);
3467 BuildMI(allocMBB, DL,
3468           TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3469           Reg11)
3470 .addImm(X86FI->getArgumentStackSize());
3471 } else {
3472 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3473 .addImm(X86FI->getArgumentStackSize());
3474 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3475 }
3476
3477 // __morestack is in libgcc
3478  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3479    // Under the large code model, we cannot assume that __morestack lives
3480 // within 2^31 bytes of the call site, so we cannot use pc-relative
3481 // addressing. We cannot perform the call via a temporary register,
3482 // as the rax register may be used to store the static chain, and all
3483 // other suitable registers may be either callee-save or used for
3484 // parameter passing. We cannot use the stack at this point either
3485 // because __morestack manipulates the stack directly.
3486 //
3487 // To avoid these issues, perform an indirect call via a read-only memory
3488 // location containing the address.
3489 //
3490 // This solution is not perfect, as it assumes that the .rodata section
3491 // is laid out within 2^31 bytes of each function body, but this seems
3492 // to be sufficient for JIT.
3493  // FIXME: Add retpoline support and remove the error here.
3494 if (STI.useIndirectThunkCalls())
3495 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3496 "code model and thunks not yet implemented.");
3497 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3498 .addReg(X86::RIP)
3499 .addImm(0)
3500 .addReg(0)
3501 .addExternalSymbol("__morestack_addr")
3502 .addReg(0);
3503 } else {
3504 if (Is64Bit)
3505 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3506 .addExternalSymbol("__morestack");
3507 else
3508 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3509 .addExternalSymbol("__morestack");
3510 }
3511
3512 if (IsNested)
3513 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3514 else
3515 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3516
3517 allocMBB->addSuccessor(&PrologueMBB);
3518
3519 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3520 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3521
3522#ifdef EXPENSIVE_CHECKS
3523 MF.verify();
3524#endif
3525}
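// For illustration, on x86-64 Linux (LP64) the two blocks built above expand
// to roughly the following, where StackSize and ArgSize stand for the
// constants used (a sketch, not verbatim compiler output):
//   checkMBB:
//     leaq -StackSize(%rsp), %r11   # omitted when StackSize < 256
//     cmpq %fs:0x70, %r11           # stacklet limit in the TCB
//     jae  PrologueMBB              # enough room on the current stacklet
//   allocMBB:
//     movq $StackSize, %r10
//     movq $ArgSize, %r11
//     callq __morestack
//     retq                          # __morestack re-enters the function body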
3526
3527/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3528/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3529/// to fields it needs, through a named metadata node "hipe.literals" containing
3530/// name-value pairs.
3531static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3532 const StringRef LiteralName) {
3533 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3534 MDNode *Node = HiPELiteralsMD->getOperand(i);
3535 if (Node->getNumOperands() != 2)
3536 continue;
3537 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3538 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3539 if (!NodeName || !NodeVal)
3540 continue;
3541 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3542 if (ValConst && NodeName->getString() == LiteralName) {
3543 return ValConst->getZExtValue();
3544 }
3545 }
3546
3547 report_fatal_error("HiPE literal " + LiteralName +
3548 " required but not provided");
3549}
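// Illustrative example (hypothetical offset value): given module metadata
//   !hipe.literals = !{!0}
//   !0 = !{!"P_NSP_LIMIT", i32 152}
// getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT") returns 152; asking for a
// literal that is not present aborts via report_fatal_error.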
3550
3551// Return true if there are no non-ehpad successors to MBB and there are no
3552// non-meta instructions between MBBI and MBB.end().
3553static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3554                                  MachineBasicBlock::const_iterator MBBI) {
3555  return llvm::all_of(
3556 MBB.successors(),
3557 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3558 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3559 return MI.isMetaInstruction();
3560 });
3561}
3562
3563/// Erlang programs may need a special prologue to handle the stack size they
3564/// might need at runtime. That is because Erlang/OTP does not implement a C
3565/// stack but uses a custom hybrid stack/heap architecture.
3566/// (for more information see Eric Stenman's Ph.D. thesis:
3567/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3568///
3569/// CheckStack:
3570/// temp0 = sp - MaxStack
3571/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3572/// OldStart:
3573/// ...
3574/// IncStack:
3575/// call inc_stack # doubles the stack space
3576/// temp0 = sp - MaxStack
3577/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3578void X86FrameLowering::adjustForHiPEPrologue(
3579    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3580 MachineFrameInfo &MFI = MF.getFrameInfo();
3581 DebugLoc DL;
3582
3583 // To support shrink-wrapping we would need to insert the new blocks
3584 // at the right place and update the branches to PrologueMBB.
3585 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3586
3587 // HiPE-specific values
3588 NamedMDNode *HiPELiteralsMD =
3589 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3590 if (!HiPELiteralsMD)
3592 "Can't generate HiPE prologue without runtime parameters");
3593 const unsigned HipeLeafWords = getHiPELiteral(
3594 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3595 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3596 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3597 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3598 ? MF.getFunction().arg_size() - CCRegisteredArgs
3599 : 0;
3600 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3601
3602 assert(STI.isTargetLinux() &&
3603 "HiPE prologue is only supported on Linux operating systems.");
3604
3605 // Compute the largest caller's frame that is needed to fit the callees'
3606 // frames. This 'MaxStack' is computed from:
3607 //
3608 // a) the fixed frame size, which is the space needed for all spilled temps,
3609 // b) outgoing on-stack parameter areas, and
3610 // c) the minimum stack space this function needs to make available for the
3611 // functions it calls (a tunable ABI property).
3612 if (MFI.hasCalls()) {
3613 unsigned MoreStackForCalls = 0;
3614
3615 for (auto &MBB : MF) {
3616 for (auto &MI : MBB) {
3617 if (!MI.isCall())
3618 continue;
3619
3620 // Get callee operand.
3621 const MachineOperand &MO = MI.getOperand(0);
3622
3623 // Only take account of global function calls (no closures etc.).
3624 if (!MO.isGlobal())
3625 continue;
3626
3627 const Function *F = dyn_cast<Function>(MO.getGlobal());
3628 if (!F)
3629 continue;
3630
3631 // Do not update 'MaxStack' for primitive and built-in functions
3632 // (encoded with names either starting with "erlang."/"bif_" or not
3633 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3634 // "_", such as the BIF "suspend_0") as they are executed on another
3635 // stack.
3636 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3637 F->getName().find_first_of("._") == StringRef::npos)
3638 continue;
3639
3640 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3641 ? F->arg_size() - CCRegisteredArgs
3642 : 0;
3643 if (HipeLeafWords - 1 > CalleeStkArity)
3644 MoreStackForCalls =
3645 std::max(MoreStackForCalls,
3646 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3647 }
3648 }
3649 MaxStack += MoreStackForCalls;
3650 }
3651
3652 // If the stack frame needed is larger than the guaranteed then runtime checks
3653 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
3654 if (MaxStack > Guaranteed) {
3655 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3656 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3657
3658 for (const auto &LI : PrologueMBB.liveins()) {
3659 stackCheckMBB->addLiveIn(LI);
3660 incStackMBB->addLiveIn(LI);
3661 }
3662
3663 MF.push_front(incStackMBB);
3664 MF.push_front(stackCheckMBB);
3665
3666 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3667 unsigned LEAop, CMPop, CALLop;
3668 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3669 if (Is64Bit) {
3670 SPReg = X86::RSP;
3671 PReg = X86::RBP;
3672 LEAop = X86::LEA64r;
3673 CMPop = X86::CMP64rm;
3674 CALLop = X86::CALL64pcrel32;
3675 } else {
3676 SPReg = X86::ESP;
3677 PReg = X86::EBP;
3678 LEAop = X86::LEA32r;
3679 CMPop = X86::CMP32rm;
3680 CALLop = X86::CALLpcrel32;
3681 }
3682
3683 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3684 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3685 "HiPE prologue scratch register is live-in");
3686
3687 // Create new MBB for StackCheck:
3688 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3689 false, -MaxStack);
3690 // SPLimitOffset is in a fixed heap location (pointed by BP).
3691 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3692 PReg, false, SPLimitOffset);
3693 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3694 .addMBB(&PrologueMBB)
3695        .addImm(X86::COND_AE);
3696
3697 // Create new MBB for IncStack:
3698 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3699 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3700 false, -MaxStack);
3701 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3702 PReg, false, SPLimitOffset);
3703 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3704 .addMBB(incStackMBB)
3705        .addImm(X86::COND_LE);
3706
3707 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3708 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3709 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3710 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3711 }
3712#ifdef EXPENSIVE_CHECKS
3713 MF.verify();
3714#endif
3715}
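// Sketch of the emitted x86-64 check, with the process pointer P in RBP and
// R14 as the HiPE scratch register chosen by GetScratchRegister:
//   stackCheckMBB:
//     leaq -MaxStack(%rsp), %r14
//     cmpq P_NSP_LIMIT(%rbp), %r14
//     jae  PrologueMBB             # enough stack, run the original body
//   incStackMBB:
//     callq inc_stack_0
//     leaq -MaxStack(%rsp), %r14
//     cmpq P_NSP_LIMIT(%rbp), %r14
//     jle  incStackMBB             # still short, grow again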
3716
3717bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3718                                           MachineBasicBlock::iterator MBBI,
3719                                           const DebugLoc &DL,
3720 int Offset) const {
3721 if (Offset <= 0)
3722 return false;
3723
3724 if (Offset % SlotSize)
3725 return false;
3726
3727 int NumPops = Offset / SlotSize;
3728 // This is only worth it if we have at most 2 pops.
3729 if (NumPops != 1 && NumPops != 2)
3730 return false;
3731
3732 // Handle only the trivial case where the adjustment directly follows
3733 // a call. This is the most common one, anyway.
3734 if (MBBI == MBB.begin())
3735 return false;
3736 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3737 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3738 return false;
3739
3740 unsigned Regs[2];
3741 unsigned FoundRegs = 0;
3742
3743 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3744 const MachineOperand &RegMask = Prev->getOperand(1);
3745
3746 auto &RegClass =
3747 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3748 // Try to find up to NumPops free registers.
3749 for (auto Candidate : RegClass) {
3750 // Poor man's liveness:
3751 // Since we're immediately after a call, any register that is clobbered
3752 // by the call and not defined by it can be considered dead.
3753 if (!RegMask.clobbersPhysReg(Candidate))
3754 continue;
3755
3756 // Don't clobber reserved registers
3757 if (MRI.isReserved(Candidate))
3758 continue;
3759
3760 bool IsDef = false;
3761 for (const MachineOperand &MO : Prev->implicit_operands()) {
3762 if (MO.isReg() && MO.isDef() &&
3763 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3764 IsDef = true;
3765 break;
3766 }
3767 }
3768
3769 if (IsDef)
3770 continue;
3771
3772 Regs[FoundRegs++] = Candidate;
3773 if (FoundRegs == (unsigned)NumPops)
3774 break;
3775 }
3776
3777 if (FoundRegs == 0)
3778 return false;
3779
3780 // If we found only one free register, but need two, reuse the same one twice.
3781 while (FoundRegs < (unsigned)NumPops)
3782 Regs[FoundRegs++] = Regs[0];
3783
3784 for (int i = 0; i < NumPops; ++i)
3785 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3786 Regs[i]);
3787
3788 return true;
3789}
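// Example of the rewrite this enables under minsize: an SP adjustment that
// immediately follows a call, e.g.
//   call foo
//   addl $8, %esp
// becomes two one-byte pops into registers the call clobbered but did not
// define (so their values are dead), e.g.
//   call foo
//   popl %ecx
//   popl %edx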
3790
3791MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3792    MachineFunction &MF, MachineBasicBlock &MBB,
3793    MachineBasicBlock::iterator I) const {
3794  bool reserveCallFrame = hasReservedCallFrame(MF);
3795 unsigned Opcode = I->getOpcode();
3796 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3797 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3798 uint64_t Amount = TII.getFrameSize(*I);
3799 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3800 I = MBB.erase(I);
3801 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3802
3803 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3804 // typically because the function is marked noreturn (abort, throw,
3805 // assert_fail, etc).
3806 if (isDestroy && blockEndIsUnreachable(MBB, I))
3807 return I;
3808
3809 if (!reserveCallFrame) {
3810 // If the stack pointer can be changed after prologue, turn the
3811 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3812 // adjcallstackdown instruction into 'add ESP, <amt>'
3813
3814 // We need to keep the stack aligned properly. To do this, we round the
3815 // amount of space needed for the outgoing arguments up to the next
3816 // alignment boundary.
3817 Amount = alignTo(Amount, getStackAlign());
3818
3819 const Function &F = MF.getFunction();
3820 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3821 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3822
3823 // If we have any exception handlers in this function, and we adjust
3824 // the SP before calls, we may need to indicate this to the unwinder
3825 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3826 // Amount == 0, because the preceding function may have set a non-0
3827 // GNU_ARGS_SIZE.
3828 // TODO: We don't need to reset this between subsequent functions,
3829 // if it didn't change.
3830 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3831
3832 if (HasDwarfEHHandlers && !isDestroy &&
3833        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3834      BuildCFI(MBB, InsertPos, DL,
3835 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3836
3837 if (Amount == 0)
3838 return I;
3839
3840 // Factor out the amount that gets handled inside the sequence
3841 // (Pushes of argument for frame setup, callee pops for frame destroy)
3842 Amount -= InternalAmt;
3843
3844 // TODO: This is needed only if we require precise CFA.
3845 // If this is a callee-pop calling convention, emit a CFA adjust for
3846 // the amount the callee popped.
3847 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3848 BuildCFI(MBB, InsertPos, DL,
3849 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3850
3851 // Add Amount to SP to destroy a frame, or subtract to setup.
3852 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3853 int64_t CfaAdjustment = StackAdjustment;
3854
3855 if (StackAdjustment) {
3856 // Merge with any previous or following adjustment instruction. Note: the
3857 // instructions merged with here do not have CFI, so their stack
3858 // adjustments do not feed into CfaAdjustment
3859
3860 auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI,
3861 int64_t Offset) {
3862 CfaAdjustment += Offset;
3863 };
3864 auto CalcNewOffset = [&StackAdjustment](int64_t Offset) {
3865 return StackAdjustment + Offset;
3866 };
3867 StackAdjustment =
3868 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, true);
3869 StackAdjustment =
3870 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, false);
3871
3872 if (StackAdjustment) {
3873 if (!(F.hasMinSize() &&
3874 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3875 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3876 /*InEpilogue=*/false);
3877 }
3878 }
3879
3880 if (DwarfCFI && !hasFP(MF) && CfaAdjustment) {
3881 // If we don't have FP, but need to generate unwind information,
3882 // we need to set the correct CFA offset after the stack adjustment.
3883 // How much we adjust the CFA offset depends on whether we're emitting
3884 // CFI only for EH purposes or for debugging. EH only requires the CFA
3885 // offset to be correct at each call site, while for debugging we want
3886 // it to be more precise.
3887
3888 // TODO: When not using precise CFA, we also need to adjust for the
3889 // InternalAmt here.
3890 BuildCFI(
3891 MBB, InsertPos, DL,
3892 MCCFIInstruction::createAdjustCfaOffset(nullptr, -CfaAdjustment));
3893 }
3894
3895 return I;
3896 }
3897
3898 if (InternalAmt) {
3899    MachineBasicBlock::iterator CI = I;
3900    MachineBasicBlock::iterator B = MBB.begin();
3901    while (CI != B && !std::prev(CI)->isCall())
3902 --CI;
3903 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3904 }
3905
3906 return I;
3907}
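// For illustration, with a non-reserved call frame the pseudos bracketing a
// call such as
//   ADJCALLSTACKDOWN64 16, 0, 0
//   callq g
//   ADJCALLSTACKUP64 16, 0
// are erased and become 'subq $16, %rsp' before the call and
// 'addq $16, %rsp' after it, possibly merged with neighbouring SP updates or
// turned into pops by adjustStackWithPops under minsize.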
3908
3909bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3910  assert(MBB.getParent() && "Block is not attached to a function!");
3911 const MachineFunction &MF = *MBB.getParent();
3912 if (!MBB.isLiveIn(X86::EFLAGS))
3913 return true;
3914
3915 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3916 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3917  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3918  const X86TargetLowering &TLI = *STI.getTargetLowering();
3919 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3920 return false;
3921
3922  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3923  return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3924}
3925
3926bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3927  assert(MBB.getParent() && "Block is not attached to a function!");
3928
3929  // Win64 has strict requirements on the epilogue, and we are
3930  // not taking a chance at messing with them.
3931  // I.e., unless this block is already an exit block, we can't use
3932  // it as an epilogue.
3933 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3934 return false;
3935
3936 // Swift async context epilogue has a BTR instruction that clobbers parts of
3937 // EFLAGS.
3938 const MachineFunction &MF = *MBB.getParent();
3939  if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3940    return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3941
3942 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3943 return true;
3944
3945 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3946 // clobbers the EFLAGS. Check that we do not need to preserve it,
3947 // otherwise, conservatively assume this is not
3948 // safe to insert the epilogue here.
3949  return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3950}
3951
3952bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3953 // If we may need to emit frameless compact unwind information, give
3954 // up as this is currently broken: PR25614.
3955 bool CompactUnwind =
3956      MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3957  return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3958 !CompactUnwind) &&
3959 // The lowering of segmented stack and HiPE only support entry
3960 // blocks as prologue blocks: PR26107. This limitation may be
3961 // lifted if we fix:
3962 // - adjustForSegmentedStacks
3963 // - adjustForHiPEPrologue
3964         MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3965         !MF.shouldSplitStack();
3966}
3967
3968MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3969    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3970    const DebugLoc &DL, bool RestoreSP) const {
3971 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3972 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3973 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3974 "restoring EBP/ESI on non-32-bit target");
3975
3976 MachineFunction &MF = *MBB.getParent();
3977 Register FramePtr = TRI->getFrameRegister(MF);
3978 Register BasePtr = TRI->getBaseRegister();
3979 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3980  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3981  MachineFrameInfo &MFI = MF.getFrameInfo();
3982
3983 // FIXME: Don't set FrameSetup flag in catchret case.
3984
3985 int FI = FuncInfo.EHRegNodeFrameIndex;
3986 int EHRegSize = MFI.getObjectSize(FI);
3987
3988 if (RestoreSP) {
3989 // MOV32rm -EHRegSize(%ebp), %esp
3990 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3991 X86::EBP, true, -EHRegSize)
3992        .setMIFlag(MachineInstr::FrameSetup);
3993  }
3994
3995 Register UsedReg;
3996 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3997 int EndOffset = -EHRegOffset - EHRegSize;
3998 FuncInfo.EHRegNodeEndOffset = EndOffset;
3999
4000 if (UsedReg == FramePtr) {
4001 // ADD $offset, %ebp
4002 unsigned ADDri = getADDriOpcode(false);
4003 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
4004        .addReg(FramePtr)
4005        .addImm(EndOffset)
4006        .setMIFlag(MachineInstr::FrameSetup)
4007        ->getOperand(3)
4008 .setIsDead();
4009 assert(EndOffset >= 0 &&
4010 "end of registration object above normal EBP position!");
4011 } else if (UsedReg == BasePtr) {
4012 // LEA offset(%ebp), %esi
4013 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
4014 FramePtr, false, EndOffset)
4015        .setMIFlag(MachineInstr::FrameSetup);
4016    // MOV32rm SavedEBPOffset(%esi), %ebp
4017 assert(X86FI->getHasSEHFramePtrSave());
4018 int Offset =
4019 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
4020 .getFixed();
4021 assert(UsedReg == BasePtr);
4022 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
4023 UsedReg, true, Offset)
4024        .setMIFlag(MachineInstr::FrameSetup);
4025  } else {
4026 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
4027 }
4028 return MBBI;
4029}
4030
4031int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
4032  return TRI->getSlotSize();
4033}
4034
4035Register
4036X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
4037  return StackPtr;
4038}
4039
4040TargetFrameLowering::DwarfFrameBase
4041X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
4042  const X86RegisterInfo *RI = STI.getRegisterInfo();
4043  Register FrameRegister = RI->getFrameRegister(MF);
4044 if (getInitialCFARegister(MF) == FrameRegister &&
4045      MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
4046    DwarfFrameBase FrameBase;
4047 FrameBase.Kind = DwarfFrameBase::CFA;
4048 FrameBase.Location.Offset =
4049        -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
4050    return FrameBase;
4051 }
4052
4053 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4054}
4055
4056namespace {
4057// Struct used by orderFrameObjects to help sort the stack objects.
4058struct X86FrameSortingObject {
4059 bool IsValid = false; // true if we care about this Object.
4060 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4061 unsigned ObjectSize = 0; // Size of Object in bytes.
4062 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4063 unsigned ObjectNumUses = 0; // Object static number of uses.
4064};
4065
4066// The comparison function we use for std::sort to order our local
4067// stack symbols. The current algorithm is to use an estimated
4068// "density". This takes into consideration the size and number of
4069// uses each object has in order to roughly minimize code size.
4070// So, for example, an object of size 16B that is referenced 5 times
4071// will get higher priority than 4 4B objects referenced 1 time each.
4072// It's not perfect and we may be able to squeeze a few more bytes out of
4073// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4074// fringe end can have special consideration, given their size is less
4075// important, etc.), but the algorithmic complexity grows too much to be
4076// worth the extra gains we get. This gets us pretty close.
4077// The final order leaves us with objects with highest priority going
4078// at the end of our list.
4079struct X86FrameSortingComparator {
4080 inline bool operator()(const X86FrameSortingObject &A,
4081 const X86FrameSortingObject &B) const {
4082 uint64_t DensityAScaled, DensityBScaled;
4083
4084 // For consistency in our comparison, all invalid objects are placed
4085 // at the end. This also allows us to stop walking when we hit the
4086 // first invalid item after it's all sorted.
4087 if (!A.IsValid)
4088 return false;
4089 if (!B.IsValid)
4090 return true;
4091
4092 // The density is calculated by doing :
4093 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4094 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4095 // Since this approach may cause inconsistencies in
4096 // the floating point <, >, == comparisons, depending on the floating
4097 // point model with which the compiler was built, we're going
4098 // to scale both sides by multiplying with
4099 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4100 // the division and, with it, the need for any floating point
4101 // arithmetic.
4102 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4103 static_cast<uint64_t>(B.ObjectSize);
4104 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4105 static_cast<uint64_t>(A.ObjectSize);
4106
4107 // If the two densities are equal, prioritize highest alignment
4108 // objects. This allows for similar alignment objects
4109 // to be packed together (given the same density).
4110 // There's room for improvement here, also, since we can pack
4111 // similar alignment (different density) objects next to each
4112 // other to save padding. This will also require further
4113 // complexity/iterations, and the overall gain isn't worth it,
4114 // in general. Something to keep in mind, though.
4115 if (DensityAScaled == DensityBScaled)
4116 return A.ObjectAlignment < B.ObjectAlignment;
4117
4118 return DensityAScaled < DensityBScaled;
4119 }
4120};
4121} // namespace
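// Worked example of the scaled comparison: for object A (size 16, 5 uses)
// against object B (size 4, 1 use),
//   DensityAScaled = 5 * 4  = 20
//   DensityBScaled = 1 * 16 = 16
// so A compares greater and sorts after B, i.e. toward the end of the list,
// which is where the highest-priority objects are placed.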
4122
4123// Order the symbols in the local stack.
4124// We want to place the local stack objects in some sort of sensible order.
4125// The heuristic we use is to try and pack them according to static number
4126// of uses and size of object in order to minimize code size.
4128 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4129 const MachineFrameInfo &MFI = MF.getFrameInfo();
4130
4131 // Don't waste time if there's nothing to do.
4132 if (ObjectsToAllocate.empty())
4133 return;
4134
4135 // Create an array of all MFI objects. We won't need all of these
4136 // objects, but we're going to create a full array of them to make
4137 // it easier to index into when we're counting "uses" down below.
4138 // We want to be able to easily/cheaply access an object by simply
4139 // indexing into it, instead of having to search for it every time.
4140 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4141
4142 // Walk the objects we care about and mark them as such in our working
4143 // struct.
4144 for (auto &Obj : ObjectsToAllocate) {
4145 SortingObjects[Obj].IsValid = true;
4146 SortingObjects[Obj].ObjectIndex = Obj;
4147 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4148 // Set the size.
4149 int ObjectSize = MFI.getObjectSize(Obj);
4150 if (ObjectSize == 0)
4151 // Variable size. Just use 4.
4152 SortingObjects[Obj].ObjectSize = 4;
4153 else
4154 SortingObjects[Obj].ObjectSize = ObjectSize;
4155 }
4156
4157 // Count the number of uses for each object.
4158 for (auto &MBB : MF) {
4159 for (auto &MI : MBB) {
4160 if (MI.isDebugInstr())
4161 continue;
4162 for (const MachineOperand &MO : MI.operands()) {
4163 // Check to see if it's a local stack symbol.
4164 if (!MO.isFI())
4165 continue;
4166 int Index = MO.getIndex();
4167 // Check to see if it falls within our range, and is tagged
4168 // to require ordering.
4169 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4170 SortingObjects[Index].IsValid)
4171 SortingObjects[Index].ObjectNumUses++;
4172 }
4173 }
4174 }
4175
4176 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4177 // info).
4178 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4179
4180 // Now modify the original list to represent the final order that
4181 // we want. The order will depend on whether we're going to access them
4182 // from the stack pointer or the frame pointer. For SP, the list should
4183 // end up with the END containing objects that we want with smaller offsets.
4184 // For FP, it should be flipped.
4185 int i = 0;
4186 for (auto &Obj : SortingObjects) {
4187 // All invalid items are sorted at the end, so it's safe to stop.
4188 if (!Obj.IsValid)
4189 break;
4190 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4191 }
4192
4193 // Flip it if we're accessing off of the FP.
4194 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4195 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4196}
4197
4198unsigned
4199X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4200  // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4201 unsigned Offset = 16;
4202 // RBP is immediately pushed.
4203 Offset += SlotSize;
4204 // All callee-saved registers are then pushed.
4205 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4206 // Every funclet allocates enough stack space for the largest outgoing call.
4207 Offset += getWinEHFuncletFrameSize(MF);
4208 return Offset;
4209}
4210
4211void X86FrameLowering::processFunctionBeforeFrameFinalized(
4212    MachineFunction &MF, RegScavenger *RS) const {
4213 // Mark the function as not having WinCFI. We will set it back to true in
4214 // emitPrologue if it gets called and emits CFI.
4215 MF.setHasWinCFI(false);
4216
4217 MachineFrameInfo &MFI = MF.getFrameInfo();
4218 // If the frame is big enough that we might need to scavenge a register to
4219 // handle huge offsets, reserve a stack slot for that now.
4220 if (!isInt<32>(MFI.estimateStackSize(MF))) {
4221 int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false);
4222    RS->addScavengingFrameIndex(FI);
4223  }
4224
4225 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4226 // aligned. The format doesn't support misaligned stack adjustments.
4227  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4228    MFI.ensureMaxAlignment(Align(SlotSize));
4229
4230 // If this function isn't doing Win64-style C++ EH, we don't need to do
4231 // anything.
4232 if (STI.is64Bit() && MF.hasEHFunclets() &&
4233      classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4234          EHPersonality::MSVC_CXX) {
4235    adjustFrameForMsvcCxxEh(MF);
4236 }
4237}
4238
4239void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4240 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4241 // relative to RSP after the prologue. Find the offset of the last fixed
4242 // object, so that we can allocate a slot immediately following it. If there
4243 // were no fixed objects, use offset -SlotSize, which is immediately after the
4244 // return address. Fixed objects have negative frame indices.
4245 MachineFrameInfo &MFI = MF.getFrameInfo();
4246 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4247 int64_t MinFixedObjOffset = -SlotSize;
4248 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4249 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4250
4251 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4252 for (WinEHHandlerType &H : TBME.HandlerArray) {
4253 int FrameIndex = H.CatchObj.FrameIndex;
4254 if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(FrameIndex) == 0) {
4255 // Ensure alignment.
4256 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4257 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4258 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4259 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4260 }
4261 }
4262 }
4263
4264 // Ensure alignment.
4265 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4266 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4267 int UnwindHelpFI =
4268 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4269 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4270
4271 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4272 // other frame setup instructions.
4273 MachineBasicBlock &MBB = MF.front();
4274 auto MBBI = MBB.begin();
4275 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4276 ++MBBI;
4277
4278  DebugLoc DL = MBB.findDebugLoc(MBBI);
4279  addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4280 UnwindHelpFI)
4281 .addImm(-2);
4282}
4283
4284void X86FrameLowering::processFunctionAfterFrameFinalized(
4285    MachineFunction &MF, RegScavenger *RS) const {
4286 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4287
4288 if (STI.is32Bit() && MF.hasEHFunclets())
4289    restoreWinEHStackPointersInParent(MF);
4290  // We have emitted prolog and epilog. Don't need stack pointer saving
4291 // instruction any more.
4292 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4293 MI->eraseFromParent();
4294 X86FI->setStackPtrSaveMI(nullptr);
4295 }
4296}
4297
4298void X86FrameLowering::restoreWinEHStackPointersInParent(
4299 MachineFunction &MF) const {
4300 // 32-bit functions have to restore stack pointers when control is transferred
4301 // back to the parent function. These blocks are identified as eh pads that
4302 // are not funclet entries.
4303 bool IsSEH = isAsynchronousEHPersonality(
4304      classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4305  for (MachineBasicBlock &MBB : MF) {
4306 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4307 if (NeedsRestore)
4308      restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4309                                  /*RestoreSP=*/IsSEH);
4310 }
4311}
4312
4313// Compute the alignment gap between current SP after spilling FP/BP and the
4314// next properly aligned stack offset.
4315static int computeFPBPAlignmentGap(MachineFunction &MF,
4316                                   const TargetRegisterClass *RC,
4317 unsigned NumSpilledRegs) {
4318  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
4319  unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4320 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4321 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4322 return AlignedSize - AllocSize;
4323}
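// Worked example: spilling only FP (an 8-byte GPR) with a 16-byte stack
// alignment gives AllocSize = 8 and AlignedSize = 16, so the gap is 8 and the
// caller must drop SP by another 8 bytes; spilling both FP and BP gives
// AllocSize = 16 and a gap of 0.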
4324
4325void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4326                                         MachineBasicBlock::iterator BeforeMI,
4327                                         Register FP, Register BP,
4328 int SPAdjust) const {
4329 assert(FP.isValid() || BP.isValid());
4330
4331 MachineBasicBlock *MBB = BeforeMI->getParent();
4332 DebugLoc DL = BeforeMI->getDebugLoc();
4333
4334 // Spill FP.
4335 if (FP.isValid()) {
4336 BuildMI(*MBB, BeforeMI, DL,
4337 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4338 .addReg(FP);
4339 }
4340
4341 // Spill BP.
4342 if (BP.isValid()) {
4343 BuildMI(*MBB, BeforeMI, DL,
4344 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4345 .addReg(BP);
4346 }
4347
4348 // Make sure SP is aligned.
4349 if (SPAdjust)
4350 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4351
4352 // Emit unwinding information.
4353 if (FP.isValid() && needsDwarfCFI(MF)) {
4354 // Emit .cfi_remember_state to remember old frame.
4355 unsigned CFIIndex =
4356        MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
4357    BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4358 .addCFIIndex(CFIIndex);
4359
4360 // Setup new CFA value with DW_CFA_def_cfa_expression:
4361 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4362 SmallString<64> CfaExpr;
4363 uint8_t buffer[16];
4364 int Offset = SPAdjust;
4365 if (BP.isValid())
4366 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4367 // If BeforeMI is a frame setup instruction, we need to adjust the position
4368 // and offset of the new cfi instruction.
4369 if (TII.isFrameSetup(*BeforeMI)) {
4370 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4371 BeforeMI = std::next(BeforeMI);
4372 }
4373 Register StackPtr = TRI->getStackRegister();
4374 if (STI.isTarget64BitILP32())
4375      StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
4376    unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4377 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4378 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4379 CfaExpr.push_back(dwarf::DW_OP_deref);
4380 CfaExpr.push_back(dwarf::DW_OP_consts);
4381 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4382 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4383
4384 SmallString<64> DefCfaExpr;
4385 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4386 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4387 DefCfaExpr.append(CfaExpr.str());
4388 BuildCFI(*MBB, BeforeMI, DL,
4389 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4390             MachineInstr::FrameSetup);
4391  }
4392}
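// On x86-64 (RSP is DWARF register 7 and SlotSize * 2 == 16), the escape
// built above encodes roughly the following CFA rule:
//   DW_CFA_def_cfa_expression:
//     DW_OP_breg7 +Offset   # slot where the old frame pointer was pushed
//     DW_OP_deref           # load the saved frame pointer
//     DW_OP_consts 16
//     DW_OP_plus            # CFA = saved FP + 16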
4393
4394void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4395                                           MachineBasicBlock::iterator AfterMI,
4396                                           Register FP, Register BP,
4397 int SPAdjust) const {
4398 assert(FP.isValid() || BP.isValid());
4399
4400 // Adjust SP so it points to spilled FP or BP.
4401 MachineBasicBlock *MBB = AfterMI->getParent();
4402 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4403 DebugLoc DL = AfterMI->getDebugLoc();
4404 if (SPAdjust)
4405 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4406
4407 // Restore BP.
4408 if (BP.isValid()) {
4409 BuildMI(*MBB, Pos, DL,
4410 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4411 }
4412
4413 // Restore FP.
4414 if (FP.isValid()) {
4415 BuildMI(*MBB, Pos, DL,
4416 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4417
4418 // Emit unwinding information.
4419 if (needsDwarfCFI(MF)) {
4420 // Restore original frame with .cfi_restore_state.
4421 unsigned CFIIndex =
4422          MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
4423      BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4424 .addCFIIndex(CFIIndex);
4425 }
4426 }
4427}
4428
4429void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4430    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
4431    MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4432 assert(SpillFP || SpillBP);
4433
4434 Register FP, BP;
4435 const TargetRegisterClass *RC;
4436 unsigned NumRegs = 0;
4437
4438 if (SpillFP) {
4439 FP = TRI->getFrameRegister(MF);
4440 if (STI.isTarget64BitILP32())
4441      FP = Register(getX86SubSuperRegister(FP, 64));
4442    RC = TRI->getMinimalPhysRegClass(FP);
4443 ++NumRegs;
4444 }
4445 if (SpillBP) {
4446 BP = TRI->getBaseRegister();
4447 if (STI.isTarget64BitILP32())
4448 BP = Register(getX86SubSuperRegister(BP, 64));
4449 RC = TRI->getMinimalPhysRegClass(BP);
4450 ++NumRegs;
4451 }
4452 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4453
4454 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4455 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4456}
4457
4458bool X86FrameLowering::skipSpillFPBP(
4459    MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
4460  if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4461 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4462 // SaveRbx = COPY RBX
4463 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4464 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4465 // We should skip this instruction sequence.
4466 int FI;
4467 Register Reg;
4468 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4469 MI->getOperand(1).getReg() == X86::RBX) &&
4470 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4471 ++MI;
4472 return true;
4473 }
4474 return false;
4475}
4476
4477static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
4478                         const TargetRegisterInfo *TRI, bool &AccessFP,
4479 bool &AccessBP) {
4480 AccessFP = AccessBP = false;
4481 if (FP) {
4482 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4483 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4484 AccessFP = true;
4485 }
4486 if (BP) {
4487 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4488 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4489 AccessBP = true;
4490 }
4491 return AccessFP || AccessBP;
4492}
4493
4494// An invoke instruction has been lowered to a normal function call. We try to
4495// figure out if MI comes from an invoke.
4496// Is there a better way to do this?
4497static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4498 if (!MI.isCall())
4499 return false;
4500 if (InsideEHLabels)
4501 return true;
4502
4503 const MachineBasicBlock *MBB = MI.getParent();
4504 if (!MBB->hasEHPadSuccessor())
4505 return false;
4506
4507 // Check if there is another call instruction from MI to the end of MBB.
4508  MachineBasicBlock::const_iterator MBBI(MI), ME = MBB->end();
4509  for (++MBBI; MBBI != ME; ++MBBI)
4510 if (MBBI->isCall())
4511 return false;
4512 return true;
4513}
4514
4515/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4516/// interfered stack access in the range, usually generated by register spill.
4517void X86FrameLowering::checkInterferedAccess(
4518    MachineFunction &MF, MachineBasicBlock::reverse_iterator DefMI,
4519    MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4520 bool SpillBP) const {
4521 if (DefMI == KillMI)
4522 return;
4523 if (TRI->hasBasePointer(MF)) {
4524 if (!SpillBP)
4525 return;
4526 } else {
4527 if (!SpillFP)
4528 return;
4529 }
4530
4531 auto MI = KillMI;
4532 while (MI != DefMI) {
4533 if (any_of(MI->operands(),
4534 [](const MachineOperand &MO) { return MO.isFI(); }))
4535 MF.getContext().reportError(SMLoc(),
4536 "Interference usage of base pointer/frame "
4537 "pointer.");
4538 MI++;
4539 }
4540}
4541
4542/// If a function uses a base pointer and the base pointer is clobbered by
4543/// inline asm, RA doesn't detect this case, and after the inline asm the base
4544/// pointer contains a garbage value.
4545/// For example, if a 32-bit x86 function uses the base pointer esi, and esi is
4546/// clobbered by the following inline asm
4547///   asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4548/// we need to save esi before the asm and restore it after the asm.
4549///
4550/// The problem can also occur with the frame pointer if there is a function
4551/// call, and the callee uses a different calling convention and clobbers the fp.
4552///
4553/// Because normal frame objects (spill slots) are accessed through the fp/bp
4554/// register, we can't spill fp/bp to normal spill slots.
4555///
4556/// FIXME: There are 2 possible enhancements:
4557/// 1. In many cases there are different physical registers not clobbered by
4558/// inline asm, we can use one of them as base pointer. Or use a virtual
4559/// register as base pointer and let RA allocate a physical register to it.
4560/// 2. If there is no other instructions access stack with fp/bp from the
4561/// inline asm to the epilog, and no cfi requirement for a correct fp, we can
4562/// skip the save and restore operations.
4563void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
4564  Register FP, BP;
4565  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
4566  if (TFI.hasFP(MF))
4567 FP = TRI->getFrameRegister(MF);
4568 if (TRI->hasBasePointer(MF))
4569 BP = TRI->getBaseRegister();
4570
4571  // Currently only inline asm and function calls can clobber fp/bp, so we can
4572  // do a quick test and return early.
4573 if (!MF.hasInlineAsm()) {
4574    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4575    if (!X86FI->getFPClobberedByCall())
4576 FP = 0;
4577 if (!X86FI->getBPClobberedByCall())
4578 BP = 0;
4579 }
4580 if (!FP && !BP)
4581 return;
4582
4583 for (MachineBasicBlock &MBB : MF) {
4584 bool InsideEHLabels = false;
4585 auto MI = MBB.rbegin(), ME = MBB.rend();
4586 auto TermMI = MBB.getFirstTerminator();
4587 if (TermMI == MBB.begin())
4588 continue;
4589 MI = *(std::prev(TermMI));
4590
4591 while (MI != ME) {
4592 // Skip frame setup/destroy instructions.
4593 // Skip Invoke (call inside try block) instructions.
4594 // Skip instructions handled by target.
4595 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4596          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
4597          isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4598 ++MI;
4599 continue;
4600 }
4601
4602 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4603 InsideEHLabels = !InsideEHLabels;
4604 ++MI;
4605 continue;
4606 }
4607
4608 bool AccessFP, AccessBP;
4609 // Check if fp or bp is used in MI.
4610 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4611 ++MI;
4612 continue;
4613 }
4614
4615 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4616 // used.
4617 bool FPLive = false, BPLive = false;
4618 bool SpillFP = false, SpillBP = false;
4619 auto DefMI = MI, KillMI = MI;
4620 do {
4621 SpillFP |= AccessFP;
4622 SpillBP |= AccessBP;
4623
4624 // Maintain FPLive and BPLive.
4625 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4626 FPLive = false;
4627 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4628 FPLive = true;
4629 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4630 BPLive = false;
4631 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4632 BPLive = true;
4633
4634 DefMI = MI++;
4635 } while ((MI != ME) &&
4636 (FPLive || BPLive ||
4637 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4638
4639 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4640 if (FPLive && !SpillBP)
4641 continue;
4642
4643 // If the bp is clobbered by a call, we should save and restore outside of
4644 // the frame setup instructions.
4645 if (KillMI->isCall() && DefMI != ME) {
4646 auto FrameSetup = std::next(DefMI);
4647        // Look for a frame setup instruction toward the start of the BB.
4648        // If we reach another call instruction, it means there is no frame
4649        // setup instruction for the current call instruction.
4650 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4651 !FrameSetup->isCall())
4652 ++FrameSetup;
4653 // If a frame setup instruction is found, we need to find out the
4654 // corresponding frame destroy instruction.
4655 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4656 (TII.getFrameSize(*FrameSetup) ||
4657 TII.getFrameAdjustment(*FrameSetup))) {
4658 while (!TII.isFrameInstr(*KillMI))
4659 --KillMI;
4660 DefMI = FrameSetup;
4661 MI = DefMI;
4662 ++MI;
4663 }
4664 }
4665
4666 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4667
4668 // Call target function to spill and restore FP and BP registers.
4669 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4670 }
4671 }
4672}
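// Illustrative end result for the 32-bit example in the comment above: the
// inline asm that clobbers the base pointer esi ends up bracketed by a save
// and a restore (plus any SP adjustment needed to keep the stack aligned):
//   pushl %esi
//   rep movsb        # the inline asm
//   popl  %esi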
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isFuncletReturnInstr(const MachineInstr &MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
constexpr int64_t MaxSPChunk
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
reverse_iterator rend() const
Definition ArrayRef.h:139
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
reverse_iterator rbegin() const
Definition ArrayRef.h:138
BitVector & reset()
Definition BitVector.h:392
BitVector & set()
Definition BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
A debug info location.
Definition DebugLoc.h:124
unsigned size() const
Definition DenseMap.h:108
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition Function.h:903
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
size_t arg_size() const
Definition Function.h:899
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:681
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Module * getParent()
Get the module that this global value is contained inside of...
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition MCDwarf.h:592
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition MCDwarf.h:703
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition MCDwarf.h:666
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition MCDwarf.h:686
OpType getOperation() const
Definition MCDwarf.h:720
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition MCDwarf.h:608
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition MCDwarf.h:691
const MCObjectFileInfo * getObjectFileInfo() const
Definition MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:414
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
Metadata node.
Definition Metadata.h:1077
A single uniqued string.
Definition Metadata.h:720
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:617
Machine Value Type.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminator.
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
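A hedged example of the MachineBasicBlock liveness queries above: before clobbering a scratch register at a given point, prologue/epilogue-style code can check that the register is dead there. The helper name is hypothetical:

#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// Returns true when Reg is known dead just before Before, so it can be
// clobbered without saving it first.
static bool isSafeScratch(const MachineBasicBlock &MBB,
                          MachineBasicBlock::const_iterator Before,
                          const TargetRegisterInfo *TRI, MCRegister Reg) {
  // LQR_Dead: no def/use found in the neighborhood and Reg is not live-in.
  return MBB.computeRegisterLiveness(TRI, Reg, Before) ==
         MachineBasicBlock::LQR_Dead;
}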
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack frame for this function contains any variable sized objects.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a call to @llvm.frameaddress in this function.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the default stack alignment provided by the target.
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a call to builtin @llvm.experimental.patchpoint.
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative identifier to represent it.
LLVM_ABI uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a call to builtin @llvm.experimental.stackmap.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to the callee saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipulate the stack pointer.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
LLVM_ABI int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
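A short sketch of the MachineFrameInfo calls listed above: reserve a spill slot, then walk the existing objects. The helper name, size, and alignment are illustrative, not values used by this file:

#include <algorithm>
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static int64_t lowestLocalObjectOffset(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Reserve an 8-byte, 8-aligned spill slot (illustrative values).
  (void)MFI.CreateSpillStackObject(/*Size=*/8, Align(8));
  int64_t Lowest = 0;
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
       ++I) {
    if (MFI.isFixedObjectIndex(I) || MFI.isDeadObjectIndex(I))
      continue; // fixed objects are placed relative to the incoming SP
    Lowest = std::min(Lowest, MFI.getObjectOffset(I));
  }
  return Lowest;
}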
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution from one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
BasicBlockListType::iterator iterator
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
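The MachineFunction accessors above combine into the usual frame-lowering query pattern; a hedged sketch, where the predicate itself is hypothetical:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Hypothetical helper: does this function need Windows-style unwind codes
// and a non-empty frame?
static bool needsWinCFIForFrame(const MachineFunction &MF) {
  bool HasWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  return HasWinCFI && MF.getFrameInfo().getStackSize() > 0;
}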
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
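The MachineInstrBuilder methods above chain off BuildMI. An illustrative (not file-specific) use that builds "mov eax, 42" and tags it as prologue code; the opcode and register choices are assumptions for the example:

#include "X86InstrInfo.h" // in-tree header providing the X86:: enums
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

static void emitMovImm(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                       const TargetInstrInfo &TII) {
  // mov eax, 42, tagged FrameSetup so later passes treat it as prologue code.
  BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
      .addImm(42)
      .setMIFlag(MachineInstr::FrameSetup);
}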
Representation of each machine instruction.
unsigned getNumOperands() const
Returns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition Module.cpp:296
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition Module.cpp:597
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:303
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
A tuple of MDNodes.
Definition Metadata.h:1753
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
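StackOffset, as returned by the getFrameIndexReference overloads later in this list, carries only a fixed component on x86; a tiny self-contained illustration:

#include "llvm/Support/TypeSize.h"

// A reference 16 bytes below the chosen base register; on x86 only the fixed
// component is ever non-zero.
const llvm::StackOffset Off = llvm::StackOffset::getFixed(-16);
const int64_t Bytes = Off.getFixed(); // -16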
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on entrance to a function.
TargetFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl=Align(1), bool StackReal=true)
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disabled for the given machine function.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
bool isUEFI() const
Tests whether the OS is UEFI.
Definition Triple.h:674
bool isOSWindows() const
Tests whether the OS is Windows.
Definition Triple.h:679
Value wrapper in the Metadata hierarchy.
Definition Metadata.h:457
Value * getValue() const
Definition Metadata.h:497
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack function) for the given machine function.
void spillFPBP(MachineFunction &MF) const override
If a function uses a base pointer and the base pointer is clobbered by inline asm, RA doesn't detect this...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simplified.
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value, i.e. the one valid at the beginning of the function (before any stack operations).
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function's frame layout (MF.getFrameInfo()) is finalized.
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
int64_t mergeSPAdd(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int64_t AddOffset, bool doMergeWithPrevious) const
Equivalent to: mergeSPUpdates(MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; }, doMergeWithPrevious).
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a frame index location.
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of load instructions via loadRegFromStackSlot().
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value, i.e. the one valid at the beginning of the function (before any stack operations).
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits DWARF info specifying offsets of callee-saved registers and the frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex operands are eliminated, but after the frame is finalized.
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will be the preferred value for FrameReg.
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of store instructions via storeRegToStackSlot().
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
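For the emitSPUpdate/BuildCFI entries above, a hedged sketch of what a small stack allocation typically lowers to on x86-64; the actual opcode selection in this file is more involved, and the helper name is hypothetical:

#include "X86InstrInfo.h" // in-tree header providing the X86:: enums
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Illustrative: allocate 24 bytes with "sub rsp, 24", marked FrameSetup so
// the CFI/unwind machinery can recognize it as prologue code.
static void allocate24Bytes(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, const TargetInstrInfo &TII) {
  BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
      .addReg(X86::RSP)
      .addImm(24)
      .setMIFlag(MachineInstr::FrameSetup);
}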
X86MachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private X86 target-specific information for each MachineFunction.
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
const X86TargetLowering * getTargetLowering() const override
bool isTargetWindowsCoreCLR() const
self_iterator getIterator()
Definition ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition ARMWinEH.h:200
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always be performed.
Definition CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is the offset to the GOT entry for the symbol name from the base of the GOT.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
void stable_sort(R &&Range)
Definition STLExtras.h:2060
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
Definition CodeGen.h:55
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:551
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
auto reverse(ContainerTy &&C)
Definition STLExtras.h:420
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
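isInt/isUInt are the range checks frame code uses when choosing between imm8 and imm32 encodings; a self-contained illustration with made-up values:

#include "llvm/Support/MathExtras.h"

static_assert(llvm::isInt<8>(127) && !llvm::isInt<8>(128),
              "127 fits in a signed 8-bit immediate; 128 does not");
static_assert(llvm::isUInt<32>(0xFFFFFFFFULL) &&
                  !llvm::isUInt<32>(0x100000000ULL),
              "32-bit unsigned range check");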
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
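alignTo, alignDown, and isAligned (listed earlier) together cover the rounding used throughout frame layout; an illustration with made-up sizes:

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"

// Round a 13-byte object up to a 16-byte slot, round a base address down,
// and verify the result.
const uint64_t SlotSize = llvm::alignTo(13, llvm::Align(16));   // 16
const uint64_t SlotBase = llvm::alignDown(100, 16);             // 96
const bool SlotOk = llvm::isAligned(llvm::Align(16), SlotSize); // true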
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition LEB128.h:24
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition STLExtras.h:1963
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition LEB128.h:81
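encodeULEB128/encodeSLEB128 write variable-length DWARF integers. A small sketch of hand-assembling a .cfi_escape payload; the DW_CFA_GNU_args_size opcode byte 0x2e is standard DWARF, but the helper itself is hypothetical:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static SmallString<8> argsSizeEscape(uint64_t ArgsSize) {
  SmallString<8> Bytes;
  raw_svector_ostream OS(Bytes);
  OS << char(0x2e);            // DW_CFA_GNU_args_size
  encodeULEB128(ArgsSize, OS); // operand, ULEB128-encoded
  return Bytes;
}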
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, Register Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset], i.e., one with no scale or index register.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR value.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::(anonymous union) Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
SmallVector< WinEHHandlerType, 1 > HandlerArray