//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64PrologueEpilogue.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCContext.h"

#define DEBUG_TYPE "frame-info"

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

namespace llvm {

static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
                         RTLIB::Libcall LC) {
  return MO.isSymbol() &&
         StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
}
bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
  return AFI->hasStreamingModeChanges() &&
         !MF.getSubtarget<AArch64Subtarget>().hasSVE();
}

bool AArch64PrologueEpilogueCommon::isVGInstruction(
    MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
  unsigned Opc = MBBI->getOpcode();
  if (Opc == AArch64::CNTD_XPiI)
    return true;

  if (!requiresGetVGCall())
    return false;

  if (Opc == AArch64::BL)
    return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);

  return Opc == TargetOpcode::COPY;
}

// Convenience function to determine whether I is part of the ZPR callee saves.
static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  case AArch64::STR_ZXI:
  case AArch64::LDR_ZXI:
  case AArch64::PTRUE_C_B:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  case AArch64::SEH_SavePReg:
  case AArch64::SEH_SaveZReg:
    return true;
  }
}

// Convenience function to determine whether I is part of the PPR callee saves.
static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::STR_PXI:
  case AArch64::LDR_PXI:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  }
}

// Convenience function to determine whether I is part of the SVE callee saves.
static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
  return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
}

AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
    MachineFunction &MF, MachineBasicBlock &MBB,
    const AArch64FrameLowering &AFL)
    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
      RegInfo(*Subtarget.getRegisterInfo()) {
  TII = Subtarget.getInstrInfo();
  AFI = MF.getInfo<AArch64FunctionInfo>();

  HasFP = AFL.hasFP(MF);
  NeedsWinCFI = AFL.needsWinCFI(MF);
}

MachineBasicBlock::iterator
AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
    bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
  unsigned NewOpc;

  // If the function contains streaming mode changes, we expect instructions
  // to calculate the value of VG before spilling. Move past these instructions
  // if necessary.
  if (AFL.requiresSaveVG(MF)) {
    auto &TLI = *Subtarget.getTargetLowering();
    while (isVGInstruction(MBBI, TLI))
      ++MBBI;
  }

  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
  int64_t MinOffset, MaxOffset;
  bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
      NewOpc, Scale, Width, MinOffset, MaxOffset);
  (void)Success;
  assert(Success && "unknown load/store opcode");

  // If the first store isn't right where we want SP then we can't fold the
  // update in so create a normal arithmetic instruction instead.
  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
    // If we are destroying the frame, make sure we add the increment after the
    // last frame operation.
    if (FrameFlag == MachineInstr::FrameDestroy) {
      ++MBBI;
      // Also skip the SEH instruction, if needed.
      if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
        ++MBBI;
    }
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
                    StackOffset::getFixed(CFAOffset));

    return std::prev(MBBI);
  }

  // Get rid of the SEH code associated with the old instruction.
  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
    if (AArch64InstrInfo::isSEHInstruction(*SEH))
      SEH->eraseFromParent();
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands());

  // Generate a new SEH code that corresponds to the new instruction.
  if (NeedsWinCFI) {
    HasWinCFI = true;
    AFL.insertSEH(*MIB, *TII, FrameFlag);
  }

  if (EmitCFI)
    CFIInstBuilder(MBB, MBBI, FrameFlag)
        .buildDefCFAOffset(CFAOffset - CSStackSizeInc);

  return std::prev(MBB.erase(MBBI));
}
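
// Illustrative example (not from the source): with CSStackSizeInc == -16, a
// first callee-save store such as
//   stp x29, x30, [sp]          ; STPXi, offset 0
// is rewritten by the conversion above into the pre-decrement form
//   stp x29, x30, [sp, #-16]!   ; STPXpre, scaled immediate -16/8 == -2
// folding the `sub sp, sp, #16` stack bump into the store itself.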

// Fix up the SEH opcode associated with the save/restore instruction.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
                           unsigned LocalStackSize) {
  MachineOperand *ImmOpnd = nullptr;
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Fix the offset in the SEH instruction");
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    break;
  }
  if (ImmOpnd)
    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}
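
// For illustration (hypothetical operands): if the prologue recorded
//   SEH_SaveRegP x19, x20, #16
// and LocalStackSize is 32, fixupSEHOpcode rewrites the immediate to #48 so
// the unwind info describes the slot relative to the final SP position.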

void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
    MachineInstr &MI, uint64_t LocalStackSize) const {
  if (AArch64InstrInfo::isSEHInstruction(MI))
    return;

  unsigned Opc = MI.getOpcode();
  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // The last operand is the immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  if (NeedsWinCFI) {
    HasWinCFI = true;
    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
           "Expecting a SEH instruction");
    fixupSEHOpcode(MBBI, LocalStackSize);
  }
}
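
// Worked example with illustrative values: under a combined SP bump with
// LocalStackSize == 32, a spill emitted as
//   stp x19, x20, [sp, #16]   ; STPXi, scaled immediate 2 (Scale == 8)
// becomes
//   stp x19, x20, [sp, #48]   ; scaled immediate 2 + 32/8 == 6
// since the single SP decrement now also covers the local area.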

bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
    uint64_t StackBumpBytes) const {
  if (AFL.homogeneousPrologEpilog(MF))
    return false;

  if (AFI->getLocalStackSize() == 0)
    return false;

  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
  // (to force a stp with predecrement) to match the packed unwind format,
  // provided that there actually are any callee saved registers to merge the
  // decrement with.
  // This is potentially marginally slower, but allows using the packed
  // unwind format for functions that both have a local area and callee saved
  // registers. Using the packed unwind format notably reduces the size of
  // the unwind info.
  if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
      MF.getFunction().hasOptSize())
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores.
  if (StackBumpBytes >= 512 ||
      AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo.hasStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (AFL.canUseRedZone(MF))
    return false;

  // When there is an SVE area on the stack, always allocate the
  // callee-saves and spills/locals separately.
  if (AFI->hasSVEStackSize())
    return false;

  return true;
}
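
// For example (sizes assumed for illustration): 16 bytes of callee-saves plus
// 480 bytes of locals gives StackBumpBytes == 496 < 512, so the bump can fold
// into the callee-save stp; a 1024-byte frame instead gets separate
// callee-save and local-area SP adjustments.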

AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               const AArch64FrameLowering &AFL)
    : AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
  EmitCFI = AFI->needsDwarfUnwindInfo(MF);
  EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  IsFunclet = MBB.isEHFuncletEntry();
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);

#ifndef NDEBUG
  collectBlockLiveins();
#endif
}

#ifndef NDEBUG
/// Collect live registers from the end of \p MI's parent up to (including) \p
/// MI in \p LiveRegs.
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
                                LivePhysRegs &LiveRegs) {

  MachineBasicBlock &MBB = *MI.getParent();
  LiveRegs.addLiveOuts(MBB);
  for (const MachineInstr &MI :
       reverse(make_range(MI.getIterator(), MBB.instr_end())))
    LiveRegs.stepBackward(MI);
}

void AArch64PrologueEmitter::collectBlockLiveins() {
  // Collect live registers from the end of MBB up to the start of the
  // existing frame setup instructions.
  PrologueEndI = MBB.begin();
  while (PrologueEndI != MBB.end() &&
         PrologueEndI->getFlag(MachineInstr::FrameSetup))
    ++PrologueEndI;

  if (PrologueEndI != MBB.end()) {
    getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
    // Ignore registers used for stack management for now.
    LiveRegs.removeReg(AArch64::SP);
    LiveRegs.removeReg(AArch64::X19);
    LiveRegs.removeReg(AArch64::FP);
    LiveRegs.removeReg(AArch64::LR);

    // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
    // This is necessary to spill VG if required where SVE is unavailable, but
    // X0 is preserved around this call.
    if (requiresGetVGCall())
      LiveRegs.removeReg(AArch64::X0);
  }
}

void AArch64PrologueEmitter::verifyPrologueClobbers() const {
  if (PrologueEndI == MBB.end())
    return;
  // Check if any of the newly inserted instructions clobber any of the live
  // registers.
  for (MachineInstr &MI :
       make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
    for (auto &Op : MI.operands())
      if (Op.isReg() && Op.isDef())
        assert(!LiveRegs.contains(Op.getReg()) &&
               "live register clobbered by inserted prologue instructions");
  }
}
#endif

void AArch64PrologueEmitter::determineLocalsStackSize(
    uint64_t StackSize, uint64_t PrologueSaveSize) {
  AFI->setLocalStackSize(StackSize - PrologueSaveSize);
  CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
}

// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of an SVE register.
static int64_t upperBound(StackOffset Size) {
  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}
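
// Worked example: SVE registers are at most 2048 bits, so vscale is at most
// 16 and one scalable byte can expand to at most 16 real bytes. A StackOffset
// of {48 scalable, 32 fixed} therefore bounds to 48 * 16 + 32 == 800 bytes.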

void AArch64PrologueEmitter::allocateStackSpace(
    MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
    StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) {

  if (!AllocSize)
    return;

  DebugLoc DL;
  const int64_t MaxAlign = MFI.getMaxAlign().value();
  const uint64_t AndMask = ~(MaxAlign - 1);

  if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
    Register TargetReg = RealignmentPadding
                             ? AFL.findScratchNonCalleeSaveRegister(&MBB)
                             : AArch64::SP;
    // SUB Xd/SP, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, InitialOffset);

    if (RealignmentPadding) {
      // AND SP, X9, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(TargetReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI->setStackRealigned(true);

      // No need for SEH instructions here; if we're realigning the stack,
      // we've set a frame pointer and already finished the SEH prologue.
      assert(!NeedsWinCFI);
    }
    return;
  }

  //
  // Stack probing allocation.
  //

  // Fixed length allocation. If we don't need to re-align the stack and don't
  // have SVE objects, we can use a more efficient sequence for stack probing.
  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
    assert(ScratchReg != AArch64::NoRegister);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
        .addDef(ScratchReg)
        .addImm(AllocSize.getFixed())
        .addImm(InitialOffset.getFixed())
        .addImm(InitialOffset.getScalable());
    // The fixed allocation may leave unprobed bytes at the top of the
    // stack. If we have subsequent allocation (e.g. if we have variable-sized
    // objects), we need to issue an extra probe, so these allocations start in
    // a known state.
    if (FollowupAllocs) {
      // STR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    return;
  }

  // Variable length allocation.

  // If the (unknown) allocation size cannot exceed the probe size, decrement
  // the stack pointer right away.
  int64_t ProbeSize = AFI->getStackProbeSize();
  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
                              ? AFL.findScratchNonCalleeSaveRegister(&MBB)
                              : AArch64::SP;
    assert(ScratchReg != AArch64::NoRegister);
    // SUB Xd, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, InitialOffset);
    if (RealignmentPadding) {
      // AND SP, Xn, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI->setStackRealigned(true);
    }
    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
                              AArch64::StackProbeMaxUnprobedStack) {
      // STR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  // Emit a variable-length allocation probing loop.
  // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
  // each of them guaranteed to adjust the stack by less than the probe size.
  Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
  assert(TargetReg != AArch64::NoRegister);
  // SUB Xd, SP, AllocSize
  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
                  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {
    // AND Xn, Xn, 0b11111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
        .addReg(TargetReg, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
        .setMIFlags(MachineInstr::FrameSetup);
  }

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
      .addReg(TargetReg);
  if (EmitCFI) {
    // Set the CFA register back to SP.
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
        .buildDefCFARegister(AArch64::SP);
  }
  if (RealignmentPadding)
    AFI->setStackRealigned(true);
}
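
// For the fixed-length probed case above, PROBED_STACKALLOC is later expanded
// by the target into a probing sequence roughly of this shape (sketch only;
// the real expansion lives in the pseudo's lowering):
//   sub  x9, sp, #AllocSize     ; x9 = final SP
// loop:
//   sub  sp, sp, #ProbeSize     ; step down one probe-size chunk
//   str  xzr, [sp]              ; touch the page
//   cmp  sp, x9
//   b.gt loop
// so that no gap larger than the probe size is left untouched.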

void AArch64PrologueEmitter::emitPrologue() {
  const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
  const MachineBasicBlock::iterator EndI = MBB.end();

  // At this point, we're going to decide whether or not the function uses a
  // redzone. In most cases, the function doesn't have a redzone so let's
  // assume that's false and set it to true in the case that there's a redzone.
  AFI->setHasRedZone(false);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // In some cases, particularly with CallingConv::SwiftTail, it is possible to
  // have a tail-call where the caller only needs to adjust the stack pointer in
  // the epilogue. In this case, we still need to emit a SEH prologue sequence.
  // See `seh-minimal-prologue-epilogue.ll` test cases.
  if (AFI->getArgumentStackToRestore())
    HasWinCFI = true;

  if (AFI->shouldSignReturnAddress(MF)) {
    // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
    // are inserted by emitPacRetPlusLeafHardening().
    if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
      BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    // AArch64PointerAuth pass will insert SEH_PACSignLR.
    HasWinCFI |= NeedsWinCFI;
  }

  if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
    emitShadowCallStackPrologue(PrologueBeginI, DL);
    HasWinCFI |= NeedsWinCFI;
  }

  if (EmitCFI && AFI->isMTETagged())
    BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
        .setMIFlag(MachineInstr::FrameSetup);

  // We signal the presence of a Swift extended frame to external tools by
  // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
  // ORR is sufficient; it is assumed a Swift kernel would initialize the TBI
  // bits so that is still true.
  if (HasFP && AFI->hasSwiftAsyncContext())
    emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Set tagged base pointer to the requested stack slot. Ideally it should
  // match the SP value after the prologue.
  if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
    AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
  else
    AFI->setTaggedBasePointerOffset(MFI.getStackSize());

  // getStackSize() includes all the locals in its size calculation. We don't
  // include these locals when computing the stack size of a funclet, as they
  // are allocated in the parent's stack frame and accessed via the frame
  // pointer from the funclet. We only save the callee saved registers in the
  // funclet, which are really the callee saved registers of the parent
  // function, including the funclet.
  int64_t NumBytes =
      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
  if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
    return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);

  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  // Windows unwind can't represent the required stack adjustments if we have
  // both SVE callee-saves and dynamic stack allocations, and the frame
  // pointer is before the SVE spills. The allocation of the frame pointer
  // must be the last instruction in the prologue so the unwinder can restore
  // the stack pointer correctly. (And there isn't any unwind opcode for
  // `addvl sp, x29, -17`.)
  //
  // Because of this, we do spills in the opposite order on Windows: first SVE,
  // then GPRs. The main side-effect of this is that it makes accessing
  // parameters passed on the stack more expensive.
  //
  // We could consider rearranging the spills for simpler cases.
  bool FPAfterSVECalleeSaves =
      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();

  if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
    reportFatalUsageError("SME hazard padding is not supported on Windows");

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  determineLocalsStackSize(NumBytes, PrologueSaveSize);

  MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
  if (FPAfterSVECalleeSaves) {
    // If we're doing SVE saves first, we need to immediately allocate space
    // for fixed objects, then space for the SVE callee saves.
    //
    // Windows unwind requires that the scalable size is a multiple of 16;
    // that's handled when the callee-saved size is computed.
    auto SaveSize =
        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
        StackOffset::getFixed(FixedObject);
    allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
                       /*FollowupAllocs=*/true);
    NumBytes -= FixedObject;

    // Now allocate space for the GPR callee saves.
    MachineBasicBlock::iterator MBBI = PrologueBeginI;
    while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
      ++MBBI;
    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
    NumBytes -= AFI->getCalleeSavedStackSize();
  } else if (CombineSPBump) {
    assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
    emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-NumBytes), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitAsyncCFI);
    NumBytes = 0;
  } else if (HomPrologEpilog) {
    // Stack has been already adjusted.
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
        PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  auto &TLI = *Subtarget.getTargetLowering();

  MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
  while (AfterGPRSavesI != EndI &&
         AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
         !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
    if (CombineSPBump &&
        // Only fix-up frame-setup load/store instructions.
        (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
      fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
                                        AFI->getLocalStackSize());
    ++AfterGPRSavesI;
  }

  // For funclets the FP belongs to the containing function. Only set up FP if
  // we actually need to.
  if (!IsFunclet && HasFP)
    emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);

  // Now emit the moves for whatever callee saved regs we have (including FP,
  // LR if those are saved). Frame instructions for SVE registers are emitted
  // later, after the instructions which actually save the SVE regs.
  if (EmitAsyncCFI)
    emitCalleeSavedGPRLocations(AfterGPRSavesI);

  // Alignment is required for the parent frame, not the funclet.
  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
  const int64_t RealignmentPadding =
      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
          ? MFI.getMaxAlign().value() - 16
          : 0;

  if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
    emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);

  StackOffset PPRCalleeSavesSize =
      StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
  StackOffset ZPRCalleeSavesSize =
      StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
  StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
  StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
  StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;

  std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
      ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;

  StackOffset CFAOffset =
      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
  MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
  if (!FPAfterSVECalleeSaves) {
    // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
    // areas.
    PPRCalleeSavesBegin = AfterGPRSavesI;
    if (PPRCalleeSavesSize) {
      LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
                        << PPRCalleeSavesSize.getScalable() << "\n");

      assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) &&
             "Unexpected instruction");
      while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
             AfterSVESavesI != MBB.getFirstTerminator())
        ++AfterSVESavesI;
    }
    PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
    if (ZPRCalleeSavesSize) {
      LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
                        << ZPRCalleeSavesSize.getScalable() << "\n");
      assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) &&
             "Unexpected instruction");
      while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
             AfterSVESavesI != MBB.getFirstTerminator())
        ++AfterSVESavesI;
    }
    ZPRCalleeSavesEnd = AfterSVESavesI;
  }

  if (EmitAsyncCFI)
    emitCalleeSavedSVELocations(AfterSVESavesI);

  if (AFI->hasSplitSVEObjects()) {
    assert(!FPAfterSVECalleeSaves &&
           "Cannot use FPAfterSVECalleeSaves with aarch64-split-sve-objects");
    assert(!AFL.canUseRedZone(MF) &&
           "Cannot use redzone with aarch64-split-sve-objects");
    // TODO: Handle HasWinCFI/NeedsWinCFI?
    assert(!NeedsWinCFI &&
           "WinCFI with aarch64-split-sve-objects is not supported");

    // Split ZPR and PPR allocation.
    // Allocate PPR callee saves.
    allocateStackSpace(*PPRCalleeSavesBegin, 0, PPRCalleeSavesSize,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects() || ZPRCalleeSavesSize ||
                           ZPRLocalsSize || PPRLocalsSize);
    CFAOffset += PPRCalleeSavesSize;

    // Allocate PPR locals + ZPR callee saves.
    assert(PPRCalleeSavesEnd == ZPRCalleeSavesBegin &&
           "Expected ZPR callee saves after PPR locals");
    allocateStackSpace(*PPRCalleeSavesEnd, RealignmentPadding,
                       PPRLocalsSize + ZPRCalleeSavesSize,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects() || ZPRLocalsSize);
    CFAOffset += PPRLocalsSize + ZPRCalleeSavesSize;

    // Allocate ZPR locals.
    allocateStackSpace(*ZPRCalleeSavesEnd, RealignmentPadding,
                       ZPRLocalsSize + StackOffset::getFixed(NumBytes),
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects());
  } else {
    // Allocate space for the callee saves (if any).
    StackOffset LocalsSize =
        PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
    if (!FPAfterSVECalleeSaves)
      allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize,
                         EmitAsyncCFI && !HasFP, CFAOffset,
                         MFI.hasVarSizedObjects() || LocalsSize);
    CFAOffset += SVECalleeSavesSize;

    // Allocate space for the rest of the frame including SVE locals. Align the
    // stack as necessary.
    assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
           "Cannot use redzone with stack realignment");
    if (!AFL.canUseRedZone(MF)) {
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
      allocateStackSpace(AfterSVESavesI, RealignmentPadding,
                         SVELocalsSize + StackOffset::getFixed(NumBytes),
                         EmitAsyncCFI && !HasFP, CFAOffset,
                         MFI.hasVarSizedObjects());
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  // For funclets the BP belongs to the containing function.
  if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
                     AArch64::SP, false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  // The very last FrameSetup instruction indicates the end of prologue. Emit a
  // SEH opcode indicating the prologue end.
  if (NeedsWinCFI && HasWinCFI) {
    BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // SEH funclets are passed the frame pointer in X1. If the parent
  // function uses the base register, then the base register is used
  // directly, and is not retrieved from X1.
  if (IsFunclet && F.hasPersonalityFn()) {
    EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
    if (isAsynchronousEHPersonality(Per)) {
      BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
              AArch64::FP)
          .addReg(AArch64::X1)
          .setMIFlag(MachineInstr::FrameSetup);
      MBB.addLiveIn(AArch64::X1);
    }
  }

  if (EmitCFI && !EmitAsyncCFI) {
    if (HasFP) {
      emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
    } else {
      StackOffset TotalSize =
          AFL.getSVEStackSize(MF) +
          StackOffset::getFixed((int64_t)MFI.getStackSize());
      CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
      CFIBuilder.insertCFIInst(
          createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
                       TotalSize, /*LastAdjustmentWasScalable=*/false));
    }
    emitCalleeSavedGPRLocations(AfterSVESavesI);
    emitCalleeSavedSVELocations(AfterSVESavesI);
  }
}
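
// Putting the pieces together, a typical non-Windows, non-SVE prologue built
// by the steps above looks like (illustrative only):
//   stp x29, x30, [sp, #-16]!   ; converted callee-save store + SP bump
//   mov x29, sp                 ; emitFramePointerSetup
//   sub sp, sp, #LocalsSize     ; allocateStackSpace for the locals
// with CFI or SEH opcodes interleaved as requested by EmitCFI/NeedsWinCFI.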

void AArch64PrologueEmitter::emitShadowCallStackPrologue(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
  // Shadow call stack prolog: str x30, [x18], #8
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
      .addReg(AArch64::X18, RegState::Define)
      .addReg(AArch64::LR)
      .addReg(AArch64::X18)
      .addImm(8)
      .setMIFlag(MachineInstr::FrameSetup);

  // This instruction also makes x18 live-in to the entry block.
  MBB.addLiveIn(AArch64::X18);

  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);

  if (EmitCFI) {
    // Emit a CFI instruction that causes 8 to be subtracted from the value of
    // x18 when unwinding past this frame.
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
        .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
  }
}
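
// Sketch of the net effect: after `str x30, [x18], #8`, the DWARF escape says
// the previous x18 is the current x18 minus 8. DW_OP_breg18 takes x18 plus a
// SLEB128 addend, and -8 encodes as the single byte 0x78, which is exactly
// `static_cast<char>(-8) & 0x7f` in CFIInst above.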

void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
  switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
  case SwiftAsyncFramePointerMode::DeploymentBased:
    if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
      // The special symbol below is absolute and has a *value* that can be
      // combined with the frame pointer to signal an extended frame.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
          .addExternalSymbol("swift_async_extendedFramePointerFlags",
                             AArch64II::MO_GOT);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
        HasWinCFI = true;
      }
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
          .addUse(AArch64::FP)
          .addUse(AArch64::X16)
          .addImm(Subtarget.isTargetILP32() ? 32 : 0);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
        HasWinCFI = true;
      }
      break;
    }
    [[fallthrough]];

  case SwiftAsyncFramePointerMode::Always:
    // ORR x29, x29, #0x1000_0000_0000_0000
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
        .addUse(AArch64::FP)
        .addImm(0x1100)
        .setMIFlag(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
      HasWinCFI = true;
    }
    break;

  case SwiftAsyncFramePointerMode::Never:
    break;
  }
}
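
// Note on the ORRXri immediate above: 0x1100 is the encoded 64-bit logical
// immediate N:immr:imms == 1:000100:000000, i.e. a single set bit rotated
// into position 60, which materializes 0x1000_0000_0000_0000 without a
// separate constant load.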

void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
    int64_t NumBytes, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL) const {
  assert(!HasFP && "unexpected function without stack frame but with FP");
  assert(!AFL.getSVEStackSize(MF) &&
         "unexpected function without stack frame but with SVE objects");
  // All of the stack allocation is for locals.
  AFI->setLocalStackSize(NumBytes);
  if (!NumBytes) {
    if (NeedsWinCFI && HasWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    return;
  }
  // REDZONE: If the stack size is less than 128 bytes, we don't need
  // to actually allocate.
  if (AFL.canUseRedZone(MF)) {
    AFI->setHasRedZone(true);
    ++NumRedZoneFunctions;
  } else {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-NumBytes), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
    if (EmitCFI) {
      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
          .buildDefCFAOffset(NumBytes, FrameLabel);
    }
  }

  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }
}

void AArch64PrologueEmitter::emitFramePointerSetup(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    unsigned FixedObject) {
  int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
  if (CombineSPBump)
    FPOffset += AFI->getLocalStackSize();

  if (AFI->hasSwiftAsyncContext()) {
    // Before we update the live FP we have to ensure there's a valid (or
    // null) asynchronous context in its slot just before FP in the frame
    // record, so store it now.
    const auto &Attrs = MF.getFunction().getAttributes();
    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)
      MBB.addLiveIn(AArch64::X22);
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
        .addUse(Reg)
        .addUse(AArch64::SP)
        .addImm(FPOffset - 8)
        .setMIFlag(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
      // to multiple instructions, should be mutually-exclusive.
      assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
      HasWinCFI = true;
    }
  }

  if (HomPrologEpilog) {
    auto Prolog = MBBI;
    --Prolog;
    assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
    Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
  } else {
    // Issue sub fp, sp, FPOffset or
    // mov fp, sp when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
                    StackOffset::getFixed(FPOffset), TII,
                    MachineInstr::FrameSetup);
    if (NeedsWinCFI && HasWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      // After setting up the FP, the rest of the prolog doesn't need to be
      // included in the SEH unwind info.
      NeedsWinCFI = false;
    }
  }
  if (EmitAsyncCFI)
    emitDefineCFAWithFP(MBBI, FixedObject);
}

// Define the current CFA rule to use the provided FP.
void AArch64PrologueEmitter::emitDefineCFAWithFP(
    MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
  const int OffsetToFirstCalleeSaveFromFP =
      AFI->getCalleeSaveBaseToFrameRecordOffset() -
      AFI->getCalleeSavedStackSize();
  Register FramePtr = RegInfo.getFrameRegister(MF);
  CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
      .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
}
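
// Worked example (illustrative values): with FixedObject == 0, a 16-byte
// callee-save area, and a zero base-to-frame-record offset,
// OffsetToFirstCalleeSaveFromFP == -16, so the rule becomes CFA = FP + 16,
// i.e. the CFA sits just above the callee-save area that FP points into.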

void AArch64PrologueEmitter::emitWindowsStackProbe(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
    int64_t RealignmentPadding) const {
  if (AFI->getSVECalleeSavedStackSize())
    report_fatal_error("SVE callee saves not yet supported with stack probing");

  // Find an available register to spill the value of X15 to, if X15 is being
  // used already for nest.
  unsigned X15Scratch = AArch64::NoRegister;
  if (llvm::any_of(MBB.liveins(),
                   [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
                     return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
                                                           LiveIn.PhysReg);
                   })) {
    X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
    assert(X15Scratch != AArch64::NoRegister &&
           (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
#ifndef NDEBUG
    LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
#endif
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
        .addReg(AArch64::XZR)
        .addReg(AArch64::X15, RegState::Undef)
        .addReg(AArch64::X15, RegState::Implicit)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
  if (NeedsWinCFI) {
    HasWinCFI = true;
    // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
    // exceed this amount. We need to move at most 2^24 - 1 into x15.
    // This is at most two instructions, MOVZ followed by MOVK.
    // TODO: Fix to use multiple stack alloc unwind codes for stacks
    // exceeding 256MB in size.
    if (NumBytes >= (1 << 28))
      report_fatal_error("Stack size cannot exceed 256MB for stack "
                         "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
        .addImm(LowNumWords)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);
    if ((NumWords & 0xFFFF0000) != 0) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
          .addReg(AArch64::X15)
          .addImm((NumWords & 0xFFFF0000) >> 16) // High half
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  } else {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
        .addImm(NumWords)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  const char *ChkStk = Subtarget.getChkStkName();
  switch (MF.getTarget().getCodeModel()) {
  case CodeModel::Tiny:
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Kernel:
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
        .addExternalSymbol(ChkStk)
        .addReg(AArch64::X15, RegState::Implicit)
        .addReg(AArch64::X16, RegState::Implicit | RegState::Define |
                                  RegState::Dead)
        .addReg(AArch64::X17, RegState::Implicit | RegState::Define |
                                  RegState::Dead)
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define |
                                   RegState::Dead)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    break;
  case CodeModel::Large:
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
        .addReg(AArch64::X16, RegState::Define)
        .addExternalSymbol(ChkStk)
        .addExternalSymbol(ChkStk)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
        .addReg(AArch64::X16, RegState::Kill)
        .addReg(AArch64::X15, RegState::Implicit)
        .addReg(AArch64::X16, RegState::Implicit | RegState::Define |
                                  RegState::Dead)
        .addReg(AArch64::X17, RegState::Implicit | RegState::Define |
                                  RegState::Dead)
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define |
                                   RegState::Dead)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    break;
  }

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
      .addReg(AArch64::SP, RegState::Kill)
      .addReg(AArch64::X15, RegState::Kill)
      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
      .setMIFlags(MachineInstr::FrameSetup);
  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  NumBytes = 0;

  if (RealignmentPadding > 0) {
    if (RealignmentPadding >= 4096) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
          .addReg(AArch64::X16, RegState::Define)
          .addImm(RealignmentPadding)
          .setMIFlags(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
          .addReg(AArch64::SP)
          .addReg(AArch64::X16, RegState::Kill)
          .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
          .addReg(AArch64::SP)
          .addImm(RealignmentPadding)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
    AFI->setStackRealigned(true);

    // No need for SEH instructions here; if we're realigning the stack,
    // we've set a frame pointer and already finished the SEH prologue.
    assert(!NeedsWinCFI);
  }
  if (X15Scratch != AArch64::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
        .addReg(AArch64::XZR)
        .addReg(X15Scratch, RegState::Undef)
        .addReg(X15Scratch, RegState::Implicit)
        .setMIFlag(MachineInstr::FrameSetup);
  }
}
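
// Outline of the Windows probe protocol used above: x15 carries the requested
// allocation in 16-byte units, __chkstk probes that many bytes while
// preserving x15, and the final SUBXrx64 performs the real allocation as
// `sub sp, sp, x15, uxtx #4`. E.g. a 64 KiB frame passes NumWords == 0x1000.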

void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
    MachineBasicBlock::iterator MBBI) const {
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
    if (MFI.hasScalableStackID(FrameIdx))
      continue;

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
    CFIBuilder.buildOffset(Info.getReg(), Offset);
  }
}

void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
    MachineBasicBlock::iterator MBBI) const {
  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);

  std::optional<int64_t> IncomingVGOffsetFromDefCFA;
  if (AFL.requiresSaveVG(MF)) {
    auto IncomingVG = *find_if(
        reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
    IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
                                 AFL.getOffsetOfLocalArea();
  }

  StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
  for (const auto &Info : CSI) {
    int FI = Info.getFrameIdx();
    if (!MFI.hasScalableStackID(FI))
      continue;

    // Not all unwinders may know about SVE registers, so assume the lowest
    // common denominator.
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    MCRegister Reg = Info.getReg();
    if (!RegInfo.regNeedsCFI(Reg, Reg))
      continue;

    StackOffset Offset =
        StackOffset::getScalable(MFI.getObjectOffset(FI)) -
        StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));

    if (AFI->hasSplitSVEObjects() &&
        MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Offset -= PPRStackSize;

    CFIBuilder.insertCFIInst(
        createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
  }
}

static bool isFuncletReturnInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
    return true;
  }
}

AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               const AArch64FrameLowering &AFL)
    : AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
  EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
  SEHEpilogueStartI = MBB.end();
}
1300
1302 MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
1303 if (MBB.end() != EpilogueEndI) {
1304 DL = EpilogueEndI->getDebugLoc();
1305 IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
1306 }
1307
1308 int64_t NumBytes =
1309 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
1310
1311 // All calls are tail calls in GHC calling conv, and functions have no
1312 // prologue/epilogue.
1313 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1314 return;
1315
1316 // How much of the stack used by incoming arguments this function is expected
1317 // to restore in this particular epilogue.
1318 int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
1319 bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
1320 MF.getFunction().isVarArg());
1321 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1322
1323 int64_t AfterCSRPopSize = ArgumentStackToRestore;
1324 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1325 // We cannot rely on the local stack size set in emitPrologue if the function
1326 // has funclets, as funclets have different local stack size requirements, and
1327 // the current value set in emitPrologue may be that of the containing
1328 // function.
1329 if (MF.hasEHFunclets())
1330 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1331
1332 if (HomPrologEpilog) {
1334 auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
1335 if (FirstHomogenousEpilogI != MBB.begin()) {
1336 auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
1337 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1338 FirstHomogenousEpilogI = HomogeneousEpilog;
1339 }
1340
1341 // Adjust local stack
1342 emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
1343 StackOffset::getFixed(AFI->getLocalStackSize()), TII,
1345
1346 // SP has been already adjusted while restoring callee save regs.
1347 // We've bailed-out the case with adjusting SP for arguments.
1348 assert(AfterCSRPopSize == 0);
1349 return;
1350 }
1351
1352 bool FPAfterSVECalleeSaves =
1353 Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
1354
1355 bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1356 // Assume we can't combine the last pop with the sp restore.
1357 bool CombineAfterCSRBump = false;
1358 if (FPAfterSVECalleeSaves) {
1359 AfterCSRPopSize += FixedObject;
1360 } else if (!CombineSPBump && PrologueSaveSize != 0) {
1361 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1362 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1363 AArch64InstrInfo::isSEHInstruction(*Pop))
1364 Pop = std::prev(Pop);
1365 // Converting the last ldp to a post-index ldp is valid only if the last
1366 // ldp's offset is 0.
1367 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1368 // If the offset is 0 and the AfterCSR pop is not actually trying to
1369 // allocate more stack for arguments (in space that an untimely interrupt
1370 // may clobber), convert it to a post-index ldp.
1371 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
1373 Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
1374 PrologueSaveSize);
1375 } else {
1376 // If not, make sure to emit an add after the last ldp.
1377 // We're doing this by transferring the size to be restored from the
1378 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1379 // pops.
1380 AfterCSRPopSize += PrologueSaveSize;
1381 CombineAfterCSRBump = true;
1382 }
1383 }
1384
1385 // Move past the restores of the callee-saved registers.
1386 // If we plan on combining the sp bump of the local stack size and the callee
1387 // save stack size, we might need to adjust the CSR save and restore offsets.
1388 MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
1389 MachineBasicBlock::iterator Begin = MBB.begin();
1390 while (FirstGPRRestoreI != Begin) {
1391 --FirstGPRRestoreI;
1392 if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
1393 (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
1394 ++FirstGPRRestoreI;
1395 break;
1396 } else if (CombineSPBump)
1397 fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
1398 AFI->getLocalStackSize());
1399 }
1400
1401 if (NeedsWinCFI) {
1402 // Note that there are cases where we insert SEH opcodes in the
1403 // epilogue when we had no SEH opcodes in the prologue. For
1404 // example, when there is no stack frame but there are stack
1405 // arguments. Insert the SEH_EpilogStart and remove it later if it
1406 // we didn't emit any SEH opcodes to avoid generating WinCFI for
1407 // functions that don't need it.
1408 BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
1410 SEHEpilogueStartI = FirstGPRRestoreI;
1411 --SEHEpilogueStartI;
1412 }
1413
1414 if (HasFP && AFI->hasSwiftAsyncContext())
1415 emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
1416
1417 StackOffset ZPRStackSize = AFL.getZPRStackSize(MF);
1418 StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1419 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
1420
1421 // If there is a single SP update, insert it before the ret and we're done.
1422 if (CombineSPBump) {
1423 assert(!SVEStackSize && "Cannot combine SP bump with SVE");
1424
1425 // When we are about to restore the CSRs, the CFA register is SP again.
1426 if (EmitCFI && HasFP)
1428 .buildDefCFA(AArch64::SP, NumBytes);
1429
1430 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1431 StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
1433 EmitCFI, StackOffset::getFixed(NumBytes));
1434 return;
1435 }
1436
1437 NumBytes -= PrologueSaveSize;
1438 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1439
1440 if (!AFI->hasSplitSVEObjects()) {
1441 // Process the SVE callee-saves to determine what space needs to be
1442 // deallocated.
1443 StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1444 MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1445 RestoreEnd = FirstGPRRestoreI;
1446 int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
1447 int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
1448 int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1449
1450 if (SVECalleeSavedSize) {
1451 if (FPAfterSVECalleeSaves)
1452 RestoreEnd = MBB.getFirstTerminator();
1453
1454 RestoreBegin = std::prev(RestoreEnd);
1455 while (RestoreBegin != MBB.begin() &&
1456 isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
1457 --RestoreBegin;
1458
1459 assert(isPartOfSVECalleeSaves(RestoreBegin) &&
1460 isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
1461 "Unexpected instruction");
1462
1463 StackOffset CalleeSavedSizeAsOffset =
1464 StackOffset::getScalable(SVECalleeSavedSize);
1465 DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1466 DeallocateAfter = CalleeSavedSizeAsOffset;
1467 }
1468
1469 // Deallocate the SVE area.
1470 if (FPAfterSVECalleeSaves) {
1471 // If the callee-save area is before FP, restoring the FP implicitly
1472 // deallocates non-callee-save SVE allocations. Otherwise, deallocate
1473 // them explicitly.
1474 if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1475 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1476 DeallocateBefore, TII, MachineInstr::FrameDestroy,
1477 false, NeedsWinCFI, &HasWinCFI);
1478 }
1479
1480 // Deallocate callee-save non-SVE registers.
1481 emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1482 StackOffset::getFixed(AFI->getCalleeSavedStackSize()),
1484 &HasWinCFI);
1485
1486 // Deallocate fixed objects.
1487 emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1488 StackOffset::getFixed(FixedObject), TII,
1490 &HasWinCFI);
1491
1492 // Deallocate callee-save SVE registers.
1493 emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1494 DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1496 } else if (SVEStackSize) {
1497 int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
1498 // If we have stack realignment or variable-sized objects we must use the
1499 // FP to restore SVE callee saves (as there is an unknown amount of
1500 // data/padding between the SP and SVE CS area).
1501 Register BaseForSVEDealloc =
1502 (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1503 : AArch64::SP;
1504 if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1505 Register CalleeSaveBase = AArch64::FP;
1506 if (int64_t CalleeSaveBaseOffset =
1507 AFI->getCalleeSaveBaseToFrameRecordOffset()) {
1508 // If we have have an non-zero offset to the non-SVE CS base we need
1509 // to compute the base address by subtracting the offest in a
1510 // temporary register first (to avoid briefly deallocating the SVE
1511 // CS).
1512 CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
1513 &AArch64::GPR64RegClass);
1514 emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1515 StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
1517 }
1518 // The code below will deallocate the stack space space by moving the
1519 // SP to the start of the SVE callee-save area.
1520 emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1521 StackOffset::getScalable(-SVECalleeSavedSize), TII,
1523 } else if (BaseForSVEDealloc == AArch64::SP) {
1524 if (SVECalleeSavedSize) {
1525 // Deallocate the non-SVE locals first before we can deallocate (and
1526 // restore callee saves) from the SVE area.
1527 emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1528 StackOffset::getFixed(NumBytes), TII,
1530 &HasWinCFI, EmitCFI && !HasFP,
1531 SVEStackSize + StackOffset::getFixed(
1532 NumBytes + PrologueSaveSize));
1533 NumBytes = 0;
1534 }
1535
1536 emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1537 DeallocateBefore, TII, MachineInstr::FrameDestroy,
1538 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1539 SVEStackSize +
1540 StackOffset::getFixed(NumBytes + PrologueSaveSize));
1541
1542 emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1543 DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1545 DeallocateAfter +
1546 StackOffset::getFixed(NumBytes + PrologueSaveSize));
1547 }
1548
1549 if (EmitCFI)
1550 emitCalleeSavedSVERestores(RestoreEnd);
1551 }
1552 } else if (AFI->hasSplitSVEObjects() && SVEStackSize) {
1553 // TODO: Support stack realigment and variable-sized objects.
1554 assert(!AFI->isStackRealigned() && !MFI.hasVarSizedObjects() &&
1555 "unexpected stack realignment or variable sized objects with split "
1556 "SVE stack objects");
1557 // SplitSVEObjects. Determine the sizes and starts/ends of the ZPR and PPR
1558 // areas.
1559 auto ZPRCalleeSavedSize =
1560 StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
1561 auto PPRCalleeSavedSize =
1562 StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
1563 StackOffset PPRLocalsSize = PPRStackSize - PPRCalleeSavedSize;
1564 StackOffset ZPRLocalsSize = ZPRStackSize - ZPRCalleeSavedSize;
1565
1566 MachineBasicBlock::iterator PPRRestoreBegin = FirstGPRRestoreI,
1567 PPRRestoreEnd = FirstGPRRestoreI;
1568 if (PPRCalleeSavedSize) {
1569 PPRRestoreBegin = std::prev(PPRRestoreEnd);
1570 while (PPRRestoreBegin != MBB.begin() &&
1571 isPartOfPPRCalleeSaves(std::prev(PPRRestoreBegin)))
1572 --PPRRestoreBegin;
1573 }
1574
1575 MachineBasicBlock::iterator ZPRRestoreBegin = PPRRestoreBegin,
1576 ZPRRestoreEnd = PPRRestoreBegin;
1577 if (ZPRCalleeSavedSize) {
1578 ZPRRestoreBegin = std::prev(ZPRRestoreEnd);
1579 while (ZPRRestoreBegin != MBB.begin() &&
1580 isPartOfZPRCalleeSaves(std::prev(ZPRRestoreBegin)))
1581 --ZPRRestoreBegin;
1582 }
1583
1584 auto CFAOffset =
1585 SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
1586 if (PPRCalleeSavedSize || ZPRCalleeSavedSize) {
1587 // Deallocate the non-SVE locals first before we can deallocate (and
1588 // restore callee saves) from the SVE area.
1589 auto NonSVELocals = StackOffset::getFixed(NumBytes);
1590 emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
1591 NonSVELocals, TII, MachineInstr::FrameDestroy, false,
1592 false, nullptr, EmitCFI && !HasFP, CFAOffset);
1593 NumBytes = 0;
1594 CFAOffset -= NonSVELocals;
1595 }
1596
1597 if (ZPRLocalsSize) {
1598 emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
1599 ZPRLocalsSize, TII, MachineInstr::FrameDestroy, false,
1600 false, nullptr, EmitCFI && !HasFP, CFAOffset);
1601 CFAOffset -= ZPRLocalsSize;
1602 }
1603
1604 if (PPRLocalsSize || ZPRCalleeSavedSize) {
1605 assert(PPRRestoreBegin == ZPRRestoreEnd &&
1606 "Expected PPR restores after ZPR");
1607 emitFrameOffset(MBB, PPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
1608 PPRLocalsSize + ZPRCalleeSavedSize, TII,
1609 MachineInstr::FrameDestroy, false, false, nullptr,
1610 EmitCFI && !HasFP, CFAOffset);
1611 CFAOffset -= PPRLocalsSize + ZPRCalleeSavedSize;
1612 }
1613 if (PPRCalleeSavedSize) {
1614 emitFrameOffset(MBB, PPRRestoreEnd, DL, AArch64::SP, AArch64::SP,
1615 PPRCalleeSavedSize, TII, MachineInstr::FrameDestroy,
1616 false, false, nullptr, EmitCFI && !HasFP, CFAOffset);
1617 }
1618
1619 // We only emit CFI information for ZPRs, so emit CFI after the ZPR restores.
1620 if (EmitCFI)
1621 emitCalleeSavedSVERestores(ZPRRestoreEnd);
1622 }
1623
1624 if (!HasFP) {
1625 bool RedZone = AFL.canUseRedZone(MF);
1626 // If this was a redzone leaf function, we don't need to restore the
1627 // stack pointer (but we may need to pop stack args for fastcc).
1628 if (RedZone && AfterCSRPopSize == 0)
1629 return;
1630
1631 // Pop the local variables off the stack. If there are no callee-saved
1632 // registers, it means we are actually positioned at the terminator and can
1633 // combine stack increment for the locals and the stack increment for
1634 // callee-popped arguments into (possibly) a single instruction and be done.
1635 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1636 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1637 if (NoCalleeSaveRestore)
1638 StackRestoreBytes += AfterCSRPopSize;
1639
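// For example (hypothetical sizes): 16 bytes of locals plus 16 bytes of
// callee-popped arguments fold into a single "add sp, sp, #32" below.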
1640 emitFrameOffset(
1641 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1642 StackOffset::getFixed(StackRestoreBytes), TII,
1643 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1644 StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
1645
1646 // If we were able to combine the local stack pop with the argument pop,
1647 // then we're done.
1648 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1649 return;
1650
1651 NumBytes = 0;
1652 }
1653
1654 // Restore the original stack pointer.
1655 // FIXME: Rather than doing the math here, we should instead just use
1656 // non-post-indexed loads for the restores if we aren't actually going to
1657 // be able to save any instructions.
1658 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1659 emitFrameOffset(
1660 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
1661 StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
1662 TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1663 } else if (NumBytes)
1664 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1665 StackOffset::getFixed(NumBytes), TII,
1666 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1667
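// In the variable-sized-object (or realigned) case SP is not statically
// known, so it is recovered from the frame pointer rather than by
// arithmetic on SP itself, conceptually something like "sub sp, x29, #16"
// (the exact offset depends on the frame record layout).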
1668 // When we are about to restore the CSRs, the CFA register is SP again.
1669 if (EmitCFI && HasFP)
1670 CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
1671 .buildDefCFA(AArch64::SP, PrologueSaveSize);
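// I.e., once SP is back at the callee-save area the CFA is re-expressed
// relative to SP, e.g. ".cfi_def_cfa sp, 16" for a 16-byte GPR save area
// (illustrative; PrologueSaveSize varies per function).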
1672
1673 // This must be placed after the callee-save restore code because that code
1674 // assumes the SP is at the same location as it was after the callee-save save
1675 // code in the prologue.
1676 if (AfterCSRPopSize) {
1677 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1678 "interrupt may have clobbered");
1679
1680 emitFrameOffset(
1681 MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1682 StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
1683 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1684 StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
1685 }
1686}
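// For a simple frame (no SVE, no variable-sized objects) the code above
// typically reduces to an epilogue of the form (illustrative only):
//   add sp, sp, #<locals>     // deallocate the local area
//   ldp x29, x30, [sp], #16   // restore FP/LR and pop the CSR area
//   ret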
1687
1688bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1689 uint64_t StackBumpBytes) const {
1690 if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
1691 StackBumpBytes))
1692 return false;
1693 if (MBB.empty())
1694 return true;
1695
1696 // Disable combined SP bump if the last instruction is an MTE tag store. It
1697 // is almost always better to merge SP adjustment into those instructions.
1698 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1699 MachineBasicBlock::iterator Begin = MBB.begin();
1700 while (LastI != Begin) {
1701 --LastI;
1702 if (LastI->isTransient())
1703 continue;
1704 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1705 break;
1706 }
1707 switch (LastI->getOpcode()) {
1708 case AArch64::STGloop:
1709 case AArch64::STZGloop:
1710 case AArch64::STGi:
1711 case AArch64::STZGi:
1712 case AArch64::ST2Gi:
1713 case AArch64::STZ2Gi:
1714 return false;
1715 default:
1716 return true;
1717 }
1718 llvm_unreachable("unreachable");
1719}
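// Rationale: MTE tag-store instructions can absorb the SP adjustment into
// their own post-indexed forms (e.g. "st2g sp, [sp], #32"), which is why a
// trailing tag store disables the combined SP bump above.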
1720
1721void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1722 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1723 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1724 case SwiftAsyncFramePointerMode::DeploymentBased:
1725 // Avoid the reload as it is GOT relative, and instead fall back to the
1726 // hardcoded value below. This allows a mismatch between the OS and
1727 // application without immediately terminating on the difference.
1728 [[fallthrough]];
1729 case SwiftAsyncFramePointerMode::Always:
1730 // We need to reset FP to its untagged state on return. Bit 60 is
1731 // currently used to show the presence of an extended frame.
1732
1733 // BIC x29, x29, #0x1000_0000_0000_0000
1734 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1735 AArch64::FP)
1736 .addUse(AArch64::FP)
1737 .addImm(0x10fe)
1738 .setMIFlag(MachineInstr::FrameDestroy);
1739 if (NeedsWinCFI) {
1740 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1741 .setMIFlag(MachineInstr::FrameDestroy);
1742 HasWinCFI = true;
1743 }
1744 break;
1745
1746 case SwiftAsyncFramePointerMode::Never:
1747 break;
1748 }
1749}
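// Note: 0x10fe is the 13-bit logical-immediate encoding (N=1, immr=3,
// imms=62) of the mask 0xEFFFFFFFFFFFFFFF, i.e. all ones except bit 60,
// so the ANDXri above implements the BIC shown in the comment.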
1750
1751void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1752 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1753 // Shadow call stack epilog: ldr x30, [x18, #-8]!
1754 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1755 .addReg(AArch64::X18, RegState::Define)
1756 .addReg(AArch64::LR, RegState::Define)
1757 .addReg(AArch64::X18)
1758 .addImm(-8)
1759 .setMIFlag(MachineInstr::FrameDestroy);
1760
1761 if (NeedsWinCFI)
1762 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1763 .setMIFlag(MachineInstr::FrameDestroy);
1764
1765 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1766 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1767 .buildRestore(AArch64::X18);
1768}
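// This is the inverse of the shadow call stack prologue ("str x30,
// [x18], #8"): the pre-indexed load restores LR and pops the shadow
// stack slot in a single instruction.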
1769
1770void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1771 MachineBasicBlock::iterator MBBI, bool SVE) const {
1772 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1773 if (CSI.empty())
1774 return;
1775
1776 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1777
1778 for (const auto &Info : CSI) {
1779 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1780 continue;
1781
1782 MCRegister Reg = Info.getReg();
1783 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1784 continue;
1785
1786 CFIBuilder.buildRestore(Info.getReg());
1787 }
1788}
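// Each matching slot yields a ".cfi_restore" directive for its register;
// for the scalable (SVE) slots, registers the ABI does not require CFI
// for are skipped via regNeedsCFI.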
1789
1790void AArch64EpilogueEmitter::finalizeEpilogue() const {
1791 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1792 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1793 HasWinCFI |= NeedsWinCFI;
1794 }
1795 if (EmitCFI)
1796 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1797 if (AFI->shouldSignReturnAddress(MF)) {
1798 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1799 // are inserted by emitPacRetPlusLeafHardening().
1800 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1801 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1802 TII->get(AArch64::PAUTH_EPILOGUE))
1803 .setMIFlag(MachineInstr::FrameDestroy);
1804 }
1805 // AArch64PointerAuth pass will insert SEH_PACSignLR
1806 HasWinCFI |= NeedsWinCFI;
1807 }
1808 if (HasWinCFI) {
1809 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1810 .setMIFlag(MachineInstr::FrameDestroy);
1811 if (!MF.hasWinCFI())
1812 MF.setHasWinCFI(true);
1813 }
1814 if (NeedsWinCFI) {
1815 assert(SEHEpilogueStartI != MBB.end());
1816 if (!HasWinCFI)
1817 MBB.erase(SEHEpilogueStartI);
1818 }
1819}
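// On Windows the epilogue must be bracketed by SEH markers: SEH_EpilogEnd
// closes the region (emitted as ".seh_endepilogue"), and if no WinCFI was
// actually produced, the matching start marker inserted earlier
// (SEHEpilogueStartI) is erased instead.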
1820
1821} // namespace llvm