//===- AArch64PrologueEpilogue.cpp - AArch64 prologue/epilogue lowering --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64PrologueEpilogue.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/MC/MCContext.h"

#define DEBUG_TYPE "frame-info"

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

namespace llvm {

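// Returns true if MO is an external-symbol operand whose name matches the
// runtime library routine LC; used below to recognise calls to the SME
// support routine that reads the current vector granule (VG).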
static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
                         RTLIB::Libcall LC) {
  return MO.isSymbol() &&
         StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
}

bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
  return AFI->hasStreamingModeChanges() &&
         !MF.getSubtarget<AArch64Subtarget>().hasSVE();
}

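// Identifies the instructions emitted to materialise VG before it is
// spilled: a CNTD, or, when the function changes streaming mode without SVE
// being available, the BL to the "get current VG" libcall and the COPY of
// its result.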
bool AArch64PrologueEpilogueCommon::isVGInstruction(
    MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
  unsigned Opc = MBBI->getOpcode();
  if (Opc == AArch64::CNTD_XPiI)
    return true;

  if (!requiresGetVGCall())
    return false;

  if (Opc == AArch64::BL)
    return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);

  return Opc == TargetOpcode::COPY;
}

// Convenience function to determine whether I is part of the ZPR callee saves.
static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  case AArch64::STR_ZXI:
  case AArch64::LDR_ZXI:
  case AArch64::CPY_ZPzI_B:
  case AArch64::CMPNE_PPzZI_B:
  case AArch64::PTRUE_C_B:
  case AArch64::PTRUE_B:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  case AArch64::SEH_SavePReg:
  case AArch64::SEH_SaveZReg:
    return true;
  }
}

// Convenience function to determine whether I is part of the PPR callee saves.
static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::STR_PXI:
  case AArch64::LDR_PXI:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  }
}

// Convenience function to determine whether I is part of the SVE callee saves.
static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
  return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
}

AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
    MachineFunction &MF, MachineBasicBlock &MBB,
    const AArch64FrameLowering &AFL)
    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
      RegInfo(*Subtarget.getRegisterInfo()) {
  TII = Subtarget.getInstrInfo();
  AFI = MF.getInfo<AArch64FunctionInfo>();

  HasFP = AFL.hasFP(MF);
  NeedsWinCFI = AFL.needsWinCFI(MF);
}

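// Folds an SP update of CSStackSizeInc bytes into the first (store) or last
// (load) callee-save instruction by rewriting it to its pre-/post-indexed
// form, e.g. "stp x29, x30, [sp]" plus "sub sp, sp, #16" becomes
// "stp x29, x30, [sp, #-16]!". Falls back to a separate SP adjustment when
// the offset is out of range for the indexed form.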
MachineBasicBlock::iterator
AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
    bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
  unsigned NewOpc;

  // If the function contains streaming mode changes, we expect instructions
  // to calculate the value of VG before spilling. Move past these instructions
  // if necessary.
  if (AFL.requiresSaveVG(MF)) {
    auto &TLI = *Subtarget.getTargetLowering();
    while (isVGInstruction(MBBI, TLI))
      ++MBBI;
  }

  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
  int64_t MinOffset, MaxOffset;
  bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
      NewOpc, Scale, Width, MinOffset, MaxOffset);
  (void)Success;
  assert(Success && "unknown load/store opcode");

  // If the first store isn't right where we want SP, then we can't fold the
  // update in, so create a normal arithmetic instruction instead.
  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
    // If we are destroying the frame, make sure we add the increment after the
    // last frame operation.
    if (FrameFlag == MachineInstr::FrameDestroy) {
      ++MBBI;
      // Also skip the SEH instruction, if needed.
      if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
        ++MBBI;
    }
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
                    StackOffset::getFixed(CFAOffset));

    return std::prev(MBBI);
  }

  // Get rid of the SEH code associated with the old instruction.
  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
    if (AArch64InstrInfo::isSEHInstruction(*SEH))
      SEH->eraseFromParent();
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands());

  // Generate a new SEH code that corresponds to the new instruction.
  if (NeedsWinCFI) {
    HasWinCFI = true;
    AFL.insertSEH(*MIB, *TII, FrameFlag);
  }

  if (EmitCFI)
    CFIInstBuilder(MBB, MBBI, FrameFlag)
        .buildDefCFAOffset(CFAOffset - CSStackSizeInc);

  return std::prev(MBB.erase(MBBI));
}

// Fix up the SEH opcode associated with the save/restore instruction.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
                           unsigned LocalStackSize) {
  MachineOperand *ImmOpnd = nullptr;
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Fix the offset in the SEH instruction");
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    break;
  }
  if (ImmOpnd)
    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}

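// Once the local-area allocation has been merged into the callee-save SP
// bump, every callee-save save/restore addresses its slot at a larger
// distance from SP; this rewrites the scaled immediate on each such
// instruction (and, for WinCFI, on its trailing SEH opcode) by
// LocalStackSize bytes.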
void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
    MachineInstr &MI, uint64_t LocalStackSize) const {
  if (AArch64InstrInfo::isSEHInstruction(MI))
    return;

  unsigned Opc = MI.getOpcode();
  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  if (NeedsWinCFI) {
    HasWinCFI = true;
    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
           "Expecting a SEH instruction");
    fixupSEHOpcode(MBBI, LocalStackSize);
  }
}

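// Decides whether it is safe and profitable to merge the callee-save-area SP
// decrement and the local-area SP decrement into a single SP update.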
bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
    uint64_t StackBumpBytes) const {
  if (AFL.homogeneousPrologEpilog(MF))
    return false;

  if (AFI->getLocalStackSize() == 0)
    return false;

  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
  // (to force a stp with predecrement) to match the packed unwind format,
  // provided that there actually are any callee saved registers to merge the
  // decrement with.
  // This is potentially marginally slower, but allows using the packed
  // unwind format for functions that both have a local area and callee saved
  // registers. Using the packed unwind format notably reduces the size of
  // the unwind info.
  if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
      MF.getFunction().hasOptSize())
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores.
  if (StackBumpBytes >= 512 ||
      AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo.hasStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (AFL.canUseRedZone(MF))
    return false;

  // When there is an SVE area on the stack, always allocate the
  // callee-saves and spills/locals separately.
  if (AFI->hasSVEStackSize())
    return false;

  return true;
}

AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               const AArch64FrameLowering &AFL)
    : AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
  EmitCFI = AFI->needsDwarfUnwindInfo(MF);
  EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  IsFunclet = MBB.isEHFuncletEntry();
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);

#ifndef NDEBUG
  collectBlockLiveins();
#endif
}

#ifndef NDEBUG
/// Collect live registers from the end of \p MI's parent up to (including) \p
/// MI in \p LiveRegs.
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
                                LivePhysRegs &LiveRegs) {

  MachineBasicBlock &MBB = *MI.getParent();
  LiveRegs.addLiveOuts(MBB);
  for (const MachineInstr &MI :
       reverse(make_range(MI.getIterator(), MBB.instr_end())))
    LiveRegs.stepBackward(MI);
}

void AArch64PrologueEmitter::collectBlockLiveins() {
  // Collect live registers from the end of MBB up to the start of the
  // existing frame setup instructions.
  PrologueEndI = MBB.begin();
  while (PrologueEndI != MBB.end() &&
         PrologueEndI->getFlag(MachineInstr::FrameSetup))
    ++PrologueEndI;

  if (PrologueEndI != MBB.end()) {
    getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
    // Ignore registers used for stack management for now.
    LiveRegs.removeReg(AArch64::SP);
    LiveRegs.removeReg(AArch64::X19);
    LiveRegs.removeReg(AArch64::FP);
    LiveRegs.removeReg(AArch64::LR);

    // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
    // This is necessary to spill VG if required where SVE is unavailable, but
    // X0 is preserved around this call.
    if (requiresGetVGCall())
      LiveRegs.removeReg(AArch64::X0);
  }
}

void AArch64PrologueEmitter::verifyPrologueClobbers() const {
  if (PrologueEndI == MBB.end())
    return;
  // Check if any of the newly inserted instructions clobber any of the live
  // registers.
  for (MachineInstr &MI :
       make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
    for (auto &Op : MI.operands())
      if (Op.isReg() && Op.isDef())
        assert(!LiveRegs.contains(Op.getReg()) &&
               "live register clobbered by inserted prologue instructions");
  }
}
#endif

void AArch64PrologueEmitter::determineLocalsStackSize(
    uint64_t StackSize, uint64_t PrologueSaveSize) {
  AFI->setLocalStackSize(StackSize - PrologueSaveSize);
  CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
}

// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of an SVE register.
static int64_t upperBound(StackOffset Size) {
  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}

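// For example, a StackOffset of 2 scalable bytes and 16 fixed bytes is at
// most 2 * 16 + 16 = 48 bytes, since vscale is at most 16 (the architectural
// maximum SVE vector length of 2048 bits is sixteen 128-bit granules).

// allocateStackSpace reserves AllocSize bytes below SP (plus realignment
// padding, if any). When stack probing is not required it is a plain SP
// decrement; with probing enabled it uses either a fixed-size probed
// allocation (PROBED_STACKALLOC), a single decrement when the worst-case
// size fits within one probe interval, or a probing loop
// (PROBED_STACKALLOC_VAR) when a scalable component leaves the size unknown
// at compile time.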
void AArch64PrologueEmitter::allocateStackSpace(
    MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
    StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) {

  if (!AllocSize)
    return;

  DebugLoc DL;
  const int64_t MaxAlign = MFI.getMaxAlign().value();
  const uint64_t AndMask = ~(MaxAlign - 1);

  if (!AFI->hasStackProbing()) {
    Register TargetReg = RealignmentPadding
                             ? AFL.findScratchNonCalleeSaveRegister(&MBB)
                             : AArch64::SP;
    // SUB Xd/SP, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, InitialOffset);

    if (RealignmentPadding) {
      // AND SP, X9, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(TargetReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI->setStackRealigned(true);

      // No need for SEH instructions here; if we're realigning the stack,
      // we've set a frame pointer and already finished the SEH prologue.
      assert(!NeedsWinCFI);
    }
    return;
  }

  //
  // Stack probing allocation.
  //

  // Fixed length allocation. If we don't need to re-align the stack and don't
  // have SVE objects, we can use a more efficient sequence for stack probing.
  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
    assert(ScratchReg != AArch64::NoRegister);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
        .addDef(ScratchReg)
        .addImm(AllocSize.getFixed())
        .addImm(InitialOffset.getFixed())
        .addImm(InitialOffset.getScalable());
    // The fixed allocation may leave unprobed bytes at the top of the
    // stack. If we have subsequent allocation (e.g. if we have variable-sized
    // objects), we need to issue an extra probe, so these allocations start in
    // a known state.
    if (FollowupAllocs) {
      // STR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    return;
  }

  // Variable length allocation.

  // If the (unknown) allocation size cannot exceed the probe size, decrement
  // the stack pointer right away.
  int64_t ProbeSize = AFI->getStackProbeSize();
  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
                              ? AFL.findScratchNonCalleeSaveRegister(&MBB)
                              : AArch64::SP;
    assert(ScratchReg != AArch64::NoRegister);
    // SUB Xd, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, InitialOffset);
    if (RealignmentPadding) {
      // AND SP, Xn, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI->setStackRealigned(true);
    }
    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
                              AArch64::StackProbeMaxUnprobedStack) {
      // STR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  // Emit a variable-length allocation probing loop.
  // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
  // each of them guaranteed to adjust the stack by less than the probe size.
  Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
  assert(TargetReg != AArch64::NoRegister);
  // SUB Xd, SP, AllocSize
  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
                  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {
    // AND Xn, Xn, 0b11111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
        .addReg(TargetReg, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
        .setMIFlags(MachineInstr::FrameSetup);
  }

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
      .addReg(TargetReg);
  if (EmitCFI) {
    // Set the CFA register back to SP.
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
        .buildDefCFARegister(AArch64::SP);
  }
  if (RealignmentPadding)
    AFI->setStackRealigned(true);
}

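// Emits the function prologue. At a high level the work proceeds as: GPR
// callee-save spills (optionally folding the SP decrement into the first
// spill), frame-pointer setup, SVE (ZPR/PPR) callee-save spills, then the
// allocation of the remaining local area, with CFI or SEH unwind info
// emitted along the way. On Windows with SVE callee saves the order of the
// SVE and GPR areas is swapped (see FPAfterSVECalleeSaves below).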
void AArch64PrologueEmitter::emitPrologue() {
  const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
  const MachineBasicBlock::iterator EndI = MBB.end();

  // At this point, we're going to decide whether or not the function uses a
  // redzone. In most cases, the function doesn't have a redzone so let's
  // assume that's false and set it to true in the case that there's a redzone.
  AFI->setHasRedZone(false);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // In some cases, particularly with CallingConv::SwiftTail, it is possible to
  // have a tail-call where the caller only needs to adjust the stack pointer in
  // the epilogue. In this case, we still need to emit a SEH prologue sequence.
  // See `seh-minimal-prologue-epilogue.ll` test cases.
  if (AFI->getArgumentStackToRestore())
    HasWinCFI = true;

  if (AFI->shouldSignReturnAddress(MF)) {
    // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
    // are inserted by emitPacRetPlusLeafHardening().
    if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
      BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    // AArch64PointerAuth pass will insert SEH_PACSignLR
    HasWinCFI |= NeedsWinCFI;
  }

  if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
    emitShadowCallStackPrologue(PrologueBeginI, DL);
    HasWinCFI |= NeedsWinCFI;
  }

  if (EmitCFI && AFI->isMTETagged())
    BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
        .setMIFlag(MachineInstr::FrameSetup);

  // We signal the presence of a Swift extended frame to external tools by
  // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
  // ORR is sufficient; it is assumed a Swift kernel would initialize the TBI
  // bits so that is still true.
  if (HasFP && AFI->hasSwiftAsyncContext())
    emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Set tagged base pointer to the requested stack slot. Ideally it should
  // match SP value after prologue.
  if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
    AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
  else
    AFI->setTaggedBasePointerOffset(MFI.getStackSize());

  // getStackSize() includes all the locals in its size calculation. We don't
  // include these locals when computing the stack size of a funclet, as they
  // are allocated in the parent's stack frame and accessed via the frame
  // pointer from the funclet. We only save the callee saved registers in the
  // funclet, which are really the callee saved registers of the parent
  // function, including the funclet.
  int64_t NumBytes =
      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
  if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
    return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);

  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  // Windows unwind can't represent the required stack adjustments if we have
  // both SVE callee-saves and dynamic stack allocations, and the frame
  // pointer is before the SVE spills. The allocation of the frame pointer
  // must be the last instruction in the prologue so the unwinder can restore
  // the stack pointer correctly. (And there isn't any unwind opcode for
  // `addvl sp, x29, -17`.)
  //
  // Because of this, we do spills in the opposite order on Windows: first SVE,
  // then GPRs. The main side-effect of this is that it makes accessing
  // parameters passed on the stack more expensive.
  //
  // We could consider rearranging the spills for simpler cases.
  bool FPAfterSVECalleeSaves =
      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();

  if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
    reportFatalUsageError("SME hazard padding is not supported on Windows");

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  determineLocalsStackSize(NumBytes, PrologueSaveSize);

  MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
  if (FPAfterSVECalleeSaves) {
    // If we're doing SVE saves first, we need to immediately allocate space
    // for fixed objects, then space for the SVE callee saves.
    //
    // Windows unwind requires that the scalable size is a multiple of 16;
    // that's handled when the callee-saved size is computed.
    auto SaveSize =
        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
        StackOffset::getFixed(FixedObject);
    allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
                       /*FollowupAllocs=*/true);
    NumBytes -= FixedObject;

    // Now allocate space for the GPR callee saves.
    MachineBasicBlock::iterator MBBI = PrologueBeginI;
    while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
      ++MBBI;
    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
    NumBytes -= AFI->getCalleeSavedStackSize();
  } else if (CombineSPBump) {
    assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
    emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-NumBytes), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitAsyncCFI);
    NumBytes = 0;
  } else if (HomPrologEpilog) {
    // Stack has been already adjusted.
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
        PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  auto &TLI = *Subtarget.getTargetLowering();

  MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
  while (AfterGPRSavesI != EndI &&
         AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
         !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
    if (CombineSPBump &&
        // Only fix-up frame-setup load/store instructions.
        (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
      fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
                                        AFI->getLocalStackSize());
    ++AfterGPRSavesI;
  }

  // For funclets the FP belongs to the containing function. Only set up FP if
  // we actually need to.
  if (!IsFunclet && HasFP)
    emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);

  // Now emit the moves for whatever callee saved regs we have (including FP,
  // LR if those are saved). Frame instructions for SVE registers are emitted
  // later, after the instructions which actually save the SVE regs.
  if (EmitAsyncCFI)
    emitCalleeSavedGPRLocations(AfterGPRSavesI);

  // Alignment is required for the parent frame, not the funclet.
  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
  const int64_t RealignmentPadding =
      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
          ? MFI.getMaxAlign().value() - 16
          : 0;

  if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
    emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);

  StackOffset PPRCalleeSavesSize =
      StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
  StackOffset ZPRCalleeSavesSize =
      StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
  StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
  StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
  StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;

  std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
      ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;

  StackOffset CFAOffset =
      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
  MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
  if (!FPAfterSVECalleeSaves) {
    // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
    // areas.
    PPRCalleeSavesBegin = AfterGPRSavesI;
    if (PPRCalleeSavesSize) {
      LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
                        << PPRCalleeSavesSize.getScalable() << "\n");

      assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) &&
             "Unexpected instruction");
      while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
             AfterSVESavesI != MBB.getFirstTerminator())
        ++AfterSVESavesI;
    }
    PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
    if (ZPRCalleeSavesSize) {
      LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
                        << ZPRCalleeSavesSize.getScalable() << "\n");
      assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) &&
             "Unexpected instruction");
      while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
             AfterSVESavesI != MBB.getFirstTerminator())
        ++AfterSVESavesI;
    }
    ZPRCalleeSavesEnd = AfterSVESavesI;
  }

  if (EmitAsyncCFI)
    emitCalleeSavedSVELocations(AfterSVESavesI);

  if (AFI->hasSplitSVEObjects()) {
    assert(!FPAfterSVECalleeSaves &&
           "Cannot use FPAfterSVECalleeSaves with aarch64-split-sve-objects");
    assert(!AFL.canUseRedZone(MF) &&
           "Cannot use redzone with aarch64-split-sve-objects");
    // TODO: Handle HasWinCFI/NeedsWinCFI?
    assert(!NeedsWinCFI &&
           "WinCFI with aarch64-split-sve-objects is not supported");

    // Split ZPR and PPR allocation.
    // Allocate PPR callee saves.
    allocateStackSpace(*PPRCalleeSavesBegin, 0, PPRCalleeSavesSize,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects() || ZPRCalleeSavesSize ||
                           ZPRLocalsSize || PPRLocalsSize);
    CFAOffset += PPRCalleeSavesSize;

    // Allocate PPR locals + ZPR callee saves.
    assert(PPRCalleeSavesEnd == ZPRCalleeSavesBegin &&
           "Expected ZPR callee saves after PPR locals");
    allocateStackSpace(*PPRCalleeSavesEnd, RealignmentPadding,
                       PPRLocalsSize + ZPRCalleeSavesSize,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects() || ZPRLocalsSize);
    CFAOffset += PPRLocalsSize + ZPRCalleeSavesSize;

    // Allocate ZPR locals.
    allocateStackSpace(*ZPRCalleeSavesEnd, RealignmentPadding,
                       ZPRLocalsSize + StackOffset::getFixed(NumBytes),
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects());
  } else {
    // Allocate space for the callee saves (if any).
    StackOffset LocalsSize =
        PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
    if (!FPAfterSVECalleeSaves)
      allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize,
                         EmitAsyncCFI && !HasFP, CFAOffset,
                         MFI.hasVarSizedObjects() || LocalsSize);
    CFAOffset += SVECalleeSavesSize;

    // Allocate space for the rest of the frame including SVE locals. Align the
    // stack as necessary.
    assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
           "Cannot use redzone with stack realignment");
    if (!AFL.canUseRedZone(MF)) {
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
      allocateStackSpace(AfterSVESavesI, RealignmentPadding,
                         SVELocalsSize + StackOffset::getFixed(NumBytes),
                         EmitAsyncCFI && !HasFP, CFAOffset,
                         MFI.hasVarSizedObjects());
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  // For funclets the BP belongs to the containing function.
  if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
                     AArch64::SP, false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  // The very last FrameSetup instruction indicates the end of prologue. Emit a
  // SEH opcode indicating the prologue end.
  if (NeedsWinCFI && HasWinCFI) {
    BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // SEH funclets are passed the frame pointer in X1. If the parent
  // function uses the base register, then the base register is used
  // directly, and is not retrieved from X1.
  if (IsFunclet && F.hasPersonalityFn()) {
    EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
    if (isAsynchronousEHPersonality(Per)) {
      BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
              AArch64::FP)
          .addReg(AArch64::X1)
          .setMIFlag(MachineInstr::FrameSetup);
      MBB.addLiveIn(AArch64::X1);
    }
  }

  if (EmitCFI && !EmitAsyncCFI) {
    if (HasFP) {
      emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
    } else {
      StackOffset TotalSize =
          AFL.getSVEStackSize(MF) +
          StackOffset::getFixed((int64_t)MFI.getStackSize());
      CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
      CFIBuilder.insertCFIInst(
          createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
                       TotalSize, /*LastAdjustmentWasScalable=*/false));
    }
    emitCalleeSavedGPRLocations(AfterSVESavesI);
    emitCalleeSavedSVELocations(AfterSVESavesI);
  }
}

void AArch64PrologueEmitter::emitShadowCallStackPrologue(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
  // Shadow call stack prolog: str x30, [x18], #8
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
      .addReg(AArch64::X18, RegState::Define)
      .addReg(AArch64::LR)
      .addReg(AArch64::X18)
      .addImm(8)
      .setMIFlag(MachineInstr::FrameSetup);

  // This instruction also makes x18 live-in to the entry block.
  MBB.addLiveIn(AArch64::X18);

  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);

  if (EmitCFI) {
    // Emit a CFI instruction that causes 8 to be subtracted from the value of
    // x18 when unwinding past this frame.
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
        .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
  }
}

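// Tags the frame pointer as pointing at a Swift extended frame. With
// SwiftAsyncFramePointerMode::Always the bit is set with a plain ORR; with
// DeploymentBased the flag value may instead be loaded at runtime from the
// absolute symbol swift_async_extendedFramePointerFlags; Never leaves FP
// untouched.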
void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
  switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
  case SwiftAsyncFramePointerMode::DeploymentBased:
    if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
      // The special symbol below is absolute and has a *value* that can be
      // combined with the frame pointer to signal an extended frame.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
          .addExternalSymbol("swift_async_extendedFramePointerFlags",
                             AArch64II::MO_GOT);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
        HasWinCFI = true;
      }
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
          .addUse(AArch64::FP)
          .addUse(AArch64::X16)
          .addImm(Subtarget.isTargetILP32() ? 32 : 0);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
        HasWinCFI = true;
      }
      break;
    }
    [[fallthrough]];

  case SwiftAsyncFramePointerMode::Always:
    // ORR x29, x29, #0x1000_0000_0000_0000
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
        .addUse(AArch64::FP)
        .addImm(0x1100)
        .setMIFlag(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
      HasWinCFI = true;
    }
    break;

  case SwiftAsyncFramePointerMode::Never:
    break;
  }
}

void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
    int64_t NumBytes, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL) const {
  assert(!HasFP && "unexpected function without stack frame but with FP");
  assert(!AFL.getSVEStackSize(MF) &&
         "unexpected function without stack frame but with SVE objects");
  // All of the stack allocation is for locals.
  AFI->setLocalStackSize(NumBytes);
  if (!NumBytes) {
    if (NeedsWinCFI && HasWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    return;
  }
  // REDZONE: If the stack size is less than 128 bytes, we don't need
  // to actually allocate.
  if (AFL.canUseRedZone(MF)) {
    AFI->setHasRedZone(true);
    ++NumRedZoneFunctions;
  } else {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-NumBytes), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
    if (EmitCFI) {
      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
          .buildDefCFAOffset(NumBytes, FrameLabel);
    }
  }

  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }
}

void AArch64PrologueEmitter::emitFramePointerSetup(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    unsigned FixedObject) {
  int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
  if (CombineSPBump)
    FPOffset += AFI->getLocalStackSize();

  if (AFI->hasSwiftAsyncContext()) {
    // Before we update the live FP we have to ensure there's a valid (or
    // null) asynchronous context in its slot just before FP in the frame
    // record, so store it now.
    const auto &Attrs = MF.getFunction().getAttributes();
    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)
      MBB.addLiveIn(AArch64::X22);
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
        .addUse(Reg)
        .addUse(AArch64::SP)
        .addImm(FPOffset - 8)
        .setMIFlag(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
      // to multiple instructions, should be mutually-exclusive.
      assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
      HasWinCFI = true;
    }
  }

  if (HomPrologEpilog) {
    auto Prolog = MBBI;
    --Prolog;
    assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
    Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
  } else {
    // Issue sub fp, sp, FPOffset or
    // mov fp, sp when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
                    StackOffset::getFixed(FPOffset), TII,
                    MachineInstr::FrameSetup);
    if (NeedsWinCFI && HasWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      // After setting up the FP, the rest of the prolog doesn't need to be
      // included in the SEH unwind info.
      NeedsWinCFI = false;
    }
  }
  if (EmitAsyncCFI)
    emitDefineCFAWithFP(MBBI, FixedObject);
}

// Define the current CFA rule to use the provided FP.
void AArch64PrologueEmitter::emitDefineCFAWithFP(
    MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
  const int OffsetToFirstCalleeSaveFromFP =
      AFI->getCalleeSaveBaseToFrameRecordOffset() -
      AFI->getCalleeSavedStackSize();
  Register FramePtr = RegInfo.getFrameRegister(MF);
  CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
      .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
}

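// Emits the Windows stack-probe sequence: the probe amount is passed to
// __chkstk in X15 in units of 16 bytes (note the >> 4 below), and __chkstk
// touches each page of the new allocation so the OS can grow the guarded
// stack region before SP is actually moved.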
void AArch64PrologueEmitter::emitWindowsStackProbe(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
    int64_t RealignmentPadding) const {
  if (AFI->getSVECalleeSavedStackSize())
    report_fatal_error("SVE callee saves not yet supported with stack probing");

  // Find an available register to spill the value of X15 to, if X15 is
  // already being used for the nest parameter.
  unsigned X15Scratch = AArch64::NoRegister;
  if (llvm::any_of(MBB.liveins(),
                   [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
                     return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
                                                           LiveIn.PhysReg);
                   })) {
    X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
    assert(X15Scratch != AArch64::NoRegister &&
           (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
#ifndef NDEBUG
    LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
#endif
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
        .addReg(AArch64::XZR)
        .addReg(AArch64::X15, RegState::Undef)
        .addReg(AArch64::X15, RegState::Implicit)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
  if (NeedsWinCFI) {
    HasWinCFI = true;
    // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
    // exceed this amount. We need to move at most 2^24 - 1 into x15.
    // This is at most two instructions, MOVZ followed by MOVK.
    // TODO: Fix to use multiple stack alloc unwind codes for stacks
    // exceeding 256MB in size.
    if (NumBytes >= (1 << 28))
      report_fatal_error("Stack size cannot exceed 256MB for stack "
                         "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
        .addImm(LowNumWords)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);
    if ((NumWords & 0xFFFF0000) != 0) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
          .addReg(AArch64::X15)
          .addImm((NumWords & 0xFFFF0000) >> 16) // High half
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  } else {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
        .addImm(NumWords)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  const char *ChkStk = Subtarget.getChkStkName();
  switch (MF.getTarget().getCodeModel()) {
  case CodeModel::Tiny:
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Kernel:
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
        .addExternalSymbol(ChkStk)
        .addReg(AArch64::X15, RegState::Implicit)
        .addReg(AArch64::X16,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::X17,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::NZCV,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    break;
  case CodeModel::Large:
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
        .addReg(AArch64::X16, RegState::Define)
        .addExternalSymbol(ChkStk)
        .addExternalSymbol(ChkStk)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
        .addReg(AArch64::X16, RegState::Kill)
        .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
        .addReg(AArch64::X16,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::X17,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::NZCV,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    break;
  }

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
      .addReg(AArch64::SP, RegState::Kill)
      .addReg(AArch64::X15, RegState::Kill)
      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
      .setMIFlags(MachineInstr::FrameSetup);
  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  NumBytes = 0;

  if (RealignmentPadding > 0) {
    if (RealignmentPadding >= 4096) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
          .addReg(AArch64::X16, RegState::Define)
          .addImm(RealignmentPadding)
          .setMIFlags(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
          .addReg(AArch64::SP)
          .addReg(AArch64::X16, RegState::Kill)
          .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
          .addReg(AArch64::SP)
          .addImm(RealignmentPadding)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
    AFI->setStackRealigned(true);

    // No need for SEH instructions here; if we're realigning the stack,
    // we've set a frame pointer and already finished the SEH prologue.
    assert(!NeedsWinCFI);
  }
  if (X15Scratch != AArch64::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
        .addReg(AArch64::XZR)
        .addReg(X15Scratch, RegState::Undef)
        .addReg(X15Scratch, RegState::Implicit)
        .setMIFlag(MachineInstr::FrameSetup);
  }
}

void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
    MachineBasicBlock::iterator MBBI) const {
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
    if (MFI.hasScalableStackID(FrameIdx))
      continue;

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
    CFIBuilder.buildOffset(Info.getReg(), Offset);
  }
}

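// SVE callee-save slots live at scalable offsets from the CFA, so (unlike
// the GPR case above) their save locations are described with
// expression-based CFI built by createCFAOffset(), optionally expressed in
// terms of the incoming VG value saved by functions that require it.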
void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
    MachineBasicBlock::iterator MBBI) const {
  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);

  std::optional<int64_t> IncomingVGOffsetFromDefCFA;
  if (AFL.requiresSaveVG(MF)) {
    auto IncomingVG = *find_if(
        reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
    IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
                                 AFL.getOffsetOfLocalArea();
  }

  StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
  for (const auto &Info : CSI) {
    int FI = Info.getFrameIdx();
    if (!MFI.hasScalableStackID(FI))
      continue;

    // Not all unwinders may know about SVE registers, so assume the lowest
    // common denominator.
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    MCRegister Reg = Info.getReg();
    if (!RegInfo.regNeedsCFI(Reg, Reg))
      continue;

    StackOffset Offset =
        StackOffset::getScalable(MFI.getObjectOffset(FI)) -
        StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));

    if (AFI->hasSplitSVEObjects() &&
        MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Offset -= PPRStackSize;

    CFIBuilder.insertCFIInst(
        createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
  }
}

static bool isFuncletReturnInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
    return true;
  }
}

AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               const AArch64FrameLowering &AFL)
    : AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
  EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
  SEHEpilogueStartI = MBB.end();
}

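// Emits the function epilogue: the inverse of emitPrologue. Restores run in
// the opposite order of the prologue's allocations, and where possible the
// final SP increment is folded into the last callee-save load (see
// convertCalleeSaveRestoreToSPPrePostIncDec above).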
void AArch64EpilogueEmitter::emitEpilogue() {
  MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
  if (MBB.end() != EpilogueEndI) {
    DL = EpilogueEndI->getDebugLoc();
    IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
  }

  int64_t NumBytes =
      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
                                              MF.getFunction().isVarArg());
  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  int64_t AfterCSRPopSize = ArgumentStackToRestore;
  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // We cannot rely on the local stack size set in emitPrologue if the function
  // has funclets, as funclets have different local stack size requirements, and
  // the current value set in emitPrologue may be that of the containing
  // function.
  if (MF.hasEHFunclets())
    AFI->setLocalStackSize(NumBytes - PrologueSaveSize);

  if (HomPrologEpilog) {
    assert(!NeedsWinCFI);
    auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
    if (FirstHomogenousEpilogI != MBB.begin()) {
      auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
      if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
        FirstHomogenousEpilogI = HomogeneousEpilog;
    }

    // Adjust local stack.
    emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(AFI->getLocalStackSize()), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

    // SP has been already adjusted while restoring callee save regs.
    // We have already bailed out on the case that adjusts SP for arguments.
    assert(AfterCSRPopSize == 0);
    return;
  }

  bool FPAfterSVECalleeSaves =
      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();

  bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
  // Assume we can't combine the last pop with the sp restore.
  bool CombineAfterCSRBump = false;
  if (FPAfterSVECalleeSaves) {
    AfterCSRPopSize += FixedObject;
  } else if (!CombineSPBump && PrologueSaveSize != 0) {
    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
           AArch64InstrInfo::isSEHInstruction(*Pop))
      Pop = std::prev(Pop);
    // Converting the last ldp to a post-index ldp is valid only if the last
    // ldp's offset is 0.
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    // If the offset is 0 and the AfterCSR pop is not actually trying to
    // allocate more stack for arguments (in space that an untimely interrupt
    // may clobber), convert it to a post-index ldp.
    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
      convertCalleeSaveRestoreToSPPrePostIncDec(
          Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
          PrologueSaveSize);
    } else {
      // If not, make sure to emit an add after the last ldp.
      // We're doing this by transferring the size to be restored from the
      // adjustment *before* the CSR pops to the adjustment *after* the CSR
      // pops.
      AfterCSRPopSize += PrologueSaveSize;
      CombineAfterCSRBump = true;
    }
  }

  // Move past the restores of the callee-saved registers.
  // If we plan on combining the sp bump of the local stack size and the callee
  // save stack size, we might need to adjust the CSR save and restore offsets.
  MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (FirstGPRRestoreI != Begin) {
    --FirstGPRRestoreI;
    if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
        (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
      ++FirstGPRRestoreI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
                                        AFI->getLocalStackSize());
  }

  if (NeedsWinCFI) {
    // Note that there are cases where we insert SEH opcodes in the
    // epilogue when we had no SEH opcodes in the prologue. For
    // example, when there is no stack frame but there are stack
    // arguments. Insert the SEH_EpilogStart and remove it later if we
    // didn't emit any SEH opcodes, to avoid generating WinCFI for
    // functions that don't need it.
    BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
        .setMIFlag(MachineInstr::FrameDestroy);
    SEHEpilogueStartI = FirstGPRRestoreI;
    --SEHEpilogueStartI;
  }

  if (HasFP && AFI->hasSwiftAsyncContext())
    emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);

  StackOffset ZPRStackSize = AFL.getZPRStackSize(MF);
  StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
  StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");

    // When we are about to restore the CSRs, the CFA register is SP again.
    if (EmitCFI && HasFP)
      CFIInstBuilder(MBB, MBB.getFirstTerminator(), MachineInstr::FrameDestroy)
          .buildDefCFA(AArch64::SP, NumBytes);

    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, StackOffset::getFixed(NumBytes));
    return;
  }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!AFI->hasSplitSVEObjects()) {
    // Process the SVE callee-saves to determine what space needs to be
    // deallocated.
    StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
    MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
                                RestoreEnd = FirstGPRRestoreI;
    int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
    int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
    int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;

    if (SVECalleeSavedSize) {
      if (FPAfterSVECalleeSaves)
        RestoreEnd = MBB.getFirstTerminator();

      RestoreBegin = std::prev(RestoreEnd);
      while (RestoreBegin != MBB.begin() &&
             isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
        --RestoreBegin;

      assert(isPartOfSVECalleeSaves(RestoreBegin) &&
             isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
             "Unexpected instruction");

      StackOffset CalleeSavedSizeAsOffset =
          StackOffset::getScalable(SVECalleeSavedSize);
      DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
      DeallocateAfter = CalleeSavedSizeAsOffset;
    }

    // Deallocate the SVE area.
    if (FPAfterSVECalleeSaves) {
      // If the callee-save area is before FP, restoring the FP implicitly
      // deallocates non-callee-save SVE allocations. Otherwise, deallocate
      // them explicitly.
      if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
        emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
                        DeallocateBefore, TII, MachineInstr::FrameDestroy,
                        false, NeedsWinCFI, &HasWinCFI);
      }

      // Deallocate callee-save non-SVE registers.
      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
                      StackOffset::getFixed(AFI->getCalleeSavedStackSize()),
                      TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
                      &HasWinCFI);

      // Deallocate fixed objects.
      emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                      StackOffset::getFixed(FixedObject), TII,
                      MachineInstr::FrameDestroy, false, NeedsWinCFI,
                      &HasWinCFI);

      // Deallocate callee-save SVE registers.
      emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                      DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
                      NeedsWinCFI, &HasWinCFI);
    } else if (SVEStackSize) {
      int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
      // If we have stack realignment or variable-sized objects we must use the
      // FP to restore SVE callee saves (as there is an unknown amount of
      // data/padding between the SP and SVE CS area).
      Register BaseForSVEDealloc =
          (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
                                                                : AArch64::SP;
      if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
        Register CalleeSaveBase = AArch64::FP;
        if (int64_t CalleeSaveBaseOffset =
                AFI->getCalleeSaveBaseToFrameRecordOffset()) {
          // If we have a non-zero offset to the non-SVE CS base we need to
          // compute the base address by subtracting the offset in a
          // temporary register first (to avoid briefly deallocating the SVE
          // CS).
          CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
              &AArch64::GPR64RegClass);
          emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
                          StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
                          MachineInstr::FrameDestroy);
        }
        // The code below will deallocate the stack space by moving the SP
        // to the start of the SVE callee-save area.
        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
                        StackOffset::getScalable(-SVECalleeSavedSize), TII,
                        MachineInstr::FrameDestroy);
      } else if (BaseForSVEDealloc == AArch64::SP) {
        if (SVECalleeSavedSize) {
          // Deallocate the non-SVE locals first before we can deallocate (and
          // restore callee saves) from the SVE area.
          emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
                          StackOffset::getFixed(NumBytes), TII,
                          MachineInstr::FrameDestroy, false, NeedsWinCFI,
                          &HasWinCFI, EmitCFI && !HasFP,
                          SVEStackSize + StackOffset::getFixed(
                                             NumBytes + PrologueSaveSize));
          NumBytes = 0;
        }

        emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
                        DeallocateBefore, TII, MachineInstr::FrameDestroy,
                        false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
                        SVEStackSize +
                            StackOffset::getFixed(NumBytes + PrologueSaveSize));

        emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                        DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
                        DeallocateAfter +
                            StackOffset::getFixed(NumBytes + PrologueSaveSize));
      }

      if (EmitCFI)
        emitCalleeSavedSVERestores(RestoreEnd);
    }
1555 } else if (AFI->hasSplitSVEObjects() && SVEStackSize) {
1556 // TODO: Support stack realigment and variable-sized objects.
1557 assert(!AFI->isStackRealigned() && !MFI.hasVarSizedObjects() &&
1558 "unexpected stack realignment or variable sized objects with split "
1559 "SVE stack objects");
1560 // SplitSVEObjects. Determine the sizes and starts/ends of the ZPR and PPR
1561 // areas.
1562 auto ZPRCalleeSavedSize =
1563 StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
1564 auto PPRCalleeSavedSize =
1565 StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
1566 StackOffset PPRLocalsSize = PPRStackSize - PPRCalleeSavedSize;
1567 StackOffset ZPRLocalsSize = ZPRStackSize - ZPRCalleeSavedSize;
1568
1569 MachineBasicBlock::iterator PPRRestoreBegin = FirstGPRRestoreI,
1570 PPRRestoreEnd = FirstGPRRestoreI;
1571 if (PPRCalleeSavedSize) {
1572 PPRRestoreBegin = std::prev(PPRRestoreEnd);
1573 while (PPRRestoreBegin != MBB.begin() &&
1574 isPartOfPPRCalleeSaves(std::prev(PPRRestoreBegin)))
1575 --PPRRestoreBegin;
1576 }
1577
1578 MachineBasicBlock::iterator ZPRRestoreBegin = PPRRestoreBegin,
1579 ZPRRestoreEnd = PPRRestoreBegin;
1580 if (ZPRCalleeSavedSize) {
1581 ZPRRestoreBegin = std::prev(ZPRRestoreEnd);
1582 while (ZPRRestoreBegin != MBB.begin() &&
1583 isPartOfZPRCalleeSaves(std::prev(ZPRRestoreBegin)))
1584 --ZPRRestoreBegin;
1585 }
1586
1587 auto CFAOffset =
1588 SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
1589 if (PPRCalleeSavedSize || ZPRCalleeSavedSize) {
1590 // Deallocate the non-SVE locals first before we can deallocate (and
1591 // restore callee saves) from the SVE area.
1592 auto NonSVELocals = StackOffset::getFixed(NumBytes);
1593 emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
1594 NonSVELocals, TII, MachineInstr::FrameDestroy, false,
1595 false, nullptr, EmitCFI && !HasFP, CFAOffset);
1596 NumBytes = 0;
1597 CFAOffset -= NonSVELocals;
1598 }
1599
1600 if (ZPRLocalsSize) {
1601 emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
1602 ZPRLocalsSize, TII, MachineInstr::FrameDestroy, false,
1603 false, nullptr, EmitCFI && !HasFP, CFAOffset);
1604 CFAOffset -= ZPRLocalsSize;
1605 }
1606
1607 if (PPRLocalsSize || ZPRCalleeSavedSize) {
1608 assert(PPRRestoreBegin == ZPRRestoreEnd &&
1609 "Expected PPR restores after ZPR");
1610 emitFrameOffset(MBB, PPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
1611 PPRLocalsSize + ZPRCalleeSavedSize, TII,
1612 MachineInstr::FrameDestroy, false, false, nullptr,
1613 EmitCFI && !HasFP, CFAOffset);
1614 CFAOffset -= PPRLocalsSize + ZPRCalleeSavedSize;
1615 }
1616 if (PPRCalleeSavedSize) {
1617 emitFrameOffset(MBB, PPRRestoreEnd, DL, AArch64::SP, AArch64::SP,
1618 PPRCalleeSavedSize, TII, MachineInstr::FrameDestroy,
1619 false, false, nullptr, EmitCFI && !HasFP, CFAOffset);
1620 }
1621
1622 // We only emit CFI information for ZPRs, so emit it after the ZPR restores.
1623 if (EmitCFI)
1624 emitCalleeSavedSVERestores(ZPRRestoreEnd);
1625 }
1626
1627 if (!HasFP) {
1628 bool RedZone = AFL.canUseRedZone(MF);
1629 // If this was a redzone leaf function, we don't need to restore the
1630 // stack pointer (but we may need to pop stack args for fastcc).
1631 if (RedZone && AfterCSRPopSize == 0)
1632 return;
1633
1634 // Pop the local variables off the stack. If there are no callee-saved
1635 // registers, it means we are actually positioned at the terminator and can
1636 // combine stack increment for the locals and the stack increment for
1637 // callee-popped arguments into (possibly) a single instruction and be done.
1638 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1639 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1640 if (NoCalleeSaveRestore)
1641 StackRestoreBytes += AfterCSRPopSize;
1642
1643 emitFrameOffset(
1644 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1645 StackOffset::getFixed(StackRestoreBytes), TII,
1646 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1647 StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
1648
1649 // If we were able to combine the local stack pop with the argument pop,
1650 // then we're done.
1651 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1652 return;
1653
1654 NumBytes = 0;
1655 }
1656
1657 // Restore the original stack pointer.
1658 // FIXME: Rather than doing the math here, we should instead just use
1659 // non-post-indexed loads for the restores if we aren't actually going to
1660 // be able to save any instructions.
1661 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1662 emitFrameOffset(
1663 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
1664 StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
1665 TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1666 } else if (NumBytes)
1667 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1668 StackOffset::getFixed(NumBytes), TII,
1669 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1670
1671 // When we are about to restore the CSRs, the CFA register is SP again.
1672 if (EmitCFI && HasFP)
1673 CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
1674 .buildDefCFA(AArch64::SP, PrologueSaveSize);
1675
1676 // This must be placed after the callee-save restore code because that code
1677 // assumes the SP is at the same location as it was after the callee-save save
1678 // code in the prologue.
1679 if (AfterCSRPopSize) {
1680 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1681 "interrupt may have clobbered");
1682
1683 emitFrameOffset(
1684 MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1685 StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
1686 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1687 StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
1688 }
1689}
1690
1691bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1692 uint64_t StackBumpBytes) const {
1693 if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
1694 StackBumpBytes))
1695 return false;
1696 if (MBB.empty())
1697 return true;
1698
1699 // Disable combined SP bump if the last instruction is an MTE tag store. It
1700 // is almost always better to merge SP adjustment into those instructions.
1701 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1702 MachineBasicBlock::iterator Begin = MBB.begin();
1703 while (LastI != Begin) {
1704 --LastI;
1705 if (LastI->isTransient())
1706 continue;
1707 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1708 break;
1709 }
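// After the scan, LastI is the last non-transient instruction that is not
// part of the epilogue's FrameDestroy sequence (or MBB.begin() if every
// instruction is); the switch below checks whether it is an MTE tag store.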
1710 switch (LastI->getOpcode()) {
1711 case AArch64::STGloop:
1712 case AArch64::STZGloop:
1713 case AArch64::STGi:
1714 case AArch64::STZGi:
1715 case AArch64::ST2Gi:
1716 case AArch64::STZ2Gi:
1717 return false;
1718 default:
1719 return true;
1720 }
1721 llvm_unreachable("unreachable");
1722}
1723
1724void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1725 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1726 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1727 case SwiftAsyncFramePointerMode::DeploymentBased:
1728 // Avoid the reload as it is GOT relative, and instead fall back to the
1729 // hardcoded value below. This allows a mismatch between the OS and
1730 // application without immediately terminating on the difference.
1731 [[fallthrough]];
1732 case SwiftAsyncFramePointerMode::Always:
1733 // We need to reset FP to its untagged state on return. Bit 60 is
1734 // currently used to show the presence of an extended frame.
1735
1736 // BIC x29, x29, #0x1000_0000_0000_0000
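// 0x10fe is the encoded logical immediate (N=1, immr=3, imms=62), i.e. 63
// ones rotated right by 3 = 0xefffffffffffffff, so the ANDXri clears exactly
// bit 60.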
1737 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1738 AArch64::FP)
1739 .addUse(AArch64::FP)
1740 .addImm(0x10fe)
1741 .setMIFlag(MachineInstr::FrameDestroy);
1742 if (NeedsWinCFI) {
1743 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1744 .setMIFlag(MachineInstr::FrameDestroy);
1745 HasWinCFI = true;
1746 }
1747 break;
1748
1749 case SwiftAsyncFramePointerMode::Never:
1750 break;
1751 }
1752}
1753
1754void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1755 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1756 // Shadow call stack epilog: ldr x30, [x18, #-8]!
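// The pre-indexed load first decrements x18 by 8, then reloads LR from the
// new top of the shadow call stack, undoing the prologue's post-indexed
// store (str x30, [x18], #8).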
1757 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1758 .addReg(AArch64::X18, RegState::Define)
1759 .addReg(AArch64::LR, RegState::Define)
1760 .addReg(AArch64::X18)
1761 .addImm(-8)
1762 .setMIFlag(MachineInstr::FrameDestroy);
1763
1764 if (NeedsWinCFI)
1765 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1766 .setMIFlag(MachineInstr::FrameDestroy);
1767
1768 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1769 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1770 .buildRestore(AArch64::X18);
1771}
1772
1773void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1774 MachineBasicBlock::iterator MBBI, bool SVE) const {
1775 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1776 if (CSI.empty())
1777 return;
1778
1779 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1780
1781 for (const auto &Info : CSI) {
1782 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1783 continue;
1784
1785 MCRegister Reg = Info.getReg();
1786 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1787 continue;
1788
1789 CFIBuilder.buildRestore(Info.getReg());
1790 }
1791}
1792
1793void AArch64EpilogueEmitter::finalizeEpilogue() const {
1794 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1795 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1796 HasWinCFI |= NeedsWinCFI;
1797 }
1798 if (EmitCFI)
1799 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1800 if (AFI->shouldSignReturnAddress(MF)) {
1801 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1802 // are inserted by emitPacRetPlusLeafHardening().
1803 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1804 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1805 TII->get(AArch64::PAUTH_EPILOGUE))
1806 .setMIFlag(MachineInstr::FrameDestroy);
1807 }
1808 // AArch64PointerAuth pass will insert SEH_PACSignLR
1809 HasWinCFI |= NeedsWinCFI;
1810 }
1811 if (HasWinCFI) {
1812 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1813 .setMIFlag(MachineInstr::FrameDestroy);
1814 if (!MF.hasWinCFI())
1815 MF.setHasWinCFI(true);
1816 }
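// SEHEpilogueStartI marks the SEH_EpilogStart placeholder inserted when the
// epilogue began; if no WinCFI opcodes were actually emitted, remove it so
// no empty epilogue scope is left behind.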
1817 if (NeedsWinCFI) {
1818 assert(SEHEpilogueStartI != MBB.end());
1819 if (!HasWinCFI)
1820 MBB.erase(SEHEpilogueStartI);
1821 }
1822}
1823
1824} // namespace llvm