AArch64PrologueEpilogue.cpp
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64PrologueEpilogue.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/MC/MCContext.h"

#define DEBUG_TYPE "frame-info"

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

namespace llvm {

static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
                         RTLIB::Libcall LC) {
  return MO.isSymbol() &&
         StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
}

bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
  return AFI->hasStreamingModeChanges() &&
         !MF.getSubtarget<AArch64Subtarget>().hasSVE();
}

bool AArch64PrologueEpilogueCommon::isVGInstruction(
    MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
  unsigned Opc = MBBI->getOpcode();
  if (Opc == AArch64::CNTD_XPiI)
    return true;

  if (!requiresGetVGCall())
    return false;

  if (Opc == AArch64::BL)
    return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);

  return Opc == TargetOpcode::COPY;
}

// Convenience function to determine whether I is part of the ZPR callee saves.
static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  case AArch64::STR_ZXI:
  case AArch64::LDR_ZXI:
  case AArch64::PTRUE_C_B:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  case AArch64::SEH_SaveZReg:
    return true;
  }
}

// Convenience function to determine whether I is part of the PPR callee saves.
static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::STR_PXI:
  case AArch64::LDR_PXI:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  case AArch64::SEH_SavePReg:
    return true;
  }
}

// Convenience function to determine whether I is part of the SVE callee saves.
static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
  return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
}

AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
    MachineFunction &MF, MachineBasicBlock &MBB,
    const AArch64FrameLowering &AFL)
    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
      RegInfo(*Subtarget.getRegisterInfo()) {
  TII = Subtarget.getInstrInfo();
  AFI = MF.getInfo<AArch64FunctionInfo>();

  HasFP = AFL.hasFP(MF);
  NeedsWinCFI = AFL.needsWinCFI(MF);

  // Windows unwind can't represent the required stack adjustments if we have
  // both SVE callee-saves and dynamic stack allocations, and the frame pointer
  // is before the SVE spills. The allocation of the frame pointer must be the
  // last instruction in the prologue so the unwinder can restore the stack
  // pointer correctly. (And there isn't any unwind opcode for `addvl sp, x29,
  // -17`.)
  //
  // Because of this, we do spills in the opposite order on Windows: first SVE,
  // then GPRs. The main side-effect of this is that it makes accessing
  // parameters passed on the stack more expensive.
  //
  // We could consider rearranging the spills for simpler cases.
  if (Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize()) {
    if (AFI->hasStackHazardSlotIndex())
      reportFatalUsageError("SME hazard padding is not supported on Windows");
    SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
  } else if (AFI->hasSplitSVEObjects()) {
    SVELayout = SVEStackLayout::Split;
  }
}
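
// An illustrative sketch of the two resulting frame layouts (stack grows
// down; simplified, and not emitted by this constructor):
//
//   default layout:                 CalleeSavesAboveFrameRecord (Windows):
//   | fixed objects          |      | fixed objects          |
//   | GPR callee saves (FP)  |      | SVE callee saves       |
//   | SVE callee saves       |      | GPR callee saves (FP)  |
//   | locals                 |      | locals                 |
//   v                               v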

MachineBasicBlock::iterator
AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
    bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
  unsigned NewOpc;

  // If the function contains streaming mode changes, we expect instructions
  // to calculate the value of VG before spilling. Move past these instructions
  // if necessary.
  if (AFL.requiresSaveVG(MF)) {
    auto &TLI = *Subtarget.getTargetLowering();
    while (isVGInstruction(MBBI, TLI))
      ++MBBI;
  }

  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
  int64_t MinOffset, MaxOffset;
  bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
      NewOpc, Scale, Width, MinOffset, MaxOffset);
  (void)Success;
  assert(Success && "unknown load/store opcode");

  // If the first store isn't right where we want SP then we can't fold the
  // update in so create a normal arithmetic instruction instead.
  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
    // If we are destroying the frame, make sure we add the increment after the
    // last frame operation.
    if (FrameFlag == MachineInstr::FrameDestroy) {
      ++MBBI;
      // Also skip the SEH instruction, if needed.
      if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
        ++MBBI;
    }
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
                    StackOffset::getFixed(CFAOffset));

    return std::prev(MBBI);
  }

  // Get rid of the SEH code associated with the old instruction.
  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
    if (AArch64InstrInfo::isSEHInstruction(*SEH))
      SEH->eraseFromParent();
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands());

  // Generate a new SEH code that corresponds to the new instruction.
  if (NeedsWinCFI) {
    HasWinCFI = true;
    AFL.insertSEH(*MIB, *TII, FrameFlag);
  }

  if (EmitCFI)
    CFIInstBuilder(MBB, MBBI, FrameFlag)
        .buildDefCFAOffset(CFAOffset - CSStackSizeInc);

  return std::prev(MBB.erase(MBBI));
}
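
// For example (an illustrative sketch): in a prologue with
// CSStackSizeInc == -16, the first callee-save store
//   stp x29, x30, [sp, #0]
// is rewritten to the pre-increment form
//   stp x29, x30, [sp, #-16]!
// folding away the separate `sub sp, sp, #16`; epilogue restores are
// symmetrically rewritten to post-increment (e.g. LDPXi -> LDPXpost).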

// Fix up the SEH opcode associated with the save/restore instruction.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
                           unsigned LocalStackSize) {
  MachineOperand *ImmOpnd = nullptr;
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Fix the offset in the SEH instruction");
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    break;
  }
  if (ImmOpnd)
    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}

void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
    MachineInstr &MI, uint64_t LocalStackSize) const {
  if (AArch64InstrInfo::isSEHInstruction(MI))
    return;

  unsigned Opc = MI.getOpcode();
  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  if (NeedsWinCFI) {
    HasWinCFI = true;
    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
           "Expecting a SEH instruction");
    fixupSEHOpcode(MBBI, LocalStackSize);
  }
}
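
// For example (an illustrative sketch): if a combined SP bump also allocated
// 32 bytes of local stack, a callee-save store
//   stp x29, x30, [sp, #16]
// is rewritten to
//   stp x29, x30, [sp, #48]
// i.e. the scaled immediate grows by LocalStackSize / Scale == 32 / 8 == 4.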

bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
    uint64_t StackBumpBytes) const {
  if (AFL.homogeneousPrologEpilog(MF))
    return false;

  if (AFI->getLocalStackSize() == 0)
    return false;

  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
  // (to force a stp with predecrement) to match the packed unwind format,
  // provided that there actually are any callee-saved registers to merge the
  // decrement with.
  // This is potentially marginally slower, but allows using the packed
  // unwind format for functions that both have a local area and callee-saved
  // registers. Using the packed unwind format notably reduces the size of
  // the unwind info.
  if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
      MF.getFunction().hasOptSize())
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores.
  if (StackBumpBytes >= 512 ||
      AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo.hasStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (AFL.canUseRedZone(MF))
    return false;

  // When there is an SVE area on the stack, always allocate the
  // callee-saves and spills/locals separately.
  if (AFI->hasSVEStackSize())
    return false;

  return true;
}
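
// For example (an illustrative sketch, not code emitted here): with 16 bytes
// of callee saves and 32 bytes of locals, combining the bumps yields a single
// SP adjustment,
//   sub sp, sp, #48
//   stp x29, x30, [sp, #32]    // save offset fixed up by the local area size
// whereas the separate form needs two adjustments:
//   stp x29, x30, [sp, #-16]!
//   sub sp, sp, #32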

SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
  StackOffset PPRCalleeSavesSize =
      StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
  StackOffset ZPRCalleeSavesSize =
      StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
  StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
  StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
  if (SVELayout == SVEStackLayout::Split)
    return {{PPRCalleeSavesSize, PPRLocalsSize},
            {ZPRCalleeSavesSize, ZPRLocalsSize}};
  // For simplicity, attribute all locals to ZPRs when split SVE is disabled.
  return {{PPRCalleeSavesSize, StackOffset{}},
          {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
}

struct SVEPartitions {
  struct {
    MachineBasicBlock::iterator Begin, End;
  } PPR, ZPR;
};

static SVEPartitions partitionSVECS(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    StackOffset PPRCalleeSavesSize,
                                    StackOffset ZPRCalleeSavesSize,
                                    bool IsEpilogue) {
  MachineBasicBlock::iterator PPRsI = MBBI;
  MachineBasicBlock::iterator End =
      IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
  auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
  // Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
  if (PPRCalleeSavesSize) {
    PPRsI = AdjustI(PPRsI);
    assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
    while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
      IsEpilogue ? (--PPRsI) : (++PPRsI);
  }
  MachineBasicBlock::iterator ZPRsI = PPRsI;
  if (ZPRCalleeSavesSize) {
    ZPRsI = AdjustI(ZPRsI);
    assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
    while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
      IsEpilogue ? (--ZPRsI) : (++ZPRsI);
  }
  if (IsEpilogue)
    return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
  return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
}

AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               const AArch64FrameLowering &AFL)
    : AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
  EmitCFI = AFI->needsDwarfUnwindInfo(MF);
  EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  IsFunclet = MBB.isEHFuncletEntry();
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);

#ifndef NDEBUG
  collectBlockLiveins();
#endif
}

#ifndef NDEBUG
/// Collect live registers from the end of \p MI's parent up to (including) \p
/// MI in \p LiveRegs.
static void getLivePhysRegsUpTo(MachineInstr &MI,
                                const TargetRegisterInfo &TRI,
                                LivePhysRegs &LiveRegs) {

  MachineBasicBlock &MBB = *MI.getParent();
  LiveRegs.addLiveOuts(MBB);
  for (const MachineInstr &MI :
       reverse(make_range(MI.getIterator(), MBB.instr_end())))
    LiveRegs.stepBackward(MI);
}

void AArch64PrologueEmitter::collectBlockLiveins() {
  // Collect live registers from the end of MBB up to the start of the existing
  // frame setup instructions.
  PrologueEndI = MBB.begin();
  while (PrologueEndI != MBB.end() &&
         PrologueEndI->getFlag(MachineInstr::FrameSetup))
    ++PrologueEndI;

  if (PrologueEndI != MBB.end()) {
    getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
    // Ignore registers used for stack management for now.
    LiveRegs.removeReg(AArch64::SP);
    LiveRegs.removeReg(AArch64::X19);
    LiveRegs.removeReg(AArch64::FP);
    LiveRegs.removeReg(AArch64::LR);

    // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
    // This is necessary to spill VG if required where SVE is unavailable, but
    // X0 is preserved around this call.
    if (requiresGetVGCall())
      LiveRegs.removeReg(AArch64::X0);
  }
}

void AArch64PrologueEmitter::verifyPrologueClobbers() const {
  if (PrologueEndI == MBB.end())
    return;
  // Check if any of the newly inserted instructions clobber any of the live
  // registers.
  for (MachineInstr &MI :
       make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
    for (auto &Op : MI.operands())
      if (Op.isReg() && Op.isDef())
        assert(!LiveRegs.contains(Op.getReg()) &&
               "live register clobbered by inserted prologue instructions");
  }
}
#endif

void AArch64PrologueEmitter::determineLocalsStackSize(
    uint64_t StackSize, uint64_t PrologueSaveSize) {
  AFI->setLocalStackSize(StackSize - PrologueSaveSize);
  CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
}

// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of an SVE register.
static int64_t upperBound(StackOffset Size) {
  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}
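
// For example: the architecture caps vscale at 16 (SVE registers are at most
// 2048 bits, i.e. 16 x 128 bits), so one scalable byte is at most 16 bytes,
// and upperBound(StackOffset::get(64, 32)) == 32 * 16 + 64 == 576.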

void AArch64PrologueEmitter::allocateStackSpace(
    MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
    StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) {

  if (!AllocSize)
    return;

  DebugLoc DL;
  const int64_t MaxAlign = MFI.getMaxAlign().value();
  const uint64_t AndMask = ~(MaxAlign - 1);

  if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
    Register TargetReg = RealignmentPadding
                             ? AFL.findScratchNonCalleeSaveRegister(&MBB)
                             : AArch64::SP;
    // SUB Xd/SP, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, InitialOffset);

    if (RealignmentPadding) {
      // AND SP, X9, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(TargetReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI->setStackRealigned(true);

      // No need for SEH instructions here; if we're realigning the stack,
      // we've set a frame pointer and already finished the SEH prologue.
      assert(!NeedsWinCFI);
    }
    return;
  }

  //
  // Stack probing allocation.
  //

  // Fixed length allocation. If we don't need to re-align the stack and don't
  // have SVE objects, we can use a more efficient sequence for stack probing.
  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
    assert(ScratchReg != AArch64::NoRegister);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
        .addDef(ScratchReg)
        .addImm(AllocSize.getFixed())
        .addImm(InitialOffset.getFixed())
        .addImm(InitialOffset.getScalable());
    // The fixed allocation may leave unprobed bytes at the top of the
    // stack. If we have subsequent allocation (e.g. if we have variable-sized
    // objects), we need to issue an extra probe, so these allocations start in
    // a known state.
    if (FollowupAllocs) {
      // STR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    return;
  }

  // Variable length allocation.

  // If the (unknown) allocation size cannot exceed the probe size, decrement
  // the stack pointer right away.
  int64_t ProbeSize = AFI->getStackProbeSize();
  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
                              ? AFL.findScratchNonCalleeSaveRegister(&MBB)
                              : AArch64::SP;
    assert(ScratchReg != AArch64::NoRegister);
    // SUB Xd, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, InitialOffset);
    if (RealignmentPadding) {
      // AND SP, Xn, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI->setStackRealigned(true);
    }
    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
                              AArch64::StackProbeMaxUnprobedStack) {
      // STR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
          .addReg(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  // Emit a variable-length allocation probing loop.
  // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
  // each of them guaranteed to adjust the stack by less than the probe size.
  Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
  assert(TargetReg != AArch64::NoRegister);
  // SUB Xd, SP, AllocSize
  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
                  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {
    // AND Xn, Xn, 0b11111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
        .addReg(TargetReg, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
        .setMIFlags(MachineInstr::FrameSetup);
  }

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
      .addReg(TargetReg);
  if (EmitCFI) {
    // Set the CFA register back to SP.
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
        .buildDefCFARegister(AArch64::SP);
  }
  if (RealignmentPadding)
    AFI->setStackRealigned(true);
}
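
// A rough sketch (assumed expansion; the PROBED_STACKALLOC_VAR pseudo is
// lowered later, elsewhere) of the probing loop requested above:
//
//   loop: sub  sp, sp, #<probe-size>   // step down by one probe unit
//         cmp  sp, xN                  // xN = target SP computed above
//         b.le done
//         str  xzr, [sp]               // touch the page
//         b    loop
//   done: mov  sp, xN                  // final, sub-probe-size adjustment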

void AArch64PrologueEmitter::emitPrologue() {
  const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
  const MachineBasicBlock::iterator EndI = MBB.end();

  // At this point, we're going to decide whether or not the function uses a
  // redzone. In most cases, the function doesn't have a redzone so let's
  // assume that's false and set it to true in the case that there's a redzone.
  AFI->setHasRedZone(false);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // In some cases, particularly with CallingConv::SwiftTail, it is possible to
  // have a tail-call where the caller only needs to adjust the stack pointer in
  // the epilogue. In this case, we still need to emit a SEH prologue sequence.
  // See `seh-minimal-prologue-epilogue.ll` test cases.
  if (AFI->getArgumentStackToRestore())
    HasWinCFI = true;

  if (AFI->shouldSignReturnAddress(MF)) {
    // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
    // are inserted by emitPacRetPlusLeafHardening().
    if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
      BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    // AArch64PointerAuth pass will insert SEH_PACSignLR
    HasWinCFI |= NeedsWinCFI;
  }

  if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
    emitShadowCallStackPrologue(PrologueBeginI, DL);
    HasWinCFI |= NeedsWinCFI;
  }

  if (EmitCFI && AFI->isMTETagged())
    BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
        .setMIFlag(MachineInstr::FrameSetup);

  // We signal the presence of a Swift extended frame to external tools by
  // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
  // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
  // bits so that is still true.
  if (HasFP && AFI->hasSwiftAsyncContext())
    emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Set tagged base pointer to the requested stack slot. Ideally it should
  // match SP value after prologue.
  if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
    AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
  else
    AFI->setTaggedBasePointerOffset(MFI.getStackSize());

  // getStackSize() includes all the locals in its size calculation. We don't
  // include these locals when computing the stack size of a funclet, as they
  // are allocated in the parent's stack frame and accessed via the frame
  // pointer from the funclet. We only save the callee saved registers in the
  // funclet, which are really the callee saved registers of the parent
  // function, including the funclet.
  int64_t NumBytes =
      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
  if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
    return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);

  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  determineLocalsStackSize(NumBytes, PrologueSaveSize);

  MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
  if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
    // If we're doing SVE saves first, we need to immediately allocate space
    // for fixed objects, then space for the SVE callee saves.
    //
    // Windows unwind requires that the scalable size is a multiple of 16;
    // that's handled when the callee-saved size is computed.
    auto SaveSize =
        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
        StackOffset::getFixed(FixedObject);
    allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
                       /*FollowupAllocs=*/true);
    NumBytes -= FixedObject;

    // Now allocate space for the GPR callee saves.
    MachineBasicBlock::iterator MBBI = PrologueBeginI;
    while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
      ++MBBI;
    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
    NumBytes -= AFI->getCalleeSavedStackSize();
  } else if (CombineSPBump) {
    assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
    emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-NumBytes), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitAsyncCFI);
    NumBytes = 0;
  } else if (HomPrologEpilog) {
    // Stack has been already adjusted.
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
        PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  auto &TLI = *Subtarget.getTargetLowering();

  MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
  while (AfterGPRSavesI != EndI &&
         AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
         !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
    if (CombineSPBump &&
        // Only fix-up frame-setup load/store instructions.
        (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
      fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
                                        AFI->getLocalStackSize());
    ++AfterGPRSavesI;
  }

  // For funclets the FP belongs to the containing function. Only set up FP if
  // we actually need to.
  if (!IsFunclet && HasFP)
    emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);

  // Now emit the moves for whatever callee saved regs we have (including FP,
  // LR if those are saved). Frame instructions for SVE registers are emitted
  // later, after the instructions which actually save the SVE regs.
  if (EmitAsyncCFI)
    emitCalleeSavedGPRLocations(AfterGPRSavesI);

  // Alignment is required for the parent frame, not the funclet.
  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
  const int64_t RealignmentPadding =
      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
          ? MFI.getMaxAlign().value() - 16
          : 0;

  if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
    emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);

  auto [PPR, ZPR] = getSVEStackFrameSizes();
  StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
  StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
  StackOffset CFAOffset =
      StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;

  MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
  // Allocate space for the callee saves and PPR locals (if any).
  if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
    auto [PPRRange, ZPRRange] =
        partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
                       ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
    AfterSVESavesI = ZPRRange.End;
    if (EmitAsyncCFI)
      emitCalleeSavedSVELocations(AfterSVESavesI);

    StackOffset AllocateBeforePPRs = SVECalleeSavesSize;
    StackOffset AllocateAfterPPRs = PPR.LocalsSize;
    if (SVELayout == SVEStackLayout::Split) {
      AllocateBeforePPRs = PPR.CalleeSavesSize;
      AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize;
    }
    allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects() || AllocateAfterPPRs ||
                           ZPR.LocalsSize || NonSVELocalsSize);
    CFAOffset += AllocateBeforePPRs;
    assert(PPRRange.End == ZPRRange.Begin &&
           "Expected ZPR callee saves after PPR locals");
    allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects() || ZPR.LocalsSize ||
                           NonSVELocalsSize);
    CFAOffset += AllocateAfterPPRs;
  } else {
    // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been
    // allocated (and separate PPR locals are not supported; all SVE locals,
    // both PPR and ZPR, are within the ZPR locals area).
    assert(!PPR.LocalsSize && "Unexpected PPR locals!");
    CFAOffset += SVECalleeSavesSize;
  }

  // Allocate space for the rest of the frame including ZPR locals. Align the
  // stack as necessary.
  assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
         "Cannot use redzone with stack realignment");
  if (!AFL.canUseRedZone(MF)) {
    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
    // correct value here, as NumBytes also includes padding bytes, which
    // shouldn't be counted here.
    allocateStackSpace(
        AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize,
        EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects());
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  // For funclets the BP belongs to the containing function.
  if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
                     AArch64::SP, false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  // The very last FrameSetup instruction indicates the end of prologue. Emit a
  // SEH opcode indicating the prologue end.
  if (NeedsWinCFI && HasWinCFI) {
    BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // SEH funclets are passed the frame pointer in X1. If the parent
  // function uses the base register, then the base register is used
  // directly, and is not retrieved from X1.
  if (IsFunclet && F.hasPersonalityFn()) {
    EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
    if (isAsynchronousEHPersonality(Per)) {
      BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
              AArch64::FP)
          .addReg(AArch64::X1)
          .setMIFlag(MachineInstr::FrameSetup);
      MBB.addLiveIn(AArch64::X1);
    }
  }

  if (EmitCFI && !EmitAsyncCFI) {
    if (HasFP) {
      emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
    } else {
      StackOffset TotalSize =
          AFL.getSVEStackSize(MF) +
          StackOffset::getFixed((int64_t)MFI.getStackSize());
      CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
      CFIBuilder.insertCFIInst(
          createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
                       TotalSize, /*LastAdjustmentWasScalable=*/false));
    }
    emitCalleeSavedGPRLocations(AfterSVESavesI);
    emitCalleeSavedSVELocations(AfterSVESavesI);
  }
}

void AArch64PrologueEmitter::emitShadowCallStackPrologue(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
  // Shadow call stack prolog: str x30, [x18], #8
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
      .addReg(AArch64::X18, RegState::Define)
      .addReg(AArch64::LR)
      .addReg(AArch64::X18)
      .addImm(8)
      .setMIFlag(MachineInstr::FrameSetup);

  // This instruction also makes x18 live-in to the entry block.
  MBB.addLiveIn(AArch64::X18);

  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);

  if (EmitCFI) {
    // Emit a CFI instruction that causes 8 to be subtracted from the value of
    // x18 when unwinding past this frame.
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
        .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
  }
}
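
// For reference: the escape above encodes DW_CFA_val_expression for x18 with
// the expression DW_OP_breg18 -8, i.e. "the previous x18 is the current x18
// minus 8", undoing the `str x30, [x18], #8` post-increment. The sleb128
// encoding of -8 is a single byte, 0x78, which is exactly what
// `static_cast<char>(-8) & 0x7f` produces.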

void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
  switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
  case SwiftAsyncFramePointerMode::DeploymentBased:
    if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
      // The special symbol below is absolute and has a *value* that can be
      // combined with the frame pointer to signal an extended frame.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
          .addExternalSymbol("swift_async_extendedFramePointerFlags",
                             AArch64II::MO_GOT);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
        HasWinCFI = true;
      }
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
          .addUse(AArch64::FP)
          .addUse(AArch64::X16)
          .addImm(Subtarget.isTargetILP32() ? 32 : 0);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
        HasWinCFI = true;
      }
      break;
    }
    [[fallthrough]];

  case SwiftAsyncFramePointerMode::Always:
    // ORR x29, x29, #0x1000_0000_0000_0000
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
        .addUse(AArch64::FP)
        .addImm(0x1100)
        .setMIFlag(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
      HasWinCFI = true;
    }
    break;

  case SwiftAsyncFramePointerMode::Never:
    break;
  }
}

void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
    int64_t NumBytes, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL) const {
  assert(!HasFP && "unexpected function without stack frame but with FP");
  assert(!AFL.getSVEStackSize(MF) &&
         "unexpected function without stack frame but with SVE objects");
  // All of the stack allocation is for locals.
  AFI->setLocalStackSize(NumBytes);
  if (!NumBytes) {
    if (NeedsWinCFI && HasWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    return;
  }
  // REDZONE: If the stack size is less than 128 bytes, we don't need
  // to actually allocate.
  if (AFL.canUseRedZone(MF)) {
    AFI->setHasRedZone(true);
    ++NumRedZoneFunctions;
  } else {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-NumBytes), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
    if (EmitCFI) {
      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
          .buildDefCFAOffset(NumBytes, FrameLabel);
    }
  }

  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }
}

void AArch64PrologueEmitter::emitFramePointerSetup(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    unsigned FixedObject) {
  int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
  if (CombineSPBump)
    FPOffset += AFI->getLocalStackSize();

  if (AFI->hasSwiftAsyncContext()) {
    // Before we update the live FP we have to ensure there's a valid (or
    // null) asynchronous context in its slot just before FP in the frame
    // record, so store it now.
    const auto &Attrs = MF.getFunction().getAttributes();
    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)
      MBB.addLiveIn(AArch64::X22);
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
        .addUse(Reg)
        .addUse(AArch64::SP)
        .addImm(FPOffset - 8)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
      // to multiple instructions, should be mutually-exclusive.
      assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
      HasWinCFI = true;
    }
  }

  if (HomPrologEpilog) {
    auto Prolog = MBBI;
    --Prolog;
    assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
    Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
  } else {
    // Issue sub fp, sp, FPOffset or
    // mov fp, sp when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
                    StackOffset::getFixed(FPOffset), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
    if (NeedsWinCFI && HasWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      // After setting up the FP, the rest of the prolog doesn't need to be
      // included in the SEH unwind info.
      NeedsWinCFI = false;
    }
  }
  if (EmitAsyncCFI)
    emitDefineCFAWithFP(MBBI, FixedObject);
}

// Define the current CFA rule to use the provided FP.
void AArch64PrologueEmitter::emitDefineCFAWithFP(
    MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
  const int OffsetToFirstCalleeSaveFromFP =
      AFI->getCalleeSaveBaseToFrameRecordOffset() -
      AFI->getCalleeSavedStackSize();
  Register FramePtr = RegInfo.getFrameRegister(MF);
  CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
      .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
}
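
// Worked example (illustrative numbers): with FixedObject == 0, a 64-byte
// callee-save area, and the frame record 16 bytes above its base,
// OffsetToFirstCalleeSaveFromFP == 16 - 64 == -48, so the rule becomes
// CFA = FP + 48, i.e. FP plus the callee-save bytes above the frame record.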

void AArch64PrologueEmitter::emitWindowsStackProbe(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
    int64_t RealignmentPadding) const {
  if (AFI->getSVECalleeSavedStackSize())
    report_fatal_error("SVE callee saves not yet supported with stack probing");

  // Find an available register to spill the value of X15 to, if X15 is being
  // used already for nest.
  unsigned X15Scratch = AArch64::NoRegister;
  if (llvm::any_of(MBB.liveins(),
                   [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
                     return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
                                                           LiveIn.PhysReg);
                   })) {
    X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
    assert(X15Scratch != AArch64::NoRegister &&
           (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
#ifndef NDEBUG
    LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
#endif
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
        .addReg(AArch64::XZR)
        .addReg(AArch64::X15, RegState::Undef)
        .addReg(AArch64::X15, RegState::Implicit)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
  if (NeedsWinCFI) {
    HasWinCFI = true;
    // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
    // exceed this amount. We need to move at most 2^24 - 1 into x15.
    // This is at most two instructions, MOVZ followed by MOVK.
    // TODO: Fix to use multiple stack alloc unwind codes for stacks
    // exceeding 256MB in size.
    if (NumBytes >= (1 << 28))
      report_fatal_error("Stack size cannot exceed 256MB for stack "
                         "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
        .addImm(LowNumWords)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);
    if ((NumWords & 0xFFFF0000) != 0) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
          .addReg(AArch64::X15)
          .addImm((NumWords & 0xFFFF0000) >> 16) // High half
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  } else {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
        .addImm(NumWords)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  const char *ChkStk = Subtarget.getChkStkName();
  switch (MF.getTarget().getCodeModel()) {
  case CodeModel::Tiny:
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Kernel:
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
        .addExternalSymbol(ChkStk)
        .addReg(AArch64::X15, RegState::Implicit)
        .addReg(AArch64::X16,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::X17,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::NZCV,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    break;
  case CodeModel::Large:
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
        .addReg(AArch64::X16, RegState::Define)
        .addExternalSymbol(ChkStk)
        .addExternalSymbol(ChkStk)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
        .addReg(AArch64::X16, RegState::Kill)
        .addReg(AArch64::X15, RegState::Implicit)
        .addReg(AArch64::X16,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::X17,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::NZCV,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    break;
  }

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
      .addReg(AArch64::SP, RegState::Kill)
      .addReg(AArch64::X15, RegState::Kill)
      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
      .setMIFlags(MachineInstr::FrameSetup);
  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  NumBytes = 0;

  if (RealignmentPadding > 0) {
    if (RealignmentPadding >= 4096) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
          .addReg(AArch64::X16, RegState::Define)
          .addImm(RealignmentPadding)
          .setMIFlags(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
          .addReg(AArch64::SP)
          .addReg(AArch64::X16, RegState::Kill)
          .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
          .addReg(AArch64::SP)
          .addImm(RealignmentPadding)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
    AFI->setStackRealigned(true);

    // No need for SEH instructions here; if we're realigning the stack,
    // we've set a frame pointer and already finished the SEH prologue.
    assert(!NeedsWinCFI);
  }
  if (X15Scratch != AArch64::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
        .addReg(AArch64::XZR)
        .addReg(X15Scratch, RegState::Undef)
        .addReg(X15Scratch, RegState::Implicit)
        .setMIFlag(MachineInstr::FrameSetup);
  }
}
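
// For reference: the Windows AArch64 stack-probe helper (__chkstk) expects
// the allocation size in x15 in units of 16 bytes and probes the stack page
// by page; SP is only actually moved by the `sub sp, sp, x15, uxtx #4`
// emitted above (i.e. x15 * 16 bytes). The call clobbers x16, x17 and NZCV,
// which is why those are marked as implicit defs.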

void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
    MachineBasicBlock::iterator MBBI) const {
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
    if (MFI.hasScalableStackID(FrameIdx))
      continue;

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
    CFIBuilder.buildOffset(Info.getReg(), Offset);
  }
}

void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
    MachineBasicBlock::iterator MBBI) const {
  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);

  std::optional<int64_t> IncomingVGOffsetFromDefCFA;
  if (AFL.requiresSaveVG(MF)) {
    auto IncomingVG = *find_if(
        reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
    IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
                                 AFL.getOffsetOfLocalArea();
  }

  StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
  for (const auto &Info : CSI) {
    int FI = Info.getFrameIdx();
    if (!MFI.hasScalableStackID(FI))
      continue;

    // Not all unwinders may know about SVE registers, so assume the lowest
    // common denominator.
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    MCRegister Reg = Info.getReg();
    if (!RegInfo.regNeedsCFI(Reg, Reg))
      continue;

    StackOffset Offset =
        StackOffset::getScalable(MFI.getObjectOffset(FI)) -
        StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));

    // The scalable vectors are below (lower address) the scalable predicates
    // with split SVE objects, so we must subtract the size of the predicates.
    if (SVELayout == SVEStackLayout::Split &&
        MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Offset -= PPRStackSize;

    CFIBuilder.insertCFIInst(
        createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
  }
}

static bool isFuncletReturnInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
    return true;
  }
}

AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               const AArch64FrameLowering &AFL)
    : AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
  EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
  SEHEpilogueStartI = MBB.end();
}

void AArch64EpilogueEmitter::emitEpilogue() {
  MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
  if (MBB.end() != EpilogueEndI) {
    DL = EpilogueEndI->getDebugLoc();
    IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
  }

  int64_t NumBytes =
      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
                                              MF.getFunction().isVarArg());
  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  int64_t AfterCSRPopSize = ArgumentStackToRestore;
  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // We cannot rely on the local stack size set in emitPrologue if the function
  // has funclets, as funclets have different local stack size requirements, and
  // the current value set in emitPrologue may be that of the containing
  // function.
  if (MF.hasEHFunclets())
    AFI->setLocalStackSize(NumBytes - PrologueSaveSize);

  if (HomPrologEpilog) {
    assert(!NeedsWinCFI);
    auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
    if (FirstHomogenousEpilogI != MBB.begin()) {
      auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
      if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
        FirstHomogenousEpilogI = HomogeneousEpilog;
    }

    // Adjust local stack.
    emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(AFI->getLocalStackSize()), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

    // SP has been already adjusted while restoring callee save regs.
    // We've bailed-out the case with adjusting SP for arguments.
    assert(AfterCSRPopSize == 0);
    return;
  }

  bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
  // Assume we can't combine the last pop with the sp restore.
  bool CombineAfterCSRBump = false;
  if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
    AfterCSRPopSize += FixedObject;
  } else if (!CombineSPBump && PrologueSaveSize != 0) {
    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
           AArch64InstrInfo::isSEHInstruction(*Pop))
      Pop = std::prev(Pop);
    // Converting the last ldp to a post-index ldp is valid only if the last
    // ldp's offset is 0.
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    // If the offset is 0 and the AfterCSR pop is not actually trying to
    // allocate more stack for arguments (in space that an untimely interrupt
    // may clobber), convert it to a post-index ldp.
    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
      convertCalleeSaveRestoreToSPPrePostIncDec(
          Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
          PrologueSaveSize);
    } else {
      // If not, make sure to emit an add after the last ldp.
      // We're doing this by transferring the size to be restored from the
      // adjustment *before* the CSR pops to the adjustment *after* the CSR
      // pops.
      AfterCSRPopSize += PrologueSaveSize;
      CombineAfterCSRBump = true;
    }
  }

  // Move past the restores of the callee-saved registers.
  // If we plan on combining the sp bump of the local stack size and the callee
  // save stack size, we might need to adjust the CSR save and restore offsets.
  MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (FirstGPRRestoreI != Begin) {
    --FirstGPRRestoreI;
    if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
        (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord &&
         isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
      ++FirstGPRRestoreI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
                                        AFI->getLocalStackSize());
  }

  if (NeedsWinCFI) {
    // Note that there are cases where we insert SEH opcodes in the
    // epilogue when we had no SEH opcodes in the prologue. For
    // example, when there is no stack frame but there are stack
    // arguments. Insert the SEH_EpilogStart and remove it later if we
    // didn't emit any SEH opcodes, to avoid generating WinCFI for
    // functions that don't need it.
    BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
        .setMIFlag(MachineInstr::FrameDestroy);
    SEHEpilogueStartI = FirstGPRRestoreI;
    --SEHEpilogueStartI;
  }

  if (HasFP && AFI->hasSwiftAsyncContext())
    emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");

    // When we are about to restore the CSRs, the CFA register is SP again.
    if (EmitCFI && HasFP)
      CFIInstBuilder(MBB, MBB.getFirstTerminator(), MachineInstr::FrameDestroy)
          .buildDefCFA(AArch64::SP, NumBytes);

    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, StackOffset::getFixed(NumBytes));
    return;
  }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  auto [PPR, ZPR] = getSVEStackFrameSizes();
  auto [PPRRange, ZPRRange] = partitionSVECS(
      MBB,
      SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
          ? MBB.getFirstTerminator()
          : FirstGPRRestoreI,
      PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);

  StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
  StackOffset SVEStackSize =
      SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
  MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
  MachineBasicBlock::iterator RestoreEnd = PPRRange.End;

  // Deallocate the SVE area.
  if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
    StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize;
    // If the callee-save area is before FP, restoring the FP implicitly
    // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
    // explicitly.
    if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
      emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
                      SVELocalsSize, TII, MachineInstr::FrameDestroy, false,
                      NeedsWinCFI, &HasWinCFI);
    }

    // Deallocate callee-save non-SVE registers.
    emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

    // Deallocate fixed objects.
    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(FixedObject), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

    // Deallocate callee-save SVE registers.
    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                    SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
                    NeedsWinCFI, &HasWinCFI);
  } else if (AFI->hasSVEStackSize()) {
    // If we have stack realignment or variable-sized objects we must use the
    // FP to restore SVE callee saves (as there is an unknown amount of
    // data/padding between the SP and SVE CS area).
    Register BaseForSVEDealloc =
        (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
                                                              : AArch64::SP;
    if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
      // TODO: Support stack realignment and variable-sized objects.
      assert(
          SVELayout != SVEStackLayout::Split &&
          "unexpected stack realignment or variable sized objects with split "
          "SVE stack objects");

      Register CalleeSaveBase = AArch64::FP;
      if (int64_t CalleeSaveBaseOffset =
              AFI->getCalleeSaveBaseToFrameRecordOffset()) {
        // If we have a non-zero offset to the non-SVE CS base we need to
        // compute the base address by subtracting the offset in a temporary
        // register first (to avoid briefly deallocating the SVE CS).
        CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
            &AArch64::GPR64RegClass);
        emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
                        StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
                        MachineInstr::FrameDestroy);
      }
      // The code below will deallocate the stack space by moving the SP
      // to the start of the SVE callee-save area.
      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
                      -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
    } else if (BaseForSVEDealloc == AArch64::SP) {
      auto CFAOffset =
          SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);

      if (SVECalleeSavesSize) {
        // Deallocate the non-SVE locals first before we can deallocate (and
        // restore callee saves) from the SVE area.
        auto NonSVELocals = StackOffset::getFixed(NumBytes);
        emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
                        NonSVELocals, TII, MachineInstr::FrameDestroy, false,
                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
        CFAOffset -= NonSVELocals;
        NumBytes = 0;
      }

      if (ZPR.LocalsSize) {
        emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
                        ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false,
                        NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
        CFAOffset -= ZPR.LocalsSize;
      }

      StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize;
      if (SVELayout == SVEStackLayout::Split &&
          (PPR.LocalsSize || ZPR.CalleeSavesSize)) {
        assert(PPRRange.Begin == ZPRRange.End &&
               "Expected PPR restores after ZPR");
        emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
                        PPR.LocalsSize + ZPR.CalleeSavesSize, TII,
                        MachineInstr::FrameDestroy, false, NeedsWinCFI,
                        &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
        CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize;
        SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize;
      }

      // With split SVE, this deallocates the PPRs; otherwise, it deallocates
      // both ZPRs and PPRs:
      if (SVECalleeSavesToDealloc)
        emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
                        SVECalleeSavesToDealloc, TII,
                        MachineInstr::FrameDestroy, false, NeedsWinCFI,
                        &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
    }

    if (EmitCFI)
      emitCalleeSavedSVERestores(
          SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
  }

  if (!HasFP) {
    bool RedZone = AFL.canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && AfterCSRPopSize == 0)
      return;

    // Pop the local variables off the stack. If there are no callee-saved
    // registers, it means we are actually positioned at the terminator and can
    // combine stack increment for the locals and the stack increment for
    // callee-popped arguments into (possibly) a single instruction and be done.
    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += AfterCSRPopSize;

    emitFrameOffset(
        MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
        StackOffset::getFixed(StackRestoreBytes), TII,
        MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
        StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));

    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
      return;

    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
    emitFrameOffset(
        MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
        StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
        TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
  } else if (NumBytes)
    emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(NumBytes), TII,
                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

  // When we are about to restore the CSRs, the CFA register is SP again.
  if (EmitCFI && HasFP)
    CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
        .buildDefCFA(AArch64::SP, PrologueSaveSize);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (AfterCSRPopSize) {
    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
                                  "interrupt may have clobbered");

    emitFrameOffset(
        MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
        StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
        false, NeedsWinCFI, &HasWinCFI, EmitCFI,
        StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
  }
}
1625
1626bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1627 uint64_t StackBumpBytes) const {
1629 StackBumpBytes))
1630 return false;
1631 if (MBB.empty())
1632 return true;
1633
1634 // Disable combined SP bump if the last instruction is an MTE tag store. It
1635 // is almost always better to merge SP adjustment into those instructions.
1636 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1637 MachineBasicBlock::iterator Begin = MBB.begin();
1638 while (LastI != Begin) {
1639 --LastI;
1640 if (LastI->isTransient())
1641 continue;
1642 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1643 break;
1644 }
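// LastI now rests on the last non-transient instruction that is not part of
// the FrameDestroy sequence; if it is one of the MTE tag stores below, the
// SP bump is kept separate so it can later be folded into the tag store.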
1645 switch (LastI->getOpcode()) {
1646 case AArch64::STGloop:
1647 case AArch64::STZGloop:
1648 case AArch64::STGi:
1649 case AArch64::STZGi:
1650 case AArch64::ST2Gi:
1651 case AArch64::STZ2Gi:
1652 return false;
1653 default:
1654 return true;
1655 }
1656 llvm_unreachable("unreachable");
1657}
1658
1659 void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1660 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1661 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1662 case SwiftAsyncFramePointerMode::DeploymentBased:
1663 // Avoid the reload as it is GOT relative, and instead fall back to the
1664 // hardcoded value below. This allows a mismatch between the OS and
1665 // application without immediately terminating on the difference.
1666 [[fallthrough]];
1667 case SwiftAsyncFramePointerMode::Always:
1668 // We need to reset FP to its untagged state on return. Bit 60 is
1669 // currently used to show the presence of an extended frame.
1670
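// ANDXri takes a logical-immediate encoding rather than a raw mask: 0x10fe
// (N=1, immr=3, imms=62) decodes to ~(1ULL << 60), i.e. all ones except bit
// 60, so the AND below is exactly the BIC shown in the next comment.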
1671 // BIC x29, x29, #0x1000_0000_0000_0000
1672 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1673 AArch64::FP)
1674 .addUse(AArch64::FP)
1675 .addImm(0x10fe)
1676 .setMIFlag(MachineInstr::FrameDestroy);
1677 if (NeedsWinCFI) {
1678 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1679 .setMIFlag(MachineInstr::FrameDestroy);
1680 HasWinCFI = true;
1681 }
1682 break;
1683
1684 case SwiftAsyncFramePointerMode::Never:
1685 break;
1686 }
1687}
1688
1689 void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1690 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1691 // Shadow call stack epilogue: ldr x30, [x18, #-8]!
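// LDRXpre is a pre-indexed load: x18 is decremented by 8 with writeback
// first, then the saved LR is reloaded from the new top of the shadow call
// stack, undoing the prologue's post-indexed `str x30, [x18], #8`.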
1692 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1693 .addReg(AArch64::X18, RegState::Define)
1694 .addReg(AArch64::LR, RegState::Define)
1695 .addReg(AArch64::X18)
1696 .addImm(-8)
1697 .setMIFlag(MachineInstr::FrameDestroy);
1698
1699 if (NeedsWinCFI)
1700 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1701 .setMIFlag(MachineInstr::FrameDestroy);
1702
1703 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1704 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1705 .buildRestore(AArch64::X18);
1706}
1707
1708void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1709 MachineBasicBlock::iterator MBBI, bool SVE) const {
1710 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1711 if (CSI.empty())
1712 return;
1713
1714 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1715
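// The SVE flag selects which half of the callee-saved list to annotate:
// entries spilled to scalable stack slots when true, fixed slots when false.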
1716 for (const auto &Info : CSI) {
1717 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1718 continue;
1719
1720 MCRegister Reg = Info.getReg();
1721 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1722 continue;
1723
1724 CFIBuilder.buildRestore(Info.getReg());
1725 }
1726}
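// Note: the emitCalleeSavedSVERestores and emitCalleeSavedGPRRestores
// helpers used elsewhere in this file presumably forward here with
// SVE = true and SVE = false respectively.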
1727
1728void AArch64EpilogueEmitter::finalizeEpilogue() const {
1729 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1730 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1731 HasWinCFI |= NeedsWinCFI;
1732 }
1733 if (EmitCFI)
1734 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1735 if (AFI->shouldSignReturnAddress(MF)) {
1736 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1737 // are inserted by emitPacRetPlusLeafHardening().
1738 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1739 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1740 TII->get(AArch64::PAUTH_EPILOGUE))
1741 .setMIFlag(MachineInstr::FrameDestroy);
1742 }
1743 HasWinCFI |= NeedsWinCFI;
1744 // AArch64PointerAuth pass will insert SEH_PACSignLR
1745 }
1746 if (HasWinCFI) {
1747 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1748 .setMIFlag(MachineInstr::FrameDestroy);
1749 if (!MF.hasWinCFI())
1750 MF.setHasWinCFI(true);
1751 }
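// If WinCFI was needed but no unwind opcodes were actually emitted, remove
// the SEH_EpilogStart placeholder rather than describe an empty epilogue.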
1752 if (NeedsWinCFI) {
1753 assert(SEHEpilogueStartI != MBB.end());
1754 if (!HasWinCFI)
1755 MBB.erase(SEHEpilogueStartI);
1756 }
1757}
1758
1759} // namespace llvm