1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until the
36// main function body runs, after the prologue. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access data in a frame, a constant offset from one of the pointers
79// (fp, bp, sp) to that data must be computable at compile time. The sizes
80// of the areas with a dotted background are not known at compile time when
81// those areas are present, so all three of fp, bp and sp must be set up to
82// be able to access all contents in the frame areas, assuming all of the
83// frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
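//
// As an illustration, consider a hypothetical function (the names f, g, buf
// and vla are purely illustrative):
//
//   void f(int n) {
//     int buf[8] __attribute__((aligned(32))); // over-aligned local
//     int vla[n];                              // variable-sized local (VLA)
//     g(buf, vla);                             // outgoing call
//   }
//
// Such a function plausibly needs all three pointers: the over-aligned local
// forces stack realignment, so fixed incoming arguments are reached via fp;
// the VLA makes the distance from sp to the fixed-size locals unknown, so
// those are reached via the base pointer; and outgoing call arguments are
// addressed relative to sp.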
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
147#include "llvm/Support/Debug.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
160#define DEBUG_TYPE "arm-frame-lowering"
161
162using namespace llvm;
163
164static cl::opt<bool>
165SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
166 cl::desc("Align ARM NEON spills in prolog and epilog"));
167
170 unsigned NumAlignedDPRCS2Regs);
171
172enum class SpillArea {
173 GPRCS1,
174 GPRCS2,
175 FPStatus,
176 DPRCS1,
177 DPRCS2,
178 GPRCS3,
179 FPCXT,
180};
181
182/// Get the spill area that Reg should be saved into in the prologue.
185 unsigned NumAlignedDPRCS2Regs,
187 // NoSplit:
188 // push {r0-r12, lr} GPRCS1
189 // vpush {d8-d15} DPRCS1
190 //
191 // SplitR7:
192 // push {r0-r7, lr} GPRCS1
193 // push {r8-r12} GPRCS2
194 // vpush {d8-d15} DPRCS1
195 //
196 // SplitR11WindowsSEH:
197 // push {r0-r10, r12} GPRCS1
198 // vpush {d8-d15} DPRCS1
199 // push {r11, lr} GPRCS3
200 //
201 // SplitR11AAPCSSignRA:
202 // push {r0-r10, r12} GPRCS1
203 // push {r11, lr} GPRCS2
204 // vpush {d8-d15} DPRCS1
205
206 // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
207 // the top of the stack frame.
208 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
209 // of SP. If used, it will be below the other save areas, after the stack has
210 // been re-aligned.
211
212 switch (Reg) {
213 default:
214 dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
215 llvm_unreachable("Don't know where to spill this register");
216 break;
217
218 case ARM::FPCXTNS:
219 return SpillArea::FPCXT;
220
221 case ARM::FPSCR:
222 case ARM::FPEXC:
223 return SpillArea::FPStatus;
224
225 case ARM::R0:
226 case ARM::R1:
227 case ARM::R2:
228 case ARM::R3:
229 case ARM::R4:
230 case ARM::R5:
231 case ARM::R6:
232 case ARM::R7:
233 return SpillArea::GPRCS1;
234
235 case ARM::R8:
236 case ARM::R9:
237 case ARM::R10:
238 if (Variation == ARMSubtarget::SplitR7)
239 return SpillArea::GPRCS2;
240 else
241 return SpillArea::GPRCS1;
242
243 case ARM::R11:
244 if (Variation == ARMSubtarget::SplitR7 ||
246 return SpillArea::GPRCS2;
247 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
248 return SpillArea::GPRCS3;
249
250 return SpillArea::GPRCS1;
251
252 case ARM::R12:
253 if (Variation == ARMSubtarget::SplitR7)
254 return SpillArea::GPRCS2;
255 else
256 return SpillArea::GPRCS1;
257
258 case ARM::LR:
259 if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
260 return SpillArea::GPRCS2;
261 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
262 return SpillArea::GPRCS3;
263
264 return SpillArea::GPRCS1;
265
266 case ARM::D0:
267 case ARM::D1:
268 case ARM::D2:
269 case ARM::D3:
270 case ARM::D4:
271 case ARM::D5:
272 case ARM::D6:
273 case ARM::D7:
274 return SpillArea::DPRCS1;
275
276 case ARM::D8:
277 case ARM::D9:
278 case ARM::D10:
279 case ARM::D11:
280 case ARM::D12:
281 case ARM::D13:
282 case ARM::D14:
283 case ARM::D15:
284 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
285 return SpillArea::DPRCS2;
286 else
287 return SpillArea::DPRCS1;
288
289 case ARM::D16:
290 case ARM::D17:
291 case ARM::D18:
292 case ARM::D19:
293 case ARM::D20:
294 case ARM::D21:
295 case ARM::D22:
296 case ARM::D23:
297 case ARM::D24:
298 case ARM::D25:
299 case ARM::D26:
300 case ARM::D27:
301 case ARM::D28:
302 case ARM::D29:
303 case ARM::D30:
304 case ARM::D31:
305 return SpillArea::DPRCS1;
306 }
307}
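// As a worked illustration (assuming no aligned DPRCS2 registers are
// requested): with the SplitR7 variation and a callee-saved set of
// {r4-r11, lr, d8-d15}, the mapping above places
//   r4-r7, lr  in GPRCS1   (push {r4-r7, lr})
//   r8-r11     in GPRCS2   (e.g. push.w {r8-r11} in Thumb2)
//   d8-d15     in DPRCS1   (vpush {d8-d15})
// matching the SplitR7 layout sketched at the top of this function.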
308
310 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
311 STI(sti) {}
312
314 // iOS always has a FP for backtracking; force other targets to keep their FP
315 // when doing FastISel too. The emitted code is currently superior, and in cases
316 // like test-suite's lencod, FastISel isn't quite correct when the FP is eliminated.
317 return MF.getSubtarget<ARMSubtarget>().useFastISel();
318}
319
320/// Returns true if the target can safely skip saving callee-saved registers
321/// for noreturn nounwind functions.
323 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
324 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
325 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
326
327 // Frame pointer and link register are not treated as normal CSR, thus we
328 // can always skip CSR saves for nonreturning functions.
329 return true;
330}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target wants to forcibly keep the frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
345 return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
352/// isFPReserved - Return true if the frame pointer register should be
353/// considered a reserved register on the scope of the specified function.
355 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
356}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has only small immediate offsets for
368 // addressing the stack frame. So a large call frame can cause poor codegen
369 // and may even make it impossible to scavenge a register.
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
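// For example: ((1 << 12) - 1) / 2 is 2047, so a function whose largest call
// site needs 2047 or more bytes of outgoing arguments does not get a reserved
// call frame; SP is then adjusted around each call site instead.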
375
376/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
377/// call frame pseudos can be simplified. Unlike most targets, having a FP
378/// is not sufficient here since we still may reference some objects via SP
379/// even when FP is available in Thumb2 mode.
380bool
383}
384
385// Returns how much of the incoming argument stack area we should clean up in an
386 // epilogue. For the C calling convention this will be 0; for guaranteed tail
387 // call conventions it can be positive (a normal return or a tail call to a
388 // function that uses less stack space for arguments) or negative (for a tail
389 // call to a function that needs more stack space than we do for arguments).
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments; this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
427// Given a load or a store instruction, generate an appropriate unwinding SEH
428// code on Windows.
430 const TargetInstrInfo &TII,
431 unsigned Flags) {
432 unsigned Opc = MBBI->getOpcode();
434 MachineFunction &MF = *MBB->getParent();
435 DebugLoc DL = MBBI->getDebugLoc();
437 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
438 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
439
440 Flags |= MachineInstr::NoMerge;
441
442 switch (Opc) {
443 default:
444 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
445 break;
446 case ARM::t2ADDri: // add.w r11, sp, #xx
447 case ARM::t2ADDri12: // add.w r11, sp, #xx
448 case ARM::t2MOVTi16: // movt r4, #xx
449 case ARM::tBL: // bl __chkstk
450 // These are harmless if used for just setting up a frame pointer,
451 // but that frame pointer can't be relied upon for unwinding, unless
452 // set up with SEH_SaveSP.
453 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
454 .addImm(/*Wide=*/1)
455 .setMIFlags(Flags);
456 break;
457
458 case ARM::t2MOVi16: { // mov(w) r4, #xx
459 bool Wide = MBBI->getOperand(1).getImm() >= 256;
460 if (!Wide) {
461 MachineInstrBuilder NewInstr =
462 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
463 NewInstr.add(MBBI->getOperand(0));
464 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
465 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
466 NewInstr.add(MO);
467 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
468 MBB->erase(MBBI);
469 MBBI = NewMBBI;
470 }
471 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
472 break;
473 }
474
475 case ARM::tBLXr: // blx r12 (__chkstk)
476 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
477 .addImm(/*Wide=*/0)
478 .setMIFlags(Flags);
479 break;
480
481 case ARM::t2MOVi32imm: // movw+movt
482 // This pseudo instruction expands into two mov instructions. If the
483 // second operand is a symbol reference, this will stay as two wide
484 // instructions, movw+movt. If they're immediates, the first one can
485 // end up as a narrow mov though.
486 // As two SEH instructions are appended here, they won't get interleaved
487 // between the two final movw/movt instructions, but it doesn't make any
488 // practical difference.
489 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
490 .addImm(/*Wide=*/1)
491 .setMIFlags(Flags);
492 MBB->insertAfter(MBBI, MIB);
493 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
494 .addImm(/*Wide=*/1)
495 .setMIFlags(Flags);
496 break;
497
498 case ARM::t2STR_PRE:
499 if (MBBI->getOperand(0).getReg() == ARM::SP &&
500 MBBI->getOperand(2).getReg() == ARM::SP &&
501 MBBI->getOperand(3).getImm() == -4) {
502 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
503 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
504 .addImm(1ULL << Reg)
505 .addImm(/*Wide=*/1)
506 .setMIFlags(Flags);
507 } else {
508 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
509 }
510 break;
511
512 case ARM::t2LDR_POST:
513 if (MBBI->getOperand(1).getReg() == ARM::SP &&
514 MBBI->getOperand(2).getReg() == ARM::SP &&
515 MBBI->getOperand(3).getImm() == 4) {
516 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
517 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
518 .addImm(1ULL << Reg)
519 .addImm(/*Wide=*/1)
520 .setMIFlags(Flags);
521 } else {
522 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
523 }
524 break;
525
526 case ARM::t2LDMIA_RET:
527 case ARM::t2LDMIA_UPD:
528 case ARM::t2STMDB_UPD: {
529 unsigned Mask = 0;
530 bool Wide = false;
531 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
532 const MachineOperand &MO = MBBI->getOperand(i);
533 if (!MO.isReg() || MO.isImplicit())
534 continue;
535 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
536 if (Reg == 15)
537 Reg = 14;
538 if (Reg >= 8 && Reg <= 13)
539 Wide = true;
540 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
541 Wide = true;
542 Mask |= 1 << Reg;
543 }
544 if (!Wide) {
545 unsigned NewOpc;
546 switch (Opc) {
547 case ARM::t2LDMIA_RET:
548 NewOpc = ARM::tPOP_RET;
549 break;
550 case ARM::t2LDMIA_UPD:
551 NewOpc = ARM::tPOP;
552 break;
553 case ARM::t2STMDB_UPD:
554 NewOpc = ARM::tPUSH;
555 break;
556 default:
558 }
559 MachineInstrBuilder NewInstr =
560 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
561 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
562 NewInstr.add(MBBI->getOperand(i));
563 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
564 MBB->erase(MBBI);
565 MBBI = NewMBBI;
566 }
567 unsigned SEHOpc =
568 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
569 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
570 .addImm(Mask)
571 .addImm(Wide ? 1 : 0)
572 .setMIFlags(Flags);
573 break;
574 }
575 case ARM::VSTMDDB_UPD:
576 case ARM::VLDMDIA_UPD: {
577 int First = -1, Last = 0;
578 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
579 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
580 if (First == -1)
581 First = Reg;
582 Last = Reg;
583 }
584 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
585 .addImm(First)
586 .addImm(Last)
587 .setMIFlags(Flags);
588 break;
589 }
590 case ARM::tSUBspi:
591 case ARM::tADDspi:
592 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
593 .addImm(MBBI->getOperand(2).getImm() * 4)
594 .addImm(/*Wide=*/0)
595 .setMIFlags(Flags);
596 break;
597 case ARM::t2SUBspImm:
598 case ARM::t2SUBspImm12:
599 case ARM::t2ADDspImm:
600 case ARM::t2ADDspImm12:
601 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
602 .addImm(MBBI->getOperand(2).getImm())
603 .addImm(/*Wide=*/1)
604 .setMIFlags(Flags);
605 break;
606
607 case ARM::tMOVr:
608 if (MBBI->getOperand(1).getReg() == ARM::SP &&
609 (Flags & MachineInstr::FrameSetup)) {
610 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
611 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
612 .addImm(Reg)
613 .setMIFlags(Flags);
614 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
615 (Flags & MachineInstr::FrameDestroy)) {
616 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
617 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
618 .addImm(Reg)
619 .setMIFlags(Flags);
620 } else {
621 report_fatal_error("No SEH Opcode for MOV");
622 }
623 break;
624
625 case ARM::tBX_RET:
626 case ARM::TCRETURNri:
627 case ARM::TCRETURNrinotr12:
628 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
629 .addImm(/*Wide=*/0)
630 .setMIFlags(Flags);
631 break;
632
633 case ARM::TCRETURNdi:
634 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
635 .addImm(/*Wide=*/1)
636 .setMIFlags(Flags);
637 break;
638 }
639 return MBB->insertAfter(MBBI, MIB);
640}
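// As a worked illustration: a Thumb2 prologue push such as
//   push.w {r4-r11, lr}        ; t2STMDB_UPD
// is followed by
//   SEH_SaveRegs mask=0x4ff0, wide=1
// where the mask has bits 4-11 set for r4-r11 and bit 14 for lr, and the
// presence of r8-r11 forces the wide (32-bit) form.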
641
644 if (MBBI == MBB.begin())
646 return std::prev(MBBI);
647}
648
652 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
653 if (Start.isValid())
654 Start = std::next(Start);
655 else
656 Start = MBB.begin();
657
658 for (auto MI = Start; MI != End;) {
659 auto Next = std::next(MI);
660 // Check if this instruction already has got a SEH opcode added. In that
661 // case, don't do this generic mapping.
662 if (Next != End && isSEHInstruction(*Next)) {
663 MI = std::next(Next);
664 while (MI != End && isSEHInstruction(*MI))
665 ++MI;
666 continue;
667 }
668 insertSEH(MI, TII, MIFlags);
669 MI = Next;
670 }
671}
672
675 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
676 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
677 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
678 if (isARM)
679 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
680 Pred, PredReg, TII, MIFlags);
681 else
682 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
683 Pred, PredReg, TII, MIFlags);
684}
685
686static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
688 const ARMBaseInstrInfo &TII, int NumBytes,
689 unsigned MIFlags = MachineInstr::NoFlags,
691 unsigned PredReg = 0) {
692 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
693 MIFlags, Pred, PredReg);
694}
695
697 int RegSize;
698 switch (MI.getOpcode()) {
699 case ARM::VSTMDDB_UPD:
700 RegSize = 8;
701 break;
702 case ARM::STMDB_UPD:
703 case ARM::t2STMDB_UPD:
704 RegSize = 4;
705 break;
706 case ARM::t2STR_PRE:
707 case ARM::STR_PRE_IMM:
708 return 4;
709 default:
710 llvm_unreachable("Unknown push or pop like instruction");
711 }
712
713 int count = 0;
714 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
715 // pred) so the list starts at 4.
716 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
717 count += RegSize;
718 return count;
719}
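// For example: "vpush {d8-d15}" (VSTMDDB_UPD) has eight D-register operands
// after the fixed sp/sp/predicate operands, so it adjusts SP by 8 * 8 = 64
// bytes, while a GPR push via t2STMDB_UPD contributes 4 bytes per register.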
720
722 size_t StackSizeInBytes) {
723 const MachineFrameInfo &MFI = MF.getFrameInfo();
724 const Function &F = MF.getFunction();
725 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
726
727 StackProbeSize =
728 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
729 return (StackSizeInBytes >= StackProbeSize) &&
730 !F.hasFnAttribute("no-stack-arg-probe");
731}
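// For example: with the default "stack-probe-size" of 4096 bytes (4080 when a
// stack protector slot is present), a Windows frame of 4096 bytes or more gets
// a __chkstk probe in the prologue (unless "no-stack-arg-probe" is set), while
// a 4000-byte frame is allocated with a plain SP adjustment.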
732
733namespace {
734
735struct StackAdjustingInsts {
736 struct InstInfo {
738 unsigned SPAdjust;
739 bool BeforeFPSet;
740
741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
742 void dump() {
743 dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
744 << "sp-adjust=" << SPAdjust;
745 I->dump();
746 }
747#endif
748 };
749
751
752 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
753 bool BeforeFPSet = false) {
754 InstInfo Info = {I, SPAdjust, BeforeFPSet};
755 Insts.push_back(Info);
756 }
757
758 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
759 auto Info =
760 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
761 assert(Info != Insts.end() && "invalid sp adjusting instruction");
762 Info->SPAdjust += ExtraBytes;
763 }
764
765 void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
767 unsigned CFAOffset = 0;
768 for (auto &Info : Insts) {
769 if (HasFP && !Info.BeforeFPSet)
770 return;
771
772 CFAOffset += Info.SPAdjust;
773 CFIBuilder.setInsertPoint(std::next(Info.I));
774 CFIBuilder.buildDefCFAOffset(CFAOffset);
775 }
776 }
777
778#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
779 void dump() {
780 dbgs() << "StackAdjustingInsts:\n";
781 for (auto &Info : Insts)
782 Info.dump();
783 }
784#endif
785};
786
787} // end anonymous namespace
788
789/// Emit an instruction sequence that will align the address in
790/// register Reg by zeroing out the lower bits. For versions of the
791/// architecture that support Neon, this must be done in a single
792/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
793/// single instruction. That function only gets called when optimizing
794/// spilling of D registers on a core with the Neon instruction set
795/// present.
797 const TargetInstrInfo &TII,
800 const DebugLoc &DL, const unsigned Reg,
801 const Align Alignment,
802 const bool MustBeSingleInstruction) {
803 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
804 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
805 const unsigned AlignMask = Alignment.value() - 1U;
806 const unsigned NrBitsToZero = Log2(Alignment);
807 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
808 if (!AFI->isThumbFunction()) {
809 // if the BFC instruction is available, use that to zero the lower
810 // bits:
811 // bfc Reg, #0, log2(Alignment)
812 // otherwise use BIC, if the mask to zero the required number of bits
813 // can be encoded in the bic immediate field
814 // bic Reg, Reg, Alignment-1
815 // otherwise, emit
816 // lsr Reg, Reg, log2(Alignment)
817 // lsl Reg, Reg, log2(Alignment)
818 if (CanUseBFC) {
819 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
821 .addImm(~AlignMask)
823 } else if (AlignMask <= 255) {
824 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
826 .addImm(AlignMask)
828 .add(condCodeOp());
829 } else {
830 assert(!MustBeSingleInstruction &&
831 "Shouldn't call emitAligningInstructions demanding a single "
832 "instruction to be emitted for large stack alignment for a target "
833 "without BFC.");
834 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
836 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
838 .add(condCodeOp());
839 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
841 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
843 .add(condCodeOp());
844 }
845 } else {
846 // Since this is only reached for Thumb-2 targets, the BFC instruction
847 // should always be available.
848 assert(CanUseBFC);
849 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
851 .addImm(~AlignMask)
853 }
854}
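// As an illustration, for Align(16) in ARM mode the sequence emitted above is
// one of (r4 stands in for whichever register is being aligned):
//   bfc  r4, #0, #4                    ; v6T2/v7, single instruction
//   bic  r4, r4, #15                   ; no BFC, mask encodable as immediate
//   lsr  r4, r4, #4 ; lsl r4, r4, #4   ; fallback, two instructions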
855
856/// We need the offset of the frame pointer relative to other MachineFrameInfo
857/// offsets which are encoded relative to SP at function begin.
858/// See also emitPrologue() for how the FP is set up.
859/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
860/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
861/// this to produce a conservative estimate that we check in an assert() later.
862static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
863 const MachineFunction &MF) {
866 // For Thumb1, push.w isn't available, so the prologue always pushes r7 and
867 // lr onto the stack first.
868 if (AFI.isThumb1OnlyFunction())
869 return -AFI.getArgRegsSaveSize() - (2 * 4);
870 // This is a conservative estimate: assume the frame pointer is r7 and that
871 // everything from r8 up to pc ("r15") is spilled before it (= 8 registers).
872 int MaxRegBytes = 8 * 4;
873 if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
874 // Here, r11 can be stored below all of r4-r15.
875 MaxRegBytes = 11 * 4;
876 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
877 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
878 MaxRegBytes = 11 * 4 + 8 * 8;
879 }
880 int FPCXTSaveSize =
881 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
882 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
883}
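// Worked example: for a non-Thumb1 function with the default push/pop split,
// no FPCXT save and no GPR vararg save area, the estimate above is
// -(8 * 4) = -32, i.e. the saved frame pointer is assumed to be at most 32
// bytes below the incoming SP.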
884
886 MachineBasicBlock &MBB) const {
888 MachineFrameInfo &MFI = MF.getFrameInfo();
890 const TargetMachine &TM = MF.getTarget();
891 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
894 "This emitPrologue does not support Thumb1!");
895 bool isARM = !AFI->isThumbFunction();
896 Align Alignment = STI.getFrameLowering()->getStackAlign();
897 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
898 unsigned NumBytes = MFI.getStackSize();
899 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
900 int FPCXTSaveSize = 0;
901 bool NeedsWinCFI = needsWinCFI(MF);
904
905 LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
906
907 // Debug location must be unknown since the first debug location is used
908 // to determine the end of the prologue.
909 DebugLoc dl;
910
911 Register FramePtr = RegInfo->getFrameRegister(MF);
912
913 // Determine the size of each callee-save spill area and record which frame
914 // index belongs to which callee-save spill area.
915 unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
916 DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
917 int FramePtrSpillFI = 0;
918 int D8SpillFI = 0;
919
920 // All calls are tail calls in GHC calling conv, and functions have no
921 // prologue/epilogue.
923 return;
924
925 StackAdjustingInsts DefCFAOffsetCandidates;
926 bool HasFP = hasFP(MF);
927
928 if (!AFI->hasStackFrame() &&
929 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
930 if (NumBytes != 0) {
931 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
933 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
934 }
935 if (!NeedsWinCFI)
936 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
937 if (NeedsWinCFI && MBBI != MBB.begin()) {
939 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
941 MF.setHasWinCFI(true);
942 }
943 return;
944 }
945
946 // Determine spill area sizes, and some important frame indices.
947 SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
948 bool BeforeFPPush = true;
949 for (const CalleeSavedInfo &I : CSI) {
950 MCRegister Reg = I.getReg();
951 int FI = I.getFrameIdx();
952
953 SpillArea Area = getSpillArea(Reg, PushPopSplit,
954 AFI->getNumAlignedDPRCS2Regs(), RegInfo);
955
956 if (Reg == FramePtr.asMCReg()) {
957 FramePtrSpillFI = FI;
958 FramePtrSpillArea = Area;
959 }
960 if (Reg == ARM::D8)
961 D8SpillFI = FI;
962
963 switch (Area) {
964 case SpillArea::FPCXT:
965 FPCXTSaveSize += 4;
966 break;
967 case SpillArea::GPRCS1:
968 GPRCS1Size += 4;
969 break;
970 case SpillArea::GPRCS2:
971 GPRCS2Size += 4;
972 break;
973 case SpillArea::FPStatus:
974 FPStatusSize += 4;
975 break;
976 case SpillArea::DPRCS1:
977 DPRCS1Size += 8;
978 break;
979 case SpillArea::GPRCS3:
980 GPRCS3Size += 4;
981 break;
982 case SpillArea::DPRCS2:
983 DPRCS2Size += 8;
984 break;
985 }
986 }
987
988 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
989 DPRCS1Push, GPRCS3Push;
990
991 // Move past the PAC computation.
992 if (AFI->shouldSignReturnAddress())
993 LastPush = MBBI++;
994
995 // Move past FPCXT area.
996 if (FPCXTSaveSize > 0) {
997 LastPush = MBBI++;
998 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
999 }
1000
1001 // Allocate the vararg register save area.
1002 if (ArgRegsSaveSize) {
1003 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
1005 LastPush = std::prev(MBBI);
1006 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
1007 }
1008
1009 // Move past area 1.
1010 if (GPRCS1Size > 0) {
1011 GPRCS1Push = LastPush = MBBI++;
1012 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
1013 if (FramePtrSpillArea == SpillArea::GPRCS1)
1014 BeforeFPPush = false;
1015 }
1016
1017 // Determine starting offsets of spill areas. These offsets are all positive
1018 // offsets from the bottom of the lowest-addressed callee-save area
1019 // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
1020 // of the spill area in question.
1021 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
1022 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
1023 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
1024 unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;
1025
1026 Align DPRAlign = DPRCS1Size ? std::min(Align(8), Alignment) : Align(4);
1027 unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1028 GPRCS2Size + FPStatusSize) %
1029 DPRAlign.value();
1030
1031 unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;
1032
1033 if (HasFP) {
1034 // Offset from the CFA to the saved frame pointer, will be negative.
1035 [[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
1036 LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
1037 << ", FPOffset: " << FPOffset << "\n");
1038 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
1039 "Max FP estimation is wrong");
1040 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
1041 NumBytes);
1042 }
1043 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
1044 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
1045 AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);
1046
1047 // Move past area 2.
1048 if (GPRCS2Size > 0) {
1050 GPRCS2Push = LastPush = MBBI++;
1051 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1052 if (FramePtrSpillArea == SpillArea::GPRCS2)
1053 BeforeFPPush = false;
1054 }
1055
1056 // Move past FP status save area.
1057 if (FPStatusSize > 0) {
1058 while (MBBI != MBB.end()) {
1059 unsigned Opc = MBBI->getOpcode();
1060 if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
1061 MBBI++;
1062 else
1063 break;
1064 }
1065 LastPush = MBBI++;
1066 DefCFAOffsetCandidates.addInst(LastPush, FPStatusSize);
1067 }
1068
1069 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
1070 // .cfi_offset operations will reflect that.
1071 if (DPRGapSize) {
1072 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
1073 if (LastPush != MBB.end() &&
1074 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
1075 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
1076 else {
1077 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
1079 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
1080 }
1081 }
1082
1083 // Move past DPRCS1Size.
1084 if (DPRCS1Size > 0) {
1085 // Since vpush register list cannot have gaps, there may be multiple vpush
1086 // instructions in the prologue.
1087 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1088 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
1089 BeforeFPPush);
1090 DPRCS1Push = LastPush = MBBI++;
1091 }
1092 }
1093
1094 // Move past the aligned DPRCS2 area.
1095 if (DPRCS2Size > 0) {
1097 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
1098 // leaves the stack pointer pointing to the DPRCS2 area.
1099 //
1100 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
1101 NumBytes += MFI.getObjectOffset(D8SpillFI);
1102 } else
1103 NumBytes = DPRCS1Offset;
1104
1105 // Move past GPRCS3, if using SplitR11WindowsSEH.
1106 if (GPRCS3Size > 0) {
1108 GPRCS3Push = LastPush = MBBI++;
1109 DefCFAOffsetCandidates.addInst(LastPush, GPRCS3Size, BeforeFPPush);
1110 if (FramePtrSpillArea == SpillArea::GPRCS3)
1111 BeforeFPPush = false;
1112 }
1113
1114 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
1115 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
1116 NeedsWinCFIStackAlloc = false;
1117
1118 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
1119 uint32_t NumWords = NumBytes >> 2;
1120
1121 if (NumWords < 65536) {
1122 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1123 .addImm(NumWords)
1126 } else {
1127 // Split into two instructions here, instead of using t2MOVi32imm,
1128 // to allow inserting accurate SEH instructions (including accurate
1129 // instruction size for each of them).
1130 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1131 .addImm(NumWords & 0xffff)
1134 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
1135 .addReg(ARM::R4)
1136 .addImm(NumWords >> 16)
1139 }
1140
1141 switch (TM.getCodeModel()) {
1142 case CodeModel::Tiny:
1143 llvm_unreachable("Tiny code model not available on ARM.");
1144 case CodeModel::Small:
1145 case CodeModel::Medium:
1146 case CodeModel::Kernel:
1147 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1149 .addExternalSymbol("__chkstk")
1150 .addReg(ARM::R4, RegState::Implicit)
1152 break;
1153 case CodeModel::Large:
1154 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1155 .addExternalSymbol("__chkstk")
1157
1158 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1160 .addReg(ARM::R12, RegState::Kill)
1161 .addReg(ARM::R4, RegState::Implicit)
1163 break;
1164 }
1165
1166 MachineInstrBuilder Instr, SEH;
1167 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1168 .addReg(ARM::SP, RegState::Kill)
1169 .addReg(ARM::R4, RegState::Kill)
1172 .add(condCodeOp());
1173 if (NeedsWinCFIStackAlloc) {
1174 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1175 .addImm(NumBytes)
1176 .addImm(/*Wide=*/1)
1178 MBB.insertAfter(Instr, SEH);
1179 }
1180 NumBytes = 0;
1181 }
1182
1183 if (NumBytes) {
1184 // Adjust SP after all the callee-save spills.
1185 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1186 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1187 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1188 else {
1189 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1191 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1192 }
1193
1194 if (HasFP && isARM)
1195 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1196 // Note it's not safe to do this in Thumb2 mode because it would have
1197 // taken two instructions:
1198 // mov sp, r7
1199 // sub sp, #24
1200 // If an interrupt is taken between the two instructions, then sp is in
1201 // an inconsistent state (pointing to the middle of callee-saved area).
1202 // The interrupt handler can end up clobbering the registers.
1203 AFI->setShouldRestoreSPFromFP(true);
1204 }
1205
1206 // Set FP to point to the stack slot that contains the previous FP.
1207 // For iOS, FP is R7, which has now been stored in spill area 1.
1208 // Otherwise, if this is not iOS, all the callee-saved registers go
1209 // into spill area 1, including the FP in R11. In either case, it
1210 // is in area one and the adjustment needs to take place just after
1211 // that push.
1213 if (HasFP) {
1214 MachineBasicBlock::iterator FPPushInst;
1215 // Offset from SP immediately after the push which saved the FP to the FP
1216 // save slot.
1217 int64_t FPOffsetAfterPush;
1218 switch (FramePtrSpillArea) {
1219 case SpillArea::GPRCS1:
1220 FPPushInst = GPRCS1Push;
1221 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1222 ArgRegsSaveSize + FPCXTSaveSize +
1223 sizeOfSPAdjustment(*FPPushInst);
1224 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1225 << FPOffsetAfterPush << " after that push\n");
1226 break;
1227 case SpillArea::GPRCS2:
1228 FPPushInst = GPRCS2Push;
1229 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1230 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1231 sizeOfSPAdjustment(*FPPushInst);
1232 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1233 << FPOffsetAfterPush << " after that push\n");
1234 break;
1235 case SpillArea::GPRCS3:
1236 FPPushInst = GPRCS3Push;
1237 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1238 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1239 FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
1240 sizeOfSPAdjustment(*FPPushInst);
1241 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
1242 << FPOffsetAfterPush << " after that push\n");
1243 break;
1244 default:
1245 llvm_unreachable("frame pointer in unknown spill area");
1246 break;
1247 }
1248 AfterPush = std::next(FPPushInst);
1249 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1250 assert(FPOffsetAfterPush == 0);
1251
1252 // Emit the MOV or ADD to set up the frame pointer register.
1253 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1254 FramePtr, ARM::SP, FPOffsetAfterPush,
1256
1257 if (!NeedsWinCFI) {
1258 // Emit DWARF info to find the CFA using the frame pointer from this
1259 // point onward.
1260 CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
1261 if (FPOffsetAfterPush != 0)
1262 CFIBuilder.buildDefCFA(FramePtr, -MFI.getObjectOffset(FramePtrSpillFI));
1263 else
1264 CFIBuilder.buildDefCFARegister(FramePtr);
1265 }
1266 }
1267
1268 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1269 // instructions below don't need to be replayed to unwind the stack.
1270 if (NeedsWinCFI && MBBI != MBB.begin()) {
1272 if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1273 End = AfterPush;
1275 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1277 MF.setHasWinCFI(true);
1278 }
1279
1280 // Now that the prologue's actual instructions are finalised, we can insert
1281 // the necessary DWARF cf instructions to describe the situation. Start by
1282 // recording where each register ended up:
1283 if (!NeedsWinCFI) {
1284 for (const auto &Entry : reverse(CSI)) {
1285 MCRegister Reg = Entry.getReg();
1286 int FI = Entry.getFrameIdx();
1288 switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
1289 RegInfo)) {
1290 case SpillArea::GPRCS1:
1291 CFIPos = std::next(GPRCS1Push);
1292 break;
1293 case SpillArea::GPRCS2:
1294 CFIPos = std::next(GPRCS2Push);
1295 break;
1296 case SpillArea::DPRCS1:
1297 CFIPos = std::next(DPRCS1Push);
1298 break;
1299 case SpillArea::GPRCS3:
1300 CFIPos = std::next(GPRCS3Push);
1301 break;
1302 case SpillArea::FPStatus:
1303 case SpillArea::FPCXT:
1304 case SpillArea::DPRCS2:
1305 // FPCXT and DPRCS2 are not represented in the DWARF info.
1306 break;
1307 }
1308
1309 if (CFIPos.isValid()) {
1311 .buildOffset(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
1312 MFI.getObjectOffset(FI));
1313 }
1314 }
1315 }
1316
1317 // Now we can emit descriptions of where the canonical frame address was
1318 // throughout the process. If we have a frame pointer, it takes over the job
1319 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1320 // actually get emitted.
1321 if (!NeedsWinCFI) {
1322 LLVM_DEBUG(DefCFAOffsetCandidates.dump());
1323 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
1324 }
1325
1326 if (STI.isTargetELF() && hasFP(MF))
1328 AFI->getFramePtrSpillOffset());
1329
1330 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1331 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1332 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1333 AFI->setFPStatusSavesSize(FPStatusSize);
1334 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1335 AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
1336 AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);
1337
1338 // If we need dynamic stack realignment, do it here. Be paranoid and make
1339 // sure if we also have VLAs, we have a base pointer for frame access.
1340 // If aligned NEON registers were spilled, the stack has already been
1341 // realigned.
1342 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1343 Align MaxAlign = MFI.getMaxAlign();
1345 if (!AFI->isThumbFunction()) {
1346 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1347 false);
1348 } else {
1349 // We cannot use sp as source/dest register here, thus we're using r4 to
1350 // perform the calculations. We're emitting the following sequence:
1351 // mov r4, sp
1352 // -- use emitAligningInstructions to produce best sequence to zero
1353 // -- out lower bits in r4
1354 // mov sp, r4
1355 // FIXME: It will be better just to find spare register here.
1356 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1357 .addReg(ARM::SP, RegState::Kill)
1359 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1360 false);
1361 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1362 .addReg(ARM::R4, RegState::Kill)
1364 }
1365
1366 AFI->setShouldRestoreSPFromFP(true);
1367 }
1368
1369 // If we need a base pointer, set it up here. It's whatever the value
1370 // of the stack pointer is at this point. Any variable size objects
1371 // will be allocated after this, so we can still use the base pointer
1372 // to reference locals.
1373 // FIXME: Clarify FrameSetup flags here.
1374 if (RegInfo->hasBasePointer(MF)) {
1375 if (isARM)
1376 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1377 .addReg(ARM::SP)
1379 .add(condCodeOp());
1380 else
1381 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1382 .addReg(ARM::SP)
1384 }
1385
1386 // If the frame has variable sized objects then the epilogue must restore
1387 // the sp from fp. We can assume there's an FP here since hasFP already
1388 // checks for hasVarSizedObjects.
1389 if (MFI.hasVarSizedObjects())
1390 AFI->setShouldRestoreSPFromFP(true);
1391}
1392
1394 MachineBasicBlock &MBB) const {
1395 MachineFrameInfo &MFI = MF.getFrameInfo();
1397 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1398 const ARMBaseInstrInfo &TII =
1399 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1400 assert(!AFI->isThumb1OnlyFunction() &&
1401 "This emitEpilogue does not support Thumb1!");
1402 bool isARM = !AFI->isThumbFunction();
1405
1406 LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
1407
1408 // Amount of stack space we reserved next to incoming args for either
1409 // varargs registers or stack arguments in tail calls made by this function.
1410 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1411
1412 // How much of the stack used by incoming arguments this function is expected
1413 // to restore in this particular epilogue.
1414 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1415 int NumBytes = (int)MFI.getStackSize();
1416 Register FramePtr = RegInfo->getFrameRegister(MF);
1417
1418 // All calls are tail calls in GHC calling conv, and functions have no
1419 // prologue/epilogue.
1421 return;
1422
1423 // First put ourselves on the first (from the top) terminator instruction.
1425 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1426
1427 MachineBasicBlock::iterator RangeStart;
1428 if (!AFI->hasStackFrame()) {
1429 if (MF.hasWinCFI()) {
1430 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1432 RangeStart = initMBBRange(MBB, MBBI);
1433 }
1434
1435 if (NumBytes + IncomingArgStackToRestore != 0)
1436 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1437 NumBytes + IncomingArgStackToRestore,
1439 } else {
1440 // Unwind MBBI to point to first LDR / VLDRD.
1441 if (MBBI != MBB.begin()) {
1442 do {
1443 --MBBI;
1444 } while (MBBI != MBB.begin() &&
1446 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1447 ++MBBI;
1448 }
1449
1450 if (MF.hasWinCFI()) {
1451 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1453 RangeStart = initMBBRange(MBB, MBBI);
1454 }
1455
1456 // Move SP to start of FP callee save spill area.
1457 NumBytes -=
1458 (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
1462
1463 // Reset SP based on frame pointer only if the stack frame extends beyond
1464 // frame pointer stack slot or target is ELF and the function has FP.
1465 if (AFI->shouldRestoreSPFromFP()) {
1466 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1467 if (NumBytes) {
1468 if (isARM)
1469 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1470 ARMCC::AL, 0, TII,
1472 else {
1473 // It's not possible to restore SP from FP in a single instruction.
1474 // For iOS, this looks like:
1475 // mov sp, r7
1476 // sub sp, #24
1477 // This is bad, if an interrupt is taken after the mov, sp is in an
1478 // inconsistent state.
1479 // Use the first callee-saved register as a scratch register.
1480 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1481 "No scratch register to restore SP from FP!");
1482 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1484 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1485 .addReg(ARM::R4)
1488 }
1489 } else {
1490 // Thumb2 or ARM.
1491 if (isARM)
1492 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1495 .add(condCodeOp())
1497 else
1498 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1502 }
1503 } else if (NumBytes &&
1504 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1505 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1507
1508 // Increment past our save areas.
1509 if (AFI->getGPRCalleeSavedArea3Size()) {
1511 (void)PushPopSplit;
1512 MBBI++;
1513 }
1514
1515 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
1516 MBBI++;
1517 // Since vpop register list cannot have gaps, there may be multiple vpop
1518 // instructions in the epilogue.
1519 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1520 MBBI++;
1521 }
1522 if (AFI->getDPRCalleeSavedGapSize()) {
1523 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1524 "unexpected DPR alignment gap");
1525 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1527 }
1528
1529 if (AFI->getGPRCalleeSavedArea2Size()) {
1531 (void)PushPopSplit;
1532 MBBI++;
1533 }
1534 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1535
1536 if (ReservedArgStack || IncomingArgStackToRestore) {
1537 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1538 "attempting to restore negative stack amount");
1539 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1540 ReservedArgStack + IncomingArgStackToRestore,
1542 }
1543
1544 // Validate PAC; it should already have been popped into R12. For a CMSE entry
1545 // function, the validation instruction is emitted during expansion of
1546 // tBXNS_RET, since the validation must use the value of SP at function
1547 // entry, before saving (resp. after restoring) FPCXTNS.
1548 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1549 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1550 }
1551
1552 if (MF.hasWinCFI()) {
1554 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1556 }
1557}
1558
1559/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1560/// debug info. It's the same as what we use for resolving the code-gen
1561/// references for now. FIXME: This can go wrong when references are
1562/// SP-relative and simple call frames aren't used.
1564 int FI,
1565 Register &FrameReg) const {
1566 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1567}
1568
1570 int FI, Register &FrameReg,
1571 int SPAdj) const {
1572 const MachineFrameInfo &MFI = MF.getFrameInfo();
1573 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1575 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1576 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1577 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1578 bool isFixed = MFI.isFixedObjectIndex(FI);
1579
1580 FrameReg = ARM::SP;
1581 Offset += SPAdj;
1582
1583 // SP can move around if there are allocas. We may also lose track of SP
1584 // when emergency spilling inside a non-reserved call frame setup.
1585 bool hasMovingSP = !hasReservedCallFrame(MF);
1586
1587 // When dynamically realigning the stack, use the frame pointer for
1588 // parameters, and the stack/base pointer for locals.
1589 if (RegInfo->hasStackRealignment(MF)) {
1590 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1591 if (isFixed) {
1592 FrameReg = RegInfo->getFrameRegister(MF);
1593 Offset = FPOffset;
1594 } else if (hasMovingSP) {
1595 assert(RegInfo->hasBasePointer(MF) &&
1596 "VLAs and dynamic stack alignment, but missing base pointer!");
1597 FrameReg = RegInfo->getBaseRegister();
1598 Offset -= SPAdj;
1599 }
1600 return Offset;
1601 }
1602
1603 // If there is a frame pointer, use it when we can.
1604 if (hasFP(MF) && AFI->hasStackFrame()) {
1605 // Use frame pointer to reference fixed objects. Use it for locals if
1606 // there are VLAs (and thus the SP isn't reliable as a base).
1607 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1608 FrameReg = RegInfo->getFrameRegister(MF);
1609 return FPOffset;
1610 } else if (hasMovingSP) {
1611 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1612 if (AFI->isThumb2Function()) {
1613 // Try to use the frame pointer if we can, else use the base pointer
1614 // since it's available. This is handy for the emergency spill slot, in
1615 // particular.
1616 if (FPOffset >= -255 && FPOffset < 0) {
1617 FrameReg = RegInfo->getFrameRegister(MF);
1618 return FPOffset;
1619 }
1620 }
1621 } else if (AFI->isThumbFunction()) {
1622 // Prefer SP to base pointer, if the offset is suitably aligned and in
1623 // range as the effective range of the immediate offset is bigger when
1624 // basing off SP.
1625 // Use add <rd>, sp, #<imm8>
1626 // ldr <rd>, [sp, #<imm8>]
1627 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1628 return Offset;
1629 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1630 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1631 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1632 FrameReg = RegInfo->getFrameRegister(MF);
1633 return FPOffset;
1634 }
1635 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1636 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1637 FrameReg = RegInfo->getFrameRegister(MF);
1638 return FPOffset;
1639 }
1640 }
1641 // Use the base pointer if we have one.
1642 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1643 // That can happen if we forced a base pointer for a large call frame.
1644 if (RegInfo->hasBasePointer(MF)) {
1645 FrameReg = RegInfo->getBaseRegister();
1646 Offset -= SPAdj;
1647 }
1648 return Offset;
1649}
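// As an illustration: for a fixed (incoming argument) object in a function
// that dynamically realigns its stack, the code above returns the frame
// pointer relative offset, ObjectOffset + StackSize - FramePtrSpillOffset,
// with FrameReg set to the frame pointer, since SP is not a reliable base for
// fixed objects once the stack has been realigned.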
1650
1651void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1654 unsigned StmOpc, unsigned StrOpc,
1655 bool NoGap,
1656 function_ref<bool(unsigned)> Func) const {
1657 MachineFunction &MF = *MBB.getParent();
1660
1661 DebugLoc DL;
1662
1663 using RegAndKill = std::pair<unsigned, bool>;
1664
1666 unsigned i = CSI.size();
1667 while (i != 0) {
1668 unsigned LastReg = 0;
1669 for (; i != 0; --i) {
1670 MCRegister Reg = CSI[i-1].getReg();
1671 if (!Func(Reg))
1672 continue;
1673
1674 const MachineRegisterInfo &MRI = MF.getRegInfo();
1675 bool isLiveIn = MRI.isLiveIn(Reg);
1676 if (!isLiveIn && !MRI.isReserved(Reg))
1677 MBB.addLiveIn(Reg);
1678 // If NoGap is true, push consecutive registers and then leave the rest
1679 // for other instructions. e.g.
1680 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1681 if (NoGap && LastReg && LastReg != Reg-1)
1682 break;
1683 LastReg = Reg;
1684 // Do not set a kill flag on values that are also marked as live-in. This
1685 // happens with the @llvm.returnaddress intrinsic and with arguments
1686 // passed in callee-saved registers.
1687 // Omitting the kill flags is conservatively correct even if the live-in
1688 // is not used after all.
1689 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1690 }
1691
1692 if (Regs.empty())
1693 continue;
1694
1695 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1696 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1697 });
1698
1699 if (Regs.size() > 1 || StrOpc== 0) {
1700 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1701 .addReg(ARM::SP)
1704 for (const auto &[Reg, Kill] : Regs)
1705 MIB.addReg(Reg, getKillRegState(Kill));
1706 } else if (Regs.size() == 1) {
1707 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1708 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1709 .addReg(ARM::SP)
1711 .addImm(-4)
1713 }
1714 Regs.clear();
1715
1716 // Put any subsequent vpush instructions before this one: they will refer to
1717 // higher register numbers so need to be pushed first in order to preserve
1718 // monotonicity.
1719 if (MI != MBB.begin())
1720 --MI;
1721 }
1722}
1723
1724void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1727 unsigned LdmOpc, unsigned LdrOpc,
1728 bool isVarArg, bool NoGap,
1729 function_ref<bool(unsigned)> Func) const {
1730 MachineFunction &MF = *MBB.getParent();
1734 bool hasPAC = AFI->shouldSignReturnAddress();
1735 DebugLoc DL;
1736 bool isTailCall = false;
1737 bool isInterrupt = false;
1738 bool isTrap = false;
1739 bool isCmseEntry = false;
1742 if (MBB.end() != MI) {
1743 DL = MI->getDebugLoc();
1744 unsigned RetOpcode = MI->getOpcode();
1745 isTailCall =
1746 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1747 RetOpcode == ARM::TCRETURNrinotr12);
1748 isInterrupt =
1749 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1750 isTrap = RetOpcode == ARM::TRAP || RetOpcode == ARM::tTRAP;
1751 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1752 }
1753
1755 unsigned i = CSI.size();
1756 while (i != 0) {
1757 unsigned LastReg = 0;
1758 bool DeleteRet = false;
1759 for (; i != 0; --i) {
1760 CalleeSavedInfo &Info = CSI[i-1];
1761 MCRegister Reg = Info.getReg();
1762 if (!Func(Reg))
1763 continue;
1764
1765 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1766 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1767 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1768 (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1769 PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
1770 Reg = ARM::PC;
1771 // Fold the return instruction into the LDM.
1772 DeleteRet = true;
1773 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1774 }
1775
1776 // If NoGap is true, pop consecutive registers and then leave the rest
1777 // for other instructions. e.g.
1778 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1779 if (NoGap && LastReg && LastReg != Reg-1)
1780 break;
1781
1782 LastReg = Reg;
1783 Regs.push_back(Reg);
1784 }
1785
1786 if (Regs.empty())
1787 continue;
1788
1789 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1790 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1791 });
1792
1793 if (Regs.size() > 1 || LdrOpc == 0) {
1794 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1795                                    .addReg(ARM::SP)
1796                                    .add(predOps(ARMCC::AL))
1797                                    .setMIFlags(MachineInstr::FrameDestroy);
1798 for (unsigned Reg : Regs)
1799 MIB.addReg(Reg, getDefRegState(true));
1800 if (DeleteRet) {
1801 if (MI != MBB.end()) {
1802 MIB.copyImplicitOps(*MI);
1803 MI->eraseFromParent();
1804 }
1805 }
1806 MI = MIB;
1807 } else if (Regs.size() == 1) {
1808 // If we adjusted the reg to PC from LR above, switch it back here. We
1809 // only do that for LDM.
1810 if (Regs[0] == ARM::PC)
1811 Regs[0] = ARM::LR;
1812      MachineInstrBuilder MIB =
1813          BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1814 .addReg(ARM::SP, RegState::Define)
1815 .addReg(ARM::SP)
1817 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1818 // that refactoring is complete (eventually).
1819 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1820        MIB.addReg(0);
1821        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
1822 } else
1823 MIB.addImm(4);
1824 MIB.add(predOps(ARMCC::AL));
1825 }
1826 Regs.clear();
1827
1828 // Put any subsequent vpop instructions after this one: they will refer to
1829 // higher register numbers so need to be popped afterwards.
1830 if (MI != MBB.end())
1831 ++MI;
1832 }
1833}
1834
1835void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
1836                                         MachineBasicBlock::iterator MI,
1837                                         ArrayRef<CalleeSavedInfo> CSI,
1838                                         unsigned PushOpc) const {
1839 MachineFunction &MF = *MBB.getParent();
1841
1842  SmallVector<Register, 2> Regs;
1843  auto RegPresent = [&CSI](MCRegister Reg) {
1844 return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
1845 return C.getReg() == Reg;
1846 });
1847 };
1848
1849 // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
1850 // instruction.
1851 if (RegPresent(ARM::FPSCR)) {
1852 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS), ARM::R4)
1855
1856 Regs.push_back(ARM::R4);
1857 }
1858
1859 // If we need to save FPEXC, then we must move FPEXC into R5 with the
1860 // VMRS_FPEXC instruction.
1861 if (RegPresent(ARM::FPEXC)) {
1862 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS_FPEXC), ARM::R5)
1865
1866 Regs.push_back(ARM::R5);
1867 }
1868
1869  // If neither FPSCR nor FPEXC is present, then do nothing.
1870 if (Regs.size() == 0)
1871 return;
1872
1873 // Push both R4 and R5 onto the stack, if present.
1874  MachineInstrBuilder MIB =
1875      BuildMI(MBB, MI, DebugLoc(), TII.get(PushOpc), ARM::SP)
1876          .addReg(ARM::SP)
1877          .setMIFlags(MachineInstr::FrameSetup)
1878          .add(predOps(ARMCC::AL));
1879
1880 for (Register Reg : Regs) {
1881 MIB.addReg(Reg);
1882 }
1883}
1884
1885void ARMFrameLowering::emitFPStatusRestores(
1886    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1887    MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
1888 MachineFunction &MF = *MBB.getParent();
1890
1891 auto RegPresent = [&CSI](MCRegister Reg) {
1892 return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
1893 return C.getReg() == Reg;
1894 });
1895 };
1896
1897 // Do nothing if we don't need to restore any FP status registers.
1898 if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
1899 return;
1900
1901 // Pop registers off of the stack.
1902  MachineInstrBuilder MIB =
1903      BuildMI(MBB, MI, DebugLoc(), TII.get(LdmOpc), ARM::SP)
1904 .addReg(ARM::SP)
1907
1908 // If FPSCR was saved, it will be popped into R4.
1909 if (RegPresent(ARM::FPSCR)) {
1910 MIB.addReg(ARM::R4, RegState::Define);
1911 }
1912
1913 // If FPEXC was saved, it will be popped into R5.
1914 if (RegPresent(ARM::FPEXC)) {
1915 MIB.addReg(ARM::R5, RegState::Define);
1916 }
1917
1918 // Move the FPSCR value back into the register with the VMSR instruction.
1919 if (RegPresent(ARM::FPSCR)) {
1920 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR))
1921 .addReg(ARM::R4)
1924 }
1925
1926 // Move the FPEXC value back into the register with the VMSR_FPEXC
1927 // instruction.
1928 if (RegPresent(ARM::FPEXC)) {
1929 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR_FPEXC))
1930 .addReg(ARM::R5)
1933 }
1934}
1935
1936/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1937/// starting from d8. Also insert stack realignment code and leave the stack
1938/// pointer pointing to the d8 spill slot.
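/// For example (illustrative, matching the cases handled below), with seven
/// aligned D-registers the emitted sequence is: sub/bic/mov to realign sp into
/// r4, then vst1.64 {d8-d11} with writeback, vst1.64 {d12,d13}, and a vstr of d14.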
1939static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1940                                    MachineBasicBlock::iterator MI,
1941                                    unsigned NumAlignedDPRCS2Regs,
1942                                    ArrayRef<CalleeSavedInfo> CSI,
1943                                    const TargetRegisterInfo *TRI) {
1944  MachineFunction &MF = *MBB.getParent();
1945  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1946 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1948 MachineFrameInfo &MFI = MF.getFrameInfo();
1949
1950 // Mark the D-register spill slots as properly aligned. Since MFI computes
1951 // stack slot layout backwards, this can actually mean that the d-reg stack
1952 // slot offsets can be wrong. The offset for d8 will always be correct.
1953 for (const CalleeSavedInfo &I : CSI) {
1954 unsigned DNum = I.getReg() - ARM::D8;
1955 if (DNum > NumAlignedDPRCS2Regs - 1)
1956 continue;
1957 int FI = I.getFrameIdx();
1958 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1959 // registers will be 8-byte aligned.
1960 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1961
1962 // The stack slot for D8 needs to be maximally aligned because this is
1963 // actually the point where we align the stack pointer. MachineFrameInfo
1964 // computes all offsets relative to the incoming stack pointer which is a
1965 // bit weird when realigning the stack. Any extra padding for this
1966 // over-alignment is not realized because the code inserted below adjusts
1967 // the stack pointer by numregs * 8 before aligning the stack pointer.
1968 if (DNum == 0)
1969 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1970 }
1971
1972 // Move the stack pointer to the d8 spill slot, and align it at the same
1973 // time. Leave the stack slot address in the scratch register r4.
1974 //
1975 // sub r4, sp, #numregs * 8
1976 // bic r4, r4, #align - 1
1977 // mov sp, r4
1978 //
1979 bool isThumb = AFI->isThumbFunction();
1980 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1981 AFI->setShouldRestoreSPFromFP(true);
1982
1983 // sub r4, sp, #numregs * 8
1984 // The immediate is <= 64, so it doesn't need any special encoding.
1985 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1986 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1987 .addReg(ARM::SP)
1988 .addImm(8 * NumAlignedDPRCS2Regs)
1990 .add(condCodeOp());
1991
1992 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1993 // We must set parameter MustBeSingleInstruction to true, since
1994 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1995 // stack alignment. Luckily, this can always be done since all ARM
1996 // architecture versions that support Neon also support the BFC
1997 // instruction.
1998 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1999
2000 // mov sp, r4
2001 // The stack pointer must be adjusted before spilling anything, otherwise
2002 // the stack slots could be clobbered by an interrupt handler.
2003 // Leave r4 live, it is used below.
2004 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
2005 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
2006 .addReg(ARM::R4)
2008 if (!isThumb)
2009 MIB.add(condCodeOp());
2010
2011 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
2012 // r4 holds the stack slot address.
2013 unsigned NextReg = ARM::D8;
2014
2015 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
2016 // The writeback is only needed when emitting two vst1.64 instructions.
2017 if (NumAlignedDPRCS2Regs >= 6) {
2018 MCRegister SupReg =
2019 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2020 MBB.addLiveIn(SupReg);
2021 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
2022 .addReg(ARM::R4, RegState::Kill)
2023 .addImm(16)
2024 .addReg(NextReg)
2027 NextReg += 4;
2028 NumAlignedDPRCS2Regs -= 4;
2029 }
2030
2031 // We won't modify r4 beyond this point. It currently points to the next
2032 // register to be spilled.
2033 unsigned R4BaseReg = NextReg;
2034
2035 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
2036 if (NumAlignedDPRCS2Regs >= 4) {
2037 MCRegister SupReg =
2038 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2039 MBB.addLiveIn(SupReg);
2040 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
2041 .addReg(ARM::R4)
2042 .addImm(16)
2043 .addReg(NextReg)
2046 NextReg += 4;
2047 NumAlignedDPRCS2Regs -= 4;
2048 }
2049
2050 // 16-byte aligned vst1.64 with 2 d-regs.
2051 if (NumAlignedDPRCS2Regs >= 2) {
2052 MCRegister SupReg =
2053 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
2054 MBB.addLiveIn(SupReg);
2055 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
2056 .addReg(ARM::R4)
2057 .addImm(16)
2058 .addReg(SupReg)
2060 NextReg += 2;
2061 NumAlignedDPRCS2Regs -= 2;
2062 }
2063
2064 // Finally, use a vanilla vstr.64 for the odd last register.
2065 if (NumAlignedDPRCS2Regs) {
2066 MBB.addLiveIn(NextReg);
2067 // vstr.64 uses addrmode5 which has an offset scale of 4.
2068 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
2069 .addReg(NextReg)
2070 .addReg(ARM::R4)
2071 .addImm((NextReg - R4BaseReg) * 2)
2073 }
2074
2075 // The last spill instruction inserted should kill the scratch register r4.
2076 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2077}
2078
2079/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
2080/// iterator to the following instruction.
2081static MachineBasicBlock::iterator
2082skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
2083                        unsigned NumAlignedDPRCS2Regs) {
2084 // sub r4, sp, #numregs * 8
2085 // bic r4, r4, #align - 1
2086 // mov sp, r4
2087 ++MI; ++MI; ++MI;
2088 assert(MI->mayStore() && "Expecting spill instruction");
2089
2090  // These cases all fall through.
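  // emitAlignedDPRCS2Spills emits three spill instructions for seven aligned
  // D-registers, two for 3, 5, 6 or 8, and one for 1, 2 or 4; the fall-through
  // structure below advances past the matching number of stores.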
2091 switch(NumAlignedDPRCS2Regs) {
2092 case 7:
2093 ++MI;
2094 assert(MI->mayStore() && "Expecting spill instruction");
2095 [[fallthrough]];
2096 default:
2097 ++MI;
2098 assert(MI->mayStore() && "Expecting spill instruction");
2099 [[fallthrough]];
2100 case 1:
2101 case 2:
2102 case 4:
2103 assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
2104 ++MI;
2105 }
2106 return MI;
2107}
2108
2109/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
2110/// starting from d8. These instructions are assumed to execute while the
2111/// stack is still aligned, unlike the code inserted by emitPopInst.
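/// The reload sequence mirrors emitAlignedDPRCS2Spills, again using r4 as the
/// scratch base register: for example, seven registers are reloaded with
/// vld1.64 {d8-d11} (with writeback), vld1.64 {d12,d13}, and a vldr of d14.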
2112static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
2113                                      MachineBasicBlock::iterator MI,
2114                                      unsigned NumAlignedDPRCS2Regs,
2115                                      ArrayRef<CalleeSavedInfo> CSI,
2116                                      const TargetRegisterInfo *TRI) {
2117  MachineFunction &MF = *MBB.getParent();
2118  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2119 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
2121
2122 // Find the frame index assigned to d8.
2123 int D8SpillFI = 0;
2124 for (const CalleeSavedInfo &I : CSI)
2125 if (I.getReg() == ARM::D8) {
2126 D8SpillFI = I.getFrameIdx();
2127 break;
2128 }
2129
2130 // Materialize the address of the d8 spill slot into the scratch register r4.
2131 // This can be fairly complicated if the stack frame is large, so just use
2132 // the normal frame index elimination mechanism to do it. This code runs as
2133 // the initial part of the epilog where the stack and base pointers haven't
2134 // been changed yet.
2135 bool isThumb = AFI->isThumbFunction();
2136 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
2137
2138 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
2139 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
2140 .addFrameIndex(D8SpillFI)
2141 .addImm(0)
2143 .add(condCodeOp());
2144
2145 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
2146 unsigned NextReg = ARM::D8;
2147
2148 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
2149 if (NumAlignedDPRCS2Regs >= 6) {
2150 MCRegister SupReg =
2151 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2152 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
2153 .addReg(ARM::R4, RegState::Define)
2154 .addReg(ARM::R4, RegState::Kill)
2155 .addImm(16)
2158 NextReg += 4;
2159 NumAlignedDPRCS2Regs -= 4;
2160 }
2161
2162  // We won't modify r4 beyond this point. It currently points to the next
2163  // register to be reloaded.
2164 unsigned R4BaseReg = NextReg;
2165
2166 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
2167 if (NumAlignedDPRCS2Regs >= 4) {
2168 MCRegister SupReg =
2169 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2170 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
2171 .addReg(ARM::R4)
2172 .addImm(16)
2175 NextReg += 4;
2176 NumAlignedDPRCS2Regs -= 4;
2177 }
2178
2179 // 16-byte aligned vld1.64 with 2 d-regs.
2180 if (NumAlignedDPRCS2Regs >= 2) {
2181 MCRegister SupReg =
2182 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
2183 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
2184 .addReg(ARM::R4)
2185 .addImm(16)
2187 NextReg += 2;
2188 NumAlignedDPRCS2Regs -= 2;
2189 }
2190
2191 // Finally, use a vanilla vldr.64 for the remaining odd register.
2192 if (NumAlignedDPRCS2Regs)
2193 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
2194 .addReg(ARM::R4)
2195 .addImm(2 * (NextReg - R4BaseReg))
2197
2198  // The last reload kills r4.
2199 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2200}
2201
2202bool ARMFrameLowering::spillCalleeSavedRegisters(
2203    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2204    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2205  if (CSI.empty())
2206 return false;
2207
2208  MachineFunction &MF = *MBB.getParent();
2209  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2210  ARMSubtarget::PushPopSplitVariation PushPopSplit =
2211      STI.getPushPopSplitVariation(MF);
2212 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2213
2214 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
2215 unsigned PushOneOpc = AFI->isThumbFunction() ?
2216 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
2217 unsigned FltOpc = ARM::VSTMDDB_UPD;
2218 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2219 // Compute PAC in R12.
2220 if (AFI->shouldSignReturnAddress()) {
2221 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
2223 }
2224 // Save the non-secure floating point context.
2225 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
2226 return C.getReg() == ARM::FPCXTNS;
2227 })) {
2228 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2229 ARM::SP)
2230 .addReg(ARM::SP)
2231 .addImm(-4)
2233 }
2234
2235 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2236 RegInfo](unsigned Reg, SpillArea TestArea) {
2237 return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2238 TestArea;
2239 };
2240 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2241 return CheckRegArea(Reg, SpillArea::GPRCS1);
2242 };
2243 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2244 return CheckRegArea(Reg, SpillArea::GPRCS2);
2245 };
2246 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2247 return CheckRegArea(Reg, SpillArea::DPRCS1);
2248 };
2249 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2250 return CheckRegArea(Reg, SpillArea::GPRCS3);
2251 };
2252
2253 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
2254 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
2255 emitFPStatusSaves(MBB, MI, CSI, PushOpc);
2256 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
2257 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS3);
2258
2259 // The code above does not insert spill code for the aligned DPRCS2 registers.
2260 // The stack realignment code will be inserted between the push instructions
2261 // and these spills.
2262 if (NumAlignedDPRCS2Regs)
2263 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2264
2265 return true;
2266}
2267
2268bool ARMFrameLowering::restoreCalleeSavedRegisters(
2269    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2270    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2271  if (CSI.empty())
2272 return false;
2273
2274  MachineFunction &MF = *MBB.getParent();
2275  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2276 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2277
2278 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2279  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2280  ARMSubtarget::PushPopSplitVariation PushPopSplit =
2281      STI.getPushPopSplitVariation(MF);
2282
2283 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2284 // registers. Do that here instead.
2285 if (NumAlignedDPRCS2Regs)
2286 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2287
2288 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2289 unsigned LdrOpc =
2290 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2291 unsigned FltOpc = ARM::VLDMDIA_UPD;
2292
2293 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2294 RegInfo](unsigned Reg, SpillArea TestArea) {
2295 return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2296 TestArea;
2297 };
2298 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2299 return CheckRegArea(Reg, SpillArea::GPRCS1);
2300 };
2301 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2302 return CheckRegArea(Reg, SpillArea::GPRCS2);
2303 };
2304 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2305 return CheckRegArea(Reg, SpillArea::DPRCS1);
2306 };
2307 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2308 return CheckRegArea(Reg, SpillArea::GPRCS3);
2309 };
2310
2311 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS3);
2312 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
2313 emitFPStatusRestores(MBB, MI, CSI, PopOpc);
2314 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
2315 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
2316
2317 return true;
2318}
2319
2320// FIXME: Make generic?
2321static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2322                                            const ARMBaseInstrInfo &TII) {
2323 unsigned FnSize = 0;
2324 for (auto &MBB : MF) {
2325 for (auto &MI : MBB)
2326 FnSize += TII.getInstSizeInBytes(MI);
2327 }
2328 if (MF.getJumpTableInfo())
2329 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2330 FnSize += Table.MBBs.size() * 4;
2331 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2332 return FnSize;
2333}
2334
2335/// estimateRSStackSizeLimit - Look at each instruction that references stack
2336/// frames and return the stack size limit beyond which some of these
2337/// instructions will require a scratch register during their expansion later.
2338// FIXME: Move to TII?
2339static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
2340                                         const TargetFrameLowering *TFI,
2341 bool &HasNonSPFrameIndex) {
2342 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2343 const ARMBaseInstrInfo &TII =
2344 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2346 unsigned Limit = (1 << 12) - 1;
2347 for (auto &MBB : MF) {
2348 for (auto &MI : MBB) {
2349 if (MI.isDebugInstr())
2350 continue;
2351 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2352 if (!MI.getOperand(i).isFI())
2353 continue;
2354
2355 // When using ADDri to get the address of a stack object, 255 is the
2356 // largest offset guaranteed to fit in the immediate offset.
2357 if (MI.getOpcode() == ARM::ADDri) {
2358 Limit = std::min(Limit, (1U << 8) - 1);
2359 break;
2360 }
2361 // t2ADDri will not require an extra register, it can reuse the
2362 // destination.
2363 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2364 break;
2365
2366 const MCInstrDesc &MCID = MI.getDesc();
2367 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2368 if (RegClass && !RegClass->contains(ARM::SP))
2369 HasNonSPFrameIndex = true;
2370
2371 // Otherwise check the addressing mode.
2372 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2374 case ARMII::AddrMode2:
2375 // Default 12 bit limit.
2376 break;
2377 case ARMII::AddrMode3:
2379 Limit = std::min(Limit, (1U << 8) - 1);
2380 break;
2382 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2383 break;
2384 case ARMII::AddrMode5:
2387 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2388 break;
2389      case ARMII::AddrModeT2_i12:
2390        // i12 supports only positive offset so these will be converted to
2391 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2392 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2393 Limit = std::min(Limit, (1U << 8) - 1);
2394 break;
2395 case ARMII::AddrMode4:
2396 case ARMII::AddrMode6:
2397 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2398 // immediate offset for stack references.
2399 return 0;
2400      case ARMII::AddrModeT2_i7:
2401        Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2402 break;
2403      case ARMII::AddrModeT2_i7s2:
2404        Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2405 break;
2406      case ARMII::AddrModeT2_i7s4:
2407        Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2408 break;
2409 default:
2410 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2411 }
2412 break; // At most one FI per instruction
2413 }
2414 }
2415 }
2416
2417 return Limit;
2418}
2419
2420// In functions that realign the stack, it can be an advantage to spill the
2421// callee-saved vector registers after realigning the stack. The vst1 and vld1
2422// instructions take alignment hints that can improve performance.
2423static void
2424checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
2425  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2426  if (!SpillAlignedNEONRegs)
2427    return;
2428
2429 // Naked functions don't spill callee-saved registers.
2430 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2431 return;
2432
2433 // We are planning to use NEON instructions vst1 / vld1.
2434 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2435 return;
2436
2437 // Don't bother if the default stack alignment is sufficiently high.
2439 return;
2440
2441 // Aligned spills require stack realignment.
2442 if (!static_cast<const ARMBaseRegisterInfo *>(
2444 return;
2445
2446 // We always spill contiguous d-registers starting from d8. Count how many
2447 // needs spilling. The register allocator will almost always use the
2448 // callee-saved registers in order, but it can happen that there are holes in
2449 // the range. Registers above the hole will be spilled to the standard DPRCS
2450 // area.
2451 unsigned NumSpills = 0;
2452 for (; NumSpills < 8; ++NumSpills)
2453 if (!SavedRegs.test(ARM::D8 + NumSpills))
2454 break;
2455
2456 // Don't do this for just one d-register. It's not worth it.
2457 if (NumSpills < 2)
2458 return;
2459
2460 // Spill the first NumSpills D-registers after realigning the stack.
2461 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2462
2463 // A scratch register is required for the vst1 / vld1 instructions.
2464 SavedRegs.set(ARM::R4);
2465}
2466
2467bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2468  // For CMSE entry functions, we want to save the FPCXT_NS immediately
2469  // upon function entry (resp. restore it immediately before return)
2470 if (STI.hasV8_1MMainlineOps() &&
2472 return false;
2473
2474 // We are disabling shrinkwrapping for now when PAC is enabled, as
2475 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2476 // generated. A follow-up patch will fix this in a more performant manner.
2478 true /* SpillsLR */))
2479 return false;
2480
2481 return true;
2482}
2483
2484bool ARMFrameLowering::requiresAAPCSFrameRecord(
2485    const MachineFunction &MF) const {
2486 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2487 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2488}
2489
2490// Thumb1 may require a spill when storing to a frame index through FP (or any
2491// access with execute-only), for cases where FP is a high register (R11). This
2492// scans the function for cases where this may happen.
2493static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
2494                                       const TargetFrameLowering &TFI) {
2495 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2496 if (!AFI->isThumb1OnlyFunction())
2497 return false;
2498
2499 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2500 for (const auto &MBB : MF)
2501 for (const auto &MI : MBB)
2502 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2503 STI.genExecuteOnly())
2504 for (const auto &Op : MI.operands())
2505 if (Op.isFI()) {
2506 Register Reg;
2507 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2508 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2509 return true;
2510 }
2511 return false;
2512}
2513
2514void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
2515                                            BitVector &SavedRegs,
2516 RegScavenger *RS) const {
2517  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2518  // This tells PEI to spill the FP as if it is any other callee-save register
2519 // to take advantage the eliminateFrameIndex machinery. This also ensures it
2520 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2521 // to combine multiple loads / stores.
2522 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
2524 bool CS1Spilled = false;
2525 bool LRSpilled = false;
2526 unsigned NumGPRSpills = 0;
2527 unsigned NumFPRSpills = 0;
2528 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2529 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2530 const Function &F = MF.getFunction();
2531 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2533 const ARMBaseInstrInfo &TII =
2534 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2535  MachineRegisterInfo &MRI = MF.getRegInfo();
2536  MachineFrameInfo &MFI = MF.getFrameInfo();
2537  const TargetRegisterInfo *TRI =
2538      MF.getSubtarget().getRegisterInfo();
2539  (void)TRI; // Silence unused warning in non-assert builds.
2540  Register FramePtr = RegInfo->getFrameRegister(MF);
2541  ARMSubtarget::PushPopSplitVariation PushPopSplit =
2542      STI.getPushPopSplitVariation(MF);
2543
2544 // For a floating point interrupt, save these registers always, since LLVM
2545 // currently doesn't model reads/writes to these registers.
2546 if (F.hasFnAttribute("interrupt") && F.hasFnAttribute("save-fp")) {
2547 SavedRegs.set(ARM::FPSCR);
2548 SavedRegs.set(ARM::R4);
2549
2550    // FPEXC is only present on non-M-class cores.
2551 if (STI.isMClass()) {
2552 SavedRegs.reset(ARM::FPEXC);
2553 } else {
2554 SavedRegs.set(ARM::FPEXC);
2555 SavedRegs.set(ARM::R5);
2556 }
2557 }
2558
2559 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2560 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2561 // since it's not always possible to restore sp from fp in a single
2562 // instruction.
2563 // FIXME: It will be better just to find spare register here.
2564 if (AFI->isThumb2Function() &&
2565 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2566 SavedRegs.set(ARM::R4);
2567
2568 // If a stack probe will be emitted, spill R4 and LR, since they are
2569 // clobbered by the stack probe call.
2570  // This should be a safe, conservative estimate. The actual
2571 // stack probe is enabled based on the size of the local objects;
2572 // this estimate also includes the varargs store size.
2573 if (STI.isTargetWindows() &&
2574 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2575 SavedRegs.set(ARM::R4);
2576 SavedRegs.set(ARM::LR);
2577 }
2578
2579 if (AFI->isThumb1OnlyFunction()) {
2580 // Spill LR if Thumb1 function uses variable length argument lists.
2581 if (AFI->getArgRegsSaveSize() > 0)
2582 SavedRegs.set(ARM::LR);
2583
2584 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2585 // requires stack alignment. We don't know for sure what the stack size
2586    // will be, but for this, an estimate is good enough. If anything
2587 // changes it, it'll be a spill, which implies we've used all the registers
2588 // and so R4 is already used, so not marking it here will be OK.
2589 // FIXME: It will be better just to find spare register here.
2590 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2591 MFI.estimateStackSize(MF) > 508)
2592 SavedRegs.set(ARM::R4);
2593 }
2594
2595 // See if we can spill vector registers to aligned stack.
2596 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2597
2598 // Spill the BasePtr if it's used.
2599 if (RegInfo->hasBasePointer(MF))
2600 SavedRegs.set(RegInfo->getBaseRegister());
2601
2602 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2603 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2604 CanEliminateFrame = false;
2605
2606 // When return address signing is enabled R12 is treated as callee-saved.
2607 if (AFI->shouldSignReturnAddress())
2608 CanEliminateFrame = false;
2609
2610 // Don't spill FP if the frame can be eliminated. This is determined
2611 // by scanning the callee-save registers to see if any is modified.
2612 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2613 for (unsigned i = 0; CSRegs[i]; ++i) {
2614 unsigned Reg = CSRegs[i];
2615 bool Spilled = false;
2616 if (SavedRegs.test(Reg)) {
2617 Spilled = true;
2618 CanEliminateFrame = false;
2619 }
2620
2621 if (!ARM::GPRRegClass.contains(Reg)) {
2622 if (Spilled) {
2623 if (ARM::SPRRegClass.contains(Reg))
2624 NumFPRSpills++;
2625 else if (ARM::DPRRegClass.contains(Reg))
2626 NumFPRSpills += 2;
2627 else if (ARM::QPRRegClass.contains(Reg))
2628 NumFPRSpills += 4;
2629 }
2630 continue;
2631 }
2632
2633 if (Spilled) {
2634 NumGPRSpills++;
2635
2636 if (PushPopSplit != ARMSubtarget::SplitR7) {
2637 if (Reg == ARM::LR)
2638 LRSpilled = true;
2639 CS1Spilled = true;
2640 continue;
2641 }
2642
2643 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2644 switch (Reg) {
2645 case ARM::LR:
2646 LRSpilled = true;
2647 [[fallthrough]];
2648 case ARM::R0: case ARM::R1:
2649 case ARM::R2: case ARM::R3:
2650 case ARM::R4: case ARM::R5:
2651 case ARM::R6: case ARM::R7:
2652 CS1Spilled = true;
2653 break;
2654 default:
2655 break;
2656 }
2657 } else {
2658 if (PushPopSplit != ARMSubtarget::SplitR7) {
2659 UnspilledCS1GPRs.push_back(Reg);
2660 continue;
2661 }
2662
2663 switch (Reg) {
2664 case ARM::R0: case ARM::R1:
2665 case ARM::R2: case ARM::R3:
2666 case ARM::R4: case ARM::R5:
2667 case ARM::R6: case ARM::R7:
2668 case ARM::LR:
2669 UnspilledCS1GPRs.push_back(Reg);
2670 break;
2671 default:
2672 UnspilledCS2GPRs.push_back(Reg);
2673 break;
2674 }
2675 }
2676 }
2677
2678 bool ForceLRSpill = false;
2679 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2680 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2681 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2682 // use of BL to implement far jump.
2683 if (FnSize >= (1 << 11)) {
2684 CanEliminateFrame = false;
2685 ForceLRSpill = true;
2686 }
2687 }
2688
2689 // If any of the stack slot references may be out of range of an immediate
2690 // offset, make sure a register (or a spill slot) is available for the
2691 // register scavenger. Note that if we're indexing off the frame pointer, the
2692 // effective stack size is 4 bytes larger since the FP points to the stack
2693 // slot of the previous FP. Also, if we have variable sized objects in the
2694 // function, stack slot references will often be negative, and some of
2695 // our instructions are positive-offset only, so conservatively consider
2696 // that case to want a spill slot (or register) as well. Similarly, if
2697 // the function adjusts the stack pointer during execution and the
2698 // adjustments aren't already part of our stack size estimate, our offset
2699 // calculations may be off, so be conservative.
2700 // FIXME: We could add logic to be more precise about negative offsets
2701 // and which instructions will need a scratch register for them. Is it
2702 // worth the effort and added fragility?
2703 unsigned EstimatedStackSize =
2704 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2705
2706 // Determine biggest (positive) SP offset in MachineFrameInfo.
2707 int MaxFixedOffset = 0;
2708 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2709 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2710 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2711 }
2712
2713 bool HasFP = hasFP(MF);
2714 if (HasFP) {
2715 if (AFI->hasStackFrame())
2716 EstimatedStackSize += 4;
2717 } else {
2718 // If FP is not used, SP will be used to access arguments, so count the
2719 // size of arguments into the estimation.
2720 EstimatedStackSize += MaxFixedOffset;
2721 }
2722 EstimatedStackSize += 16; // For possible paddings.
2723
2724 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2725 bool HasNonSPFrameIndex = false;
2726 if (AFI->isThumb1OnlyFunction()) {
2727 // For Thumb1, don't bother to iterate over the function. The only
2728 // instruction that requires an emergency spill slot is a store to a
2729 // frame index.
2730 //
2731 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2732 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2733 // a 5-bit unsigned immediate.
2734 //
2735 // We could try to check if the function actually contains a tSTRspi
2736 // that might need the spill slot, but it's not really important.
2737 // Functions with VLAs or extremely large call frames are rare, and
2738 // if a function is allocating more than 1KB of stack, an extra 4-byte
2739 // slot probably isn't relevant.
2740 //
2741 // A special case is the scenario where r11 is used as FP, where accesses
2742 // to a frame index will require its value to be moved into a low reg.
2743 // This is handled later on, once we are able to determine if we have any
2744 // fp-relative accesses.
2745 if (RegInfo->hasBasePointer(MF))
2746 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2747 else
2748 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2749 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2750 } else {
2751 EstimatedRSStackSizeLimit =
2752 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2753 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2754 }
2755 // Final estimate of whether sp or bp-relative accesses might require
2756 // scavenging.
2757 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2758
2759 // If the stack pointer moves and we don't have a base pointer, the
2760 // estimate logic doesn't work. The actual offsets might be larger when
2761 // we're constructing a call frame, or we might need to use negative
2762 // offsets from fp.
2763 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2764 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2765 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2766
2767 // If we have a frame pointer, we assume arguments will be accessed
2768 // relative to the frame pointer. Check whether fp-relative accesses to
2769 // arguments require scavenging.
2770 //
2771 // We could do slightly better on Thumb1; in some cases, an sp-relative
2772 // offset would be legal even though an fp-relative offset is not.
2773 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2774 bool HasLargeArgumentList =
2775 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2776
2777 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2778 HasLargeArgumentList || HasNonSPFrameIndex;
2779 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2780 << "; EstimatedStack: " << EstimatedStackSize
2781 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2782 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2783 if (BigFrameOffsets ||
2784 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2785 AFI->setHasStackFrame(true);
2786
2787 if (HasFP) {
2788 SavedRegs.set(FramePtr);
2789 // If the frame pointer is required by the ABI, also spill LR so that we
2790 // emit a complete frame record.
2791 if ((requiresAAPCSFrameRecord(MF) ||
2793 !LRSpilled) {
2794 SavedRegs.set(ARM::LR);
2795 LRSpilled = true;
2796 NumGPRSpills++;
2797 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2798 if (LRPos != UnspilledCS1GPRs.end())
2799 UnspilledCS1GPRs.erase(LRPos);
2800 }
2801 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2802 if (FPPos != UnspilledCS1GPRs.end())
2803 UnspilledCS1GPRs.erase(FPPos);
2804 NumGPRSpills++;
2805 if (FramePtr == ARM::R7)
2806 CS1Spilled = true;
2807 }
2808
2809 // This is the number of extra spills inserted for callee-save GPRs which
2810 // would not otherwise be used by the function. When greater than zero it
2811    // guarantees that it is possible to scavenge a register to hold the
2812 // address of a stack slot. On Thumb1, the register must be a valid operand
2813 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2814 // or lr.
2815 //
2816 // If we don't insert a spill, we instead allocate an emergency spill
2817 // slot, which can be used by scavenging to spill an arbitrary register.
2818 //
2819 // We currently don't try to figure out whether any specific instruction
2820    // requires scavenging an additional register.
2821 unsigned NumExtraCSSpill = 0;
2822
2823 if (AFI->isThumb1OnlyFunction()) {
2824 // For Thumb1-only targets, we need some low registers when we save and
2825 // restore the high registers (which aren't allocatable, but could be
2826 // used by inline assembly) because the push/pop instructions can not
2827 // access high registers. If necessary, we might need to push more low
2828 // registers to ensure that there is at least one free that can be used
2829 // for the saving & restoring, and preferably we should ensure that as
2830 // many as are needed are available so that fewer push/pop instructions
2831 // are required.
2832
2833 // Low registers which are not currently pushed, but could be (r4-r7).
2834 SmallVector<unsigned, 4> AvailableRegs;
2835
2836 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2837 // free.
2838 int EntryRegDeficit = 0;
2839 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2840 if (!MF.getRegInfo().isLiveIn(Reg)) {
2841 --EntryRegDeficit;
2842        LLVM_DEBUG(dbgs()
2843                   << printReg(Reg, TRI)
2844 << " is unused argument register, EntryRegDeficit = "
2845 << EntryRegDeficit << "\n");
2846 }
2847 }
2848
2849 // Unused return registers can be clobbered in the epilogue for free.
2850 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2851    LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
2852                      << " return regs used, ExitRegDeficit = "
2853 << ExitRegDeficit << "\n");
2854
2855 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2856 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2857
2858 // r4-r6 can be used in the prologue if they are pushed by the first push
2859 // instruction.
2860 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2861 if (SavedRegs.test(Reg)) {
2862 --RegDeficit;
2863 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2864 << " is saved low register, RegDeficit = "
2865 << RegDeficit << "\n");
2866 } else {
2867 AvailableRegs.push_back(Reg);
2868 LLVM_DEBUG(
2869 dbgs()
2870 << printReg(Reg, TRI)
2871 << " is non-saved low register, adding to AvailableRegs\n");
2872 }
2873 }
2874
2875 // r7 can be used if it is not being used as the frame pointer.
2876 if (!HasFP || FramePtr != ARM::R7) {
2877 if (SavedRegs.test(ARM::R7)) {
2878 --RegDeficit;
2879 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2880 << RegDeficit << "\n");
2881 } else {
2882 AvailableRegs.push_back(ARM::R7);
2883 LLVM_DEBUG(
2884 dbgs()
2885 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2886 }
2887 }
2888
2889 // Each of r8-r11 needs to be copied to a low register, then pushed.
2890 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2891 if (SavedRegs.test(Reg)) {
2892 ++RegDeficit;
2893 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2894 << " is saved high register, RegDeficit = "
2895 << RegDeficit << "\n");
2896 }
2897 }
2898
2899 // LR can only be used by PUSH, not POP, and can't be used at all if the
2900 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2901 // are more limited at function entry than exit.
2902 if ((EntryRegDeficit > ExitRegDeficit) &&
2903          !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2904            MF.getFrameInfo().isReturnAddressTaken())) {
2905 if (SavedRegs.test(ARM::LR)) {
2906 --RegDeficit;
2907 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2908 << RegDeficit << "\n");
2909 } else {
2910 AvailableRegs.push_back(ARM::LR);
2911 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2912 }
2913 }
2914
2915 // If there are more high registers that need pushing than low registers
2916 // available, push some more low registers so that we can use fewer push
2917 // instructions. This might not reduce RegDeficit all the way to zero,
2918 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2919 // need saving.
2920 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2921 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2922 unsigned Reg = AvailableRegs.pop_back_val();
2923 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2924 << " to make up reg deficit\n");
2925 SavedRegs.set(Reg);
2926 NumGPRSpills++;
2927 CS1Spilled = true;
2928 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2929 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2930 NumExtraCSSpill++;
2931 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2932 if (Reg == ARM::LR)
2933 LRSpilled = true;
2934 }
2935 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2936 << "\n");
2937 }
2938
2939 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2940 // restore LR in that case.
2941 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2942
2943 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2944 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2945 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2946 SavedRegs.set(ARM::LR);
2947 NumGPRSpills++;
2949 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2950 if (LRPos != UnspilledCS1GPRs.end())
2951 UnspilledCS1GPRs.erase(LRPos);
2952
2953 ForceLRSpill = false;
2954 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2955 !AFI->isThumb1OnlyFunction())
2956 NumExtraCSSpill++;
2957 }
2958
2959 // If stack and double are 8-byte aligned and we are spilling an odd number
2960 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2961 // the integer and double callee save areas.
2962 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2963 const Align TargetAlign = getStackAlign();
2964 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2965 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2966 for (unsigned Reg : UnspilledCS1GPRs) {
2967        // Don't spill a high register if the function is Thumb. In the case
2968        // of Windows on ARM, accept R11 (the frame pointer).
2969 if (!AFI->isThumbFunction() ||
2970 (STI.isTargetWindows() && Reg == ARM::R11) ||
2971 isARMLowRegister(Reg) ||
2972 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2973 SavedRegs.set(Reg);
2974 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2975 << " to make up alignment\n");
2976 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2977 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2978 NumExtraCSSpill++;
2979 break;
2980 }
2981 }
2982 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2983 unsigned Reg = UnspilledCS2GPRs.front();
2984 SavedRegs.set(Reg);
2985 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2986 << " to make up alignment\n");
2987 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2988 NumExtraCSSpill++;
2989 }
2990 }
2991
2992 // Estimate if we might need to scavenge registers at some point in order
2993 // to materialize a stack offset. If so, either spill one additional
2994 // callee-saved register or reserve a special spill slot to facilitate
2995 // register scavenging. Thumb1 needs a spill slot for stack pointer
2996 // adjustments and for frame index accesses when FP is high register,
2997 // even when the frame itself is small.
2998 unsigned RegsNeeded = 0;
2999 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
3000 RegsNeeded++;
3001 // With thumb1 execute-only we may need an additional register for saving
3002 // and restoring the CPSR.
3003 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
3004 RegsNeeded++;
3005 }
3006
3007 if (RegsNeeded > NumExtraCSSpill) {
3008 // If any non-reserved CS register isn't spilled, just spill one or two
3009 // extra. That should take care of it!
3010 unsigned NumExtras = TargetAlign.value() / 4;
3011      SmallVector<unsigned, 4> Extras;
3012      while (NumExtras && !UnspilledCS1GPRs.empty()) {
3013 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
3014 if (!MRI.isReserved(Reg) &&
3015 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
3016 Extras.push_back(Reg);
3017 NumExtras--;
3018 }
3019 }
3020 // For non-Thumb1 functions, also check for hi-reg CS registers
3021 if (!AFI->isThumb1OnlyFunction()) {
3022 while (NumExtras && !UnspilledCS2GPRs.empty()) {
3023 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
3024 if (!MRI.isReserved(Reg)) {
3025 Extras.push_back(Reg);
3026 NumExtras--;
3027 }
3028 }
3029 }
3030 if (NumExtras == 0) {
3031 for (unsigned Reg : Extras) {
3032 SavedRegs.set(Reg);
3033 if (!MRI.isPhysRegUsed(Reg))
3034 NumExtraCSSpill++;
3035 }
3036 }
3037 while ((RegsNeeded > NumExtraCSSpill) && RS) {
3038 // Reserve a slot closest to SP or frame pointer.
3039 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
3040 const TargetRegisterClass &RC = ARM::GPRRegClass;
3041 unsigned Size = TRI->getSpillSize(RC);
3042 Align Alignment = TRI->getSpillAlign(RC);
3043        RS->addScavengingFrameIndex(
3044            MFI.CreateSpillStackObject(Size, Alignment));
3045 --RegsNeeded;
3046 }
3047 }
3048 }
3049
3050 if (ForceLRSpill)
3051 SavedRegs.set(ARM::LR);
3052 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
3053}
3054
3055void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
3056  MachineFrameInfo &MFI = MF.getFrameInfo();
3057 if (!MFI.isCalleeSavedInfoValid())
3058 return;
3059
3060 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3061 // into PC so it is not live out of the return block: Clear the Restored bit
3062 // in that case.
3063 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
3064 if (Info.getReg() != ARM::LR)
3065 continue;
3066 if (all_of(MF, [](const MachineBasicBlock &MBB) {
3067 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
3068 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3069 Term.getOpcode() == ARM::t2LDMIA_RET ||
3070 Term.getOpcode() == ARM::tPOP_RET;
3071 });
3072 })) {
3073 Info.setRestored(false);
3074 break;
3075 }
3076 }
3077}
3078
3079void ARMFrameLowering::processFunctionBeforeFrameFinalized(
3080    MachineFunction &MF, RegScavenger *RS) const {
3081  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
3082  updateLRRestored(MF);
3083}
3084
3085void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
3086                                      BitVector &SavedRegs) const {
3087  TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
3088
3089 // If we have the "returned" parameter attribute which guarantees that we
3090 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3091 // record that fact for IPRA.
3092 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3093 if (AFI->getPreservesR0())
3094 SavedRegs.set(ARM::R0);
3095}
3096
3097bool ARMFrameLowering::assignCalleeSavedSpillSlots(
3098    MachineFunction &MF, const TargetRegisterInfo *TRI,
3099    std::vector<CalleeSavedInfo> &CSI) const {
3100 // For CMSE entry functions, handle floating-point context as if it was a
3101 // callee-saved register.
3102  if (STI.hasV8_1MMainlineOps() &&
3103      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
3104 CSI.emplace_back(ARM::FPCXTNS);
3105 CSI.back().setRestored(false);
3106 }
3107
3108 // For functions, which sign their return address, upon function entry, the
3109 // return address PAC is computed in R12. Treat R12 as a callee-saved register
3110 // in this case.
3111 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
3112 if (AFI.shouldSignReturnAddress()) {
3113 // The order of register must match the order we push them, because the
3114 // PEI assigns frame indices in that order. That order depends on the
3115 // PushPopSplitVariation, there are only two cases which we use with return
3116 // address signing:
3117 switch (STI.getPushPopSplitVariation(MF)) {
3119 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
3120 CSI.insert(find_if(CSI,
3121 [=](const auto &CS) {
3122 MCRegister Reg = CS.getReg();
3123 return Reg == ARM::R10 || Reg == ARM::R11 ||
3124 Reg == ARM::R8 || Reg == ARM::R9 ||
3125 ARM::DPRRegClass.contains(Reg);
3126 }),
3127 CalleeSavedInfo(ARM::R12));
3128 break;
3130 // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
3131 // on the stack.
3132 CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
3133 break;
3136 "ABI-required frame pointers need a CSR split when signing return "
3137 "address.");
3138 CSI.insert(find_if(CSI,
3139 [=](const auto &CS) {
3140 MCRegister Reg = CS.getReg();
3141 return Reg != ARM::LR;
3142 }),
3143 CalleeSavedInfo(ARM::R12));
3144 break;
3145 default:
3146 llvm_unreachable("Unexpected CSR split with return address signing");
3147 }
3148 }
3149
3150 return false;
3151}
3152
3153const TargetFrameLowering::SpillSlot *
3154ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
3155  static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
3156 NumEntries = std::size(FixedSpillOffsets);
3157 return FixedSpillOffsets;
3158}
3159
3160MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
3161    MachineFunction &MF, MachineBasicBlock &MBB,
3162    MachineBasicBlock::iterator I) const {
3163  const ARMBaseInstrInfo &TII =
3164 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3165  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3166  bool isARM = !AFI->isThumbFunction();
3167 DebugLoc dl = I->getDebugLoc();
3168 unsigned Opc = I->getOpcode();
3169 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
3170 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
3171
3172 assert(!AFI->isThumb1OnlyFunction() &&
3173 "This eliminateCallFramePseudoInstr does not support Thumb1!");
3174
3175 int PIdx = I->findFirstPredOperandIdx();
3176 ARMCC::CondCodes Pred = (PIdx == -1)
3177 ? ARMCC::AL
3178 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
3179 unsigned PredReg = TII.getFramePred(*I);
3180
3181 if (!hasReservedCallFrame(MF)) {
3182 // Bail early if the callee is expected to do the adjustment.
3183 if (IsDestroy && CalleePopAmount != -1U)
3184 return MBB.erase(I);
3185
3186 // If we have alloca, convert as follows:
3187 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
3188 // ADJCALLSTACKUP -> add, sp, sp, amount
3189 unsigned Amount = TII.getFrameSize(*I);
3190 if (Amount != 0) {
3191 // We need to keep the stack aligned properly. To do this, we round the
3192 // amount of space needed for the outgoing arguments up to the next
3193 // alignment boundary.
3194 Amount = alignSPAdjust(Amount);
3195
3196 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
3197 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
3198 Pred, PredReg);
3199 } else {
3200 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
3201 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
3202 Pred, PredReg);
3203 }
3204 }
3205 } else if (CalleePopAmount != -1U) {
3206 // If the calling convention demands that the callee pops arguments from the
3207 // stack, we want to add it back if we have a reserved call frame.
3208 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
3209 MachineInstr::NoFlags, Pred, PredReg);
3210 }
3211 return MBB.erase(I);
3212}
3213
3214/// Get the minimum constant for ARM that is greater than or equal to the
3215/// argument. In ARM, constants can have any value that can be produced by
3216/// rotating an 8-bit value to the right by an even number of bits within a
3217/// 32-bit word.
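/// For example, alignToARMConstant(4095) returns 4096 (0x10 rotated right by
/// 24 bits), the smallest such encodable constant that is >= 4095.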
3218static uint32_t alignToARMConstant(uint32_t Value) {
3219  unsigned Shifted = 0;
3220
3221 if (Value == 0)
3222 return 0;
3223
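  // Normalize: shift Value left until one of its top two bits is set,
  // remembering the shift amount so the rounding below can work on the top byte.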
3224 while (!(Value & 0xC0000000)) {
3225 Value = Value << 2;
3226 Shifted += 2;
3227 }
3228
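  // Round up: bump the top byte if any of the discarded lower bits were set,
  // keeping the result encodable as an 8-bit value rotated by an even amount.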
3229 bool Carry = (Value & 0x00FFFFFF);
3230 Value = ((Value & 0xFF000000) >> 24) + Carry;
3231
3232 if (Value & 0x0000100)
3233 Value = Value & 0x000001FC;
3234
3235 if (Shifted > 24)
3236 Value = Value >> (Shifted - 24);
3237 else
3238 Value = Value << (24 - Shifted);
3239
3240 return Value;
3241}
3242
3243// The stack limit in the TCB is set to this many bytes above the actual
3244// stack limit.
3245static const uint64_t kSplitStackAvailable = 256;
3246
3247// Adjust the function prologue to enable split stacks. This currently only
3248// supports android and linux.
3249//
3250// The ABI of the segmented stack prologue is somewhat arbitrary, but it must
3251// be well defined in order to allow for consistent implementations of the
3252// __morestack helper function. It is also not a normal ABI: it does not follow
3253// the usual calling conventions, which allows the prologue of each function to
3254// be optimized further.
3255//
3256// Currently, the ABI looks like (when calling __morestack)
3257//
3258// * r4 holds the minimum stack size requested for this function call
3259// * r5 holds the stack size of the arguments to the function
3260// * the beginning of the function is 3 instructions after the call to
3261// __morestack
3262//
3263// Implementations of __morestack should use r4 to allocate a new stack, r5 to
3264// place the arguments on to the new stack, and the 3-instruction knowledge to
3265// jump directly to the body of the function when working on the new stack.
3266//
3267// An old (and possibly no longer compatible) implementation of __morestack for
3268// ARM can be found at [1].
3269//
3270// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
3272 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3273 unsigned Opcode;
3274 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
3275 bool Thumb = ST->isThumb();
3276 bool Thumb2 = ST->isThumb2();
3277
3278  // Sadly, this currently doesn't support varargs or platforms other than
3279  // android/linux. Note that Thumb1/Thumb2 are supported for android/linux.
3280 if (MF.getFunction().isVarArg())
3281 report_fatal_error("Segmented stacks do not support vararg functions.");
3282 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
3283 report_fatal_error("Segmented stacks not supported on this platform.");
3284
3285 MachineFrameInfo &MFI = MF.getFrameInfo();
3286 const ARMBaseInstrInfo &TII =
3287 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3288  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
3289  DebugLoc DL;
3290
3291 if (!MFI.needsSplitStackProlog())
3292 return;
3293
3294 uint64_t StackSize = MFI.getStackSize();
3295
3296 // Use R4 and R5 as scratch registers.
3297 // We save R4 and R5 before use and restore them before leaving the function.
3298 unsigned ScratchReg0 = ARM::R4;
3299 unsigned ScratchReg1 = ARM::R5;
3300 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3301 uint64_t AlignedStackSize;
3302
3303 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3304  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3305  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
3306  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
3307  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
3308
3309  // Grab everything that reaches PrologueMBB to update their liveness as well.
3310 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3311  SmallVector<MachineBasicBlock *, 2> WalkList;
3312  WalkList.push_back(&PrologueMBB);
3313
3314 do {
3315 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3316 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3317 if (BeforePrologueRegion.insert(PredBB).second)
3318 WalkList.push_back(PredBB);
3319 }
3320 } while (!WalkList.empty());
3321
3322 // The order in that list is important.
3323 // The blocks will all be inserted before PrologueMBB using that order.
3324 // Therefore the block that should appear first in the CFG should appear
3325 // first in the list.
3326 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3327 PostStackMBB};
3328
3329 BeforePrologueRegion.insert_range(AddedBlocks);
3330
3331 for (const auto &LI : PrologueMBB.liveins()) {
3332 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3333 PredBB->addLiveIn(LI);
3334 }
3335
3336 // Remove the newly added blocks from the list, since we know
3337 // we do not have to do the following updates for them.
3338 for (MachineBasicBlock *B : AddedBlocks) {
3339 BeforePrologueRegion.erase(B);
3340 MF.insert(PrologueMBB.getIterator(), B);
3341 }
3342
3343 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3344    // Make sure the LiveIns are still sorted and unique.
3345    MBB->sortUniqueLiveIns();
3346 // Replace the edges to PrologueMBB by edges to the sequences
3347 // we are about to add, but only update for immediate predecessors.
3348 if (MBB->isSuccessor(&PrologueMBB))
3349 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3350 }
3351
3352 // The required stack size that is aligned to ARM constant criterion.
3353 AlignedStackSize = alignToARMConstant(StackSize);
3354
3355 // When the frame size is less than 256 we just compare the stack
3356 // boundary directly to the value of the stack pointer, per gcc.
3357 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3358
3359 // We will use two of the callee save registers as scratch registers so we
3360 // need to save those registers onto the stack.
3361 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3362 // requested and arguments for __morestack().
3363 // SR0: Scratch Register #0
3364 // SR1: Scratch Register #1
3365 // push {SR0, SR1}
3366 if (Thumb) {
3367 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3369 .addReg(ScratchReg0)
3370 .addReg(ScratchReg1);
3371 } else {
3372 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3373 .addReg(ARM::SP, RegState::Define)
3374 .addReg(ARM::SP)
3376 .addReg(ScratchReg0)
3377 .addReg(ScratchReg1);
3378 }
3379
3380 // Emit the relevant DWARF information about the change in stack pointer as
3381 // well as where to find both r4 and r5 (the callee-save registers)
3382 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3383 CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
3384 CFIBuilder.buildDefCFAOffset(8);
3385 CFIBuilder.buildOffset(ScratchReg1, -4);
3386 CFIBuilder.buildOffset(ScratchReg0, -8);
3387 }
3388
3389 // mov SR1, sp
3390 if (Thumb) {
3391 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3392 .addReg(ARM::SP)
3394 } else if (CompareStackPointer) {
3395 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3396 .addReg(ARM::SP)
3397         .add(predOps(ARMCC::AL))
3398 .add(condCodeOp());
3399 }
3400
3401 // sub SR1, sp, #StackSize
3402 if (!CompareStackPointer && Thumb) {
3403 if (AlignedStackSize < 256) {
3404 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3405 .add(condCodeOp())
3406 .addReg(ScratchReg1)
3407 .addImm(AlignedStackSize)
3408           .add(predOps(ARMCC::AL));
3409 } else {
3410 if (Thumb2 || ST->genExecuteOnly()) {
3411 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3412 .addImm(AlignedStackSize);
3413 } else {
3414 auto MBBI = McrMBB->end();
3415 auto RegInfo = STI.getRegisterInfo();
3416 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3417 AlignedStackSize);
3418 }
3419 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3420 .add(condCodeOp())
3421 .addReg(ScratchReg1)
3422 .addReg(ScratchReg0)
3423           .add(predOps(ARMCC::AL));
3424 }
3425 } else if (!CompareStackPointer) {
3426 if (AlignedStackSize < 256) {
3427 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3428 .addReg(ARM::SP)
3429 .addImm(AlignedStackSize)
3430           .add(predOps(ARMCC::AL))
3431 .add(condCodeOp());
3432 } else {
3433 auto MBBI = McrMBB->end();
3434 auto RegInfo = STI.getRegisterInfo();
3435 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3436 AlignedStackSize);
3437 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3438 .addReg(ARM::SP)
3439 .addReg(ScratchReg0)
3440           .add(predOps(ARMCC::AL))
3441 .add(condCodeOp());
3442 }
3443 }
3444
3445 if (Thumb && ST->isThumb1Only()) {
3446 if (ST->genExecuteOnly()) {
3447 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3448 .addExternalSymbol("__STACK_LIMIT");
3449 } else {
3450 unsigned PCLabelId = ARMFI->createPICLabelUId();
3451       ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
3452 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3453       MachineConstantPool *MCP = MF.getConstantPool();
3454 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3455
3456 // ldr SR0, [pc, offset(STACK_LIMIT)]
3457 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3458           .addConstantPoolIndex(CPI)
3459           .add(predOps(ARMCC::AL));
3460 }
3461
3462 // ldr SR0, [SR0]
3463 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3464 .addReg(ScratchReg0)
3465 .addImm(0)
3466         .add(predOps(ARMCC::AL));
3467 } else {
3468 // Get TLS base address from the coprocessor
3469 // mrc p15, #0, SR0, c13, c0, #3
3470 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3471 ScratchReg0)
3472 .addImm(15)
3473 .addImm(0)
3474 .addImm(13)
3475 .addImm(0)
3476 .addImm(3)
3477         .add(predOps(ARMCC::AL));
3478
3479     // Use the last TLS slot on Android and a private field of the TCB on Linux.
3480 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3481 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3482
3483 // Get the stack limit from the right offset
3484 // ldr SR0, [sr0, #4 * TlsOffset]
3485 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3486 ScratchReg0)
3487 .addReg(ScratchReg0)
3488 .addImm(4 * TlsOffset)
3489         .add(predOps(ARMCC::AL));
3490 }
3491
3492 // Compare stack limit with stack size requested.
3493 // cmp SR0, SR1
3494 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3495 BuildMI(GetMBB, DL, TII.get(Opcode))
3496 .addReg(ScratchReg0)
3497 .addReg(ScratchReg1)
3498       .add(predOps(ARMCC::AL));
3499
3500 // This jump is taken if StackLimit <= SP - stack required.
3501 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3502 BuildMI(GetMBB, DL, TII.get(Opcode))
3503 .addMBB(PostStackMBB)
3504       .addImm(ARMCC::LS)
3505 .addReg(ARM::CPSR);
3506
3507 // Calling __morestack(StackSize, Size of stack arguments).
3508 // __morestack knows that the stack size requested is in SR0(r4)
3509   // and the size of the stack arguments is in SR1(r5).
3510
3511   // Pass the first argument to __morestack in Scratch Register #0:
3512   // the amount of stack required.
3513 if (Thumb) {
3514 if (AlignedStackSize < 256) {
3515 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3516 .add(condCodeOp())
3517 .addImm(AlignedStackSize)
3518           .add(predOps(ARMCC::AL));
3519 } else {
3520 if (Thumb2 || ST->genExecuteOnly()) {
3521 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3522 .addImm(AlignedStackSize);
3523 } else {
3524 auto MBBI = AllocMBB->end();
3525 auto RegInfo = STI.getRegisterInfo();
3526 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3527 AlignedStackSize);
3528 }
3529 }
3530 } else {
3531 if (AlignedStackSize < 256) {
3532 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3533 .addImm(AlignedStackSize)
3534           .add(predOps(ARMCC::AL))
3535 .add(condCodeOp());
3536 } else {
3537 auto MBBI = AllocMBB->end();
3538 auto RegInfo = STI.getRegisterInfo();
3539 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3540 AlignedStackSize);
3541 }
3542 }
3543
3544   // Pass the second argument to __morestack in Scratch Register #1:
3545   // the amount of stack consumed to save the function arguments.
3546 if (Thumb) {
3547 if (ARMFI->getArgumentStackSize() < 256) {
3548 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3549 .add(condCodeOp())
3550           .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
3551           .add(predOps(ARMCC::AL));
3552 } else {
3553 if (Thumb2 || ST->genExecuteOnly()) {
3554 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3555             .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
3556 } else {
3557 auto MBBI = AllocMBB->end();
3558 auto RegInfo = STI.getRegisterInfo();
3559 RegInfo->emitLoadConstPool(
3560 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3561             alignToARMConstant(ARMFI->getArgumentStackSize()));
3562 }
3563 }
3564 } else {
3565 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3566 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3567           .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
3568           .add(predOps(ARMCC::AL))
3569 .add(condCodeOp());
3570 } else {
3571 auto MBBI = AllocMBB->end();
3572 auto RegInfo = STI.getRegisterInfo();
3573 RegInfo->emitLoadConstPool(
3574 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3575           alignToARMConstant(ARMFI->getArgumentStackSize()));
3576 }
3577 }
3578
3579 // push {lr} - Save return address of this function.
3580 if (Thumb) {
3581 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3582         .add(predOps(ARMCC::AL))
3583 .addReg(ARM::LR);
3584 } else {
3585 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3586 .addReg(ARM::SP, RegState::Define)
3587 .addReg(ARM::SP)
3588         .add(predOps(ARMCC::AL))
3589 .addReg(ARM::LR);
3590 }
3591
3592 // Emit the DWARF info about the change in stack as well as where to find the
3593 // previous link register
3594 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3595 CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
3596 CFIBuilder.buildDefCFAOffset(12);
3597 CFIBuilder.buildOffset(ARM::LR, -12);
3598 }
3599
3600 // Call __morestack().
3601 if (Thumb) {
3602 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3603         .add(predOps(ARMCC::AL))
3604 .addExternalSymbol("__morestack");
3605 } else {
3606 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3607 .addExternalSymbol("__morestack");
3608 }
3609
3610 // pop {lr} - Restore return address of this original function.
3611 if (Thumb) {
3612 if (ST->isThumb1Only()) {
3613 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3614           .add(predOps(ARMCC::AL))
3615 .addReg(ScratchReg0);
3616 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3617 .addReg(ScratchReg0)
3618           .add(predOps(ARMCC::AL));
3619 } else {
3620 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3621 .addReg(ARM::LR, RegState::Define)
3622 .addReg(ARM::SP, RegState::Define)
3623 .addReg(ARM::SP)
3624 .addImm(4)
3625           .add(predOps(ARMCC::AL));
3626 }
3627 } else {
3628 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3629 .addReg(ARM::SP, RegState::Define)
3630 .addReg(ARM::SP)
3631         .add(predOps(ARMCC::AL))
3632 .addReg(ARM::LR);
3633 }
3634
3635   // Restore SR0 and SR1 in case __morestack() was called.
3636 // __morestack() will skip PostStackMBB block so we need to restore
3637 // scratch registers from here.
3638 // pop {SR0, SR1}
3639 if (Thumb) {
3640 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3641         .add(predOps(ARMCC::AL))
3642 .addReg(ScratchReg0)
3643 .addReg(ScratchReg1);
3644 } else {
3645 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3646 .addReg(ARM::SP, RegState::Define)
3647 .addReg(ARM::SP)
3648         .add(predOps(ARMCC::AL))
3649 .addReg(ScratchReg0)
3650 .addReg(ScratchReg1);
3651 }
3652
3653 // Update the CFA offset now that we've popped
3654 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
3655     CFIInstBuilder(AllocMBB, MachineInstr::NoFlags).buildDefCFAOffset(0);
3656
3657 // Return from this function.
3658 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3659
3660   // Restore SR0 and SR1 in case __morestack() was not called.
3661 // pop {SR0, SR1}
3662 if (Thumb) {
3663 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3664         .add(predOps(ARMCC::AL))
3665 .addReg(ScratchReg0)
3666 .addReg(ScratchReg1);
3667 } else {
3668 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3669 .addReg(ARM::SP, RegState::Define)
3670 .addReg(ARM::SP)
3671         .add(predOps(ARMCC::AL))
3672 .addReg(ScratchReg0)
3673 .addReg(ScratchReg1);
3674 }
3675
3676 // Update the CFA offset now that we've popped
3677 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3678 CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
3679 CFIBuilder.buildDefCFAOffset(0);
3680
3681 // Tell debuggers that r4 and r5 are now the same as they were in the
3682 // previous function, that they're the "Same Value".
3683 CFIBuilder.buildSameValue(ScratchReg0);
3684 CFIBuilder.buildSameValue(ScratchReg1);
3685 }
3686
3687 // Organizing MBB lists
3688 PostStackMBB->addSuccessor(&PrologueMBB);
3689
3690 AllocMBB->addSuccessor(PostStackMBB);
3691
3692 GetMBB->addSuccessor(PostStackMBB);
3693 GetMBB->addSuccessor(AllocMBB);
3694
3695 McrMBB->addSuccessor(GetMBB);
3696
3697 PrevStackMBB->addSuccessor(McrMBB);
3698
3699#ifdef EXPENSIVE_CHECKS
3700 MF.verify();
3701#endif
3702}
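The five basic blocks stitched in above implement a conventional split-stack check: load the per-thread stack limit, compare it against the stack pointer (minus the aligned frame size for larger frames), and call __morestack when the current segment is too small. The sketch below is an editor's illustration, not code from ARMFrameLowering.cpp; read_stack_limit() and morestack() are hypothetical stand-ins for the TLS load emitted in GetMBB and for the runtime's __morestack, whose real ABI expects the aligned frame size in r4 and the argument-area size in r5 rather than ordinary parameters.

#include <stdint.h>

// Hypothetical stand-in for the GetMBB load: the limit lives in a TLS slot
// (Android: slot 63, i.e. byte offset 4 * 63 = 252 from the TLS base read via
// MRC p15; Linux: byte offset 4).
static uintptr_t read_stack_limit() { return 0; /* placeholder value */ }

// Hypothetical stand-in for __morestack: the real call returns into the
// function with a fresh, sufficiently large stack segment.
static void morestack(uintptr_t RequestedSize, uintptr_t ArgSize) {
  (void)RequestedSize;
  (void)ArgSize;
}

// The decision made by the PrevStackMBB/McrMBB/GetMBB/AllocMBB/PostStackMBB
// chain, expressed as plain control flow.
void segmented_stack_check(uintptr_t SP, uintptr_t AlignedStackSize,
                           uintptr_t AlignedArgSize) {
  uintptr_t Limit = read_stack_limit();
  // GetMBB: "This jump is taken if StackLimit <= SP - stack required."
  if (Limit <= SP - AlignedStackSize)
    return; // PostStackMBB: enough room; fall through to the normal prologue.
  // AllocMBB: not enough room; ask the runtime to grow the stack.
  morestack(AlignedStackSize, AlignedArgSize);
}

For frames smaller than kSplitStackAvailable (256 bytes) the emitted sequence takes a shortcut: the limit is compared against sp directly, which is why the SR1 = sp - #StackSize subtraction above is only emitted when CompareStackPointer is false.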