LLVM 22.0.0git
AArch64StackTaggingPreRA.cpp
Go to the documentation of this file.
1//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64.h"
10#include "AArch64InstrInfo.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/Statistic.h"
20#include "llvm/CodeGen/Passes.h"
25#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
31
33
35 "stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(UncheckedSafe),
37 "Unconditionally apply unchecked-ld-st optimization (even for large "
38 "stack frames, or in the presence of variable sized allocas)."),
40 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
42 UncheckedSafe, "safe",
43 "apply unchecked-ld-st when the target is definitely within range"),
44 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
45
46static cl::opt<bool>
47 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
48 cl::desc("Apply first slot optimization for stack tagging "
49 "(eliminate ADDG Rt, Rn, 0, 0)."));
50
51namespace {
52
53class AArch64StackTaggingPreRA : public MachineFunctionPass {
59 const AArch64InstrInfo *TII;
60
62
63public:
64 static char ID;
65 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {}
66
67 bool mayUseUncheckedLoadStore();
68 void uncheckUsesOf(unsigned TaggedReg, int FI);
69 void uncheckLoadsAndStores();
70 std::optional<int> findFirstSlotCandidate();
71
72 bool runOnMachineFunction(MachineFunction &Func) override;
73 StringRef getPassName() const override {
74 return "AArch64 Stack Tagging PreRA";
75 }
76
77 void getAnalysisUsage(AnalysisUsage &AU) const override {
78 AU.setPreservesCFG();
80 }
81};
82} // end anonymous namespace
83
84char AArch64StackTaggingPreRA::ID = 0;
85
86INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
87 "AArch64 Stack Tagging PreRA Pass", false, false)
88INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
89 "AArch64 Stack Tagging PreRA Pass", false, false)
90
92 return new AArch64StackTaggingPreRA();
93}
94
95static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
96 switch (Opcode) {
97 case AArch64::LDRBBui:
98 case AArch64::LDRHHui:
99 case AArch64::LDRWui:
100 case AArch64::LDRXui:
101
102 case AArch64::LDRBui:
103 case AArch64::LDRHui:
104 case AArch64::LDRSui:
105 case AArch64::LDRDui:
106 case AArch64::LDRQui:
107
108 case AArch64::LDRSHWui:
109 case AArch64::LDRSHXui:
110
111 case AArch64::LDRSBWui:
112 case AArch64::LDRSBXui:
113
114 case AArch64::LDRSWui:
115
116 case AArch64::STRBBui:
117 case AArch64::STRHHui:
118 case AArch64::STRWui:
119 case AArch64::STRXui:
120
121 case AArch64::STRBui:
122 case AArch64::STRHui:
123 case AArch64::STRSui:
124 case AArch64::STRDui:
125 case AArch64::STRQui:
126
127 case AArch64::LDPWi:
128 case AArch64::LDPXi:
129 case AArch64::LDPSi:
130 case AArch64::LDPDi:
131 case AArch64::LDPQi:
132
133 case AArch64::LDPSWi:
134
135 case AArch64::STPWi:
136 case AArch64::STPXi:
137 case AArch64::STPSi:
138 case AArch64::STPDi:
139 case AArch64::STPQi:
140 return true;
141 default:
142 return false;
143 }
144}
145
146bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
148 return false;
150 return true;
151
152 // This estimate can be improved if we had harder guarantees about stack frame
153 // layout. With LocalStackAllocation we can estimate SP offset to any
154 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
155 // objects ahead of non-tagged ones, but that's not always desirable.
156 //
157 // Underestimating SP offset here may require the use of LDG to materialize
158 // the tagged address of the stack slot, along with a scratch register
159 // allocation (post-regalloc!).
160 //
161 // For now we do the safe thing here and require that the entire stack frame
162 // is within range of the shortest of the unchecked instructions.
163 unsigned FrameSize = 0;
164 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
165 FrameSize += MFI->getObjectSize(i);
166 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
167 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
168}
169
170void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
171 for (MachineInstr &UseI :
172 llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
173 if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
174 // FI operand is always the one before the immediate offset.
175 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
176 if (UseI.getOperand(OpIdx).isReg() &&
177 UseI.getOperand(OpIdx).getReg() == TaggedReg) {
178 UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
179 UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
180 }
181 } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
182 uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
183 }
184 }
185}
186
187void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
188 for (auto *I : ReTags) {
189 Register TaggedReg = I->getOperand(0).getReg();
190 int FI = I->getOperand(1).getIndex();
191 uncheckUsesOf(TaggedReg, FI);
192 }
193}
194
195namespace {
196struct SlotWithTag {
197 int FI;
198 int Tag;
199 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
200 explicit SlotWithTag(const MachineInstr &MI)
201 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
202 bool operator==(const SlotWithTag &Other) const {
203 return FI == Other.FI && Tag == Other.Tag;
204 }
205};
206} // namespace
207
208namespace llvm {
209template <> struct DenseMapInfo<SlotWithTag> {
210 static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
211 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
212 static unsigned getHashValue(const SlotWithTag &V) {
215 }
216 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
217 return A == B;
218 }
219};
220} // namespace llvm
221
222static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
223 return MFI->getUseLocalStackAllocationBlock() &&
224 MFI->isObjectPreAllocated(FI);
225}
226
227// Pin one of the tagged slots to offset 0 from the tagged base pointer.
228// This would make its address available in a virtual register (IRG's def), as
229// opposed to requiring an ADDG instruction to materialize. This effectively
230// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
231// live almost everywhere anyway), and therefore needs to happen before
232// regalloc.
233std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
234 // Find the best (FI, Tag) pair to pin to offset 0.
235 // Looking at the possible uses of a tagged address, the advantage of pinning
236 // is:
237 // - COPY to physical register.
238 // Does not matter, this would trade a MOV instruction for an ADDG.
239 // - ST*G matter, but those mostly appear near the function prologue where all
240 // the tagged addresses need to be materialized anyway; also, counting ST*G
241 // uses would overweight large allocas that require more than one ST*G
242 // instruction.
243 // - Load/Store instructions in the address operand do not require a tagged
244 // pointer, so they also do not benefit. These operands have already been
245 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
246 // instructions count.
247 // - Any other instruction may benefit from being pinned to offset 0.
248 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
249 if (!ClFirstSlot)
250 return std::nullopt;
251
253 SlotWithTag MaxScoreST{-1, -1};
254 int MaxScore = -1;
255 for (auto *I : ReTags) {
256 SlotWithTag ST{*I};
257 if (isSlotPreAllocated(MFI, ST.FI))
258 continue;
259
260 Register RetagReg = I->getOperand(0).getReg();
261 if (!RetagReg.isVirtual())
262 continue;
263
264 int Score = 0;
266 WorkList.push_back(RetagReg);
267
268 while (!WorkList.empty()) {
269 Register UseReg = WorkList.pop_back_val();
270 for (auto &UseI : MRI->use_instructions(UseReg)) {
271 unsigned Opcode = UseI.getOpcode();
272 if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
273 Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
274 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
275 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
276 Opcode == AArch64::STZGloop_wback)
277 continue;
278 if (UseI.isCopy()) {
279 Register DstReg = UseI.getOperand(0).getReg();
280 if (DstReg.isVirtual())
281 WorkList.push_back(DstReg);
282 continue;
283 }
284 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of "
285 << printReg(UseReg) << " in " << UseI << "\n");
286 Score++;
287 }
288 }
289
290 int TotalScore = RetagScore[ST] += Score;
291 if (TotalScore > MaxScore ||
292 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
293 MaxScore = TotalScore;
294 MaxScoreST = ST;
295 }
296 }
297
298 if (MaxScoreST.FI < 0)
299 return std::nullopt;
300
301 // If FI's tag is already 0, we are done.
302 if (MaxScoreST.Tag == 0)
303 return MaxScoreST.FI;
304
305 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
306 SlotWithTag SwapST{-1, -1};
307 for (auto *I : ReTags) {
308 SlotWithTag ST{*I};
309 if (ST.Tag == 0) {
310 SwapST = ST;
311 break;
312 }
313 }
314
315 // Swap tags between the victim and the highest scoring pair.
316 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
317 // the highest score slot without changing anything else.
318 for (auto *&I : ReTags) {
319 SlotWithTag ST{*I};
320 MachineOperand &TagOp = I->getOperand(4);
321 if (ST == MaxScoreST) {
322 TagOp.setImm(0);
323 } else if (ST == SwapST) {
324 TagOp.setImm(MaxScoreST.Tag);
325 }
326 }
327 return MaxScoreST.FI;
328}
329
330bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
331 MF = &Func;
332 MRI = &MF->getRegInfo();
333 AFI = MF->getInfo<AArch64FunctionInfo>();
334 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
335 TRI = static_cast<const AArch64RegisterInfo *>(
336 MF->getSubtarget().getRegisterInfo());
337 MFI = &MF->getFrameInfo();
338 ReTags.clear();
339
340 assert(MRI->isSSA());
341
342 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
343 << "********** Function: " << MF->getName() << '\n');
344
345 SmallSetVector<int, 8> TaggedSlots;
346 for (auto &BB : *MF) {
347 for (auto &I : BB) {
348 if (I.getOpcode() == AArch64::TAGPstack) {
349 ReTags.push_back(&I);
350 int FI = I.getOperand(1).getIndex();
351 TaggedSlots.insert(FI);
352 // There should be no offsets in TAGP yet.
353 assert(I.getOperand(2).getImm() == 0);
354 }
355 }
356 }
357
358 // Take over from SSP. It does nothing for tagged slots, and should not really
359 // have been enabled in the first place.
360 for (int FI : TaggedSlots)
361 MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
362
363 if (ReTags.empty())
364 return false;
365
366 if (mayUseUncheckedLoadStore())
367 uncheckLoadsAndStores();
368
369 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
370 // If the base tagged pointer is set up to the address of this slot,
371 // the ADDG instruction can be eliminated.
372 std::optional<int> BaseSlot = findFirstSlotCandidate();
373 if (BaseSlot)
374 AFI->setTaggedBasePointerIndex(*BaseSlot);
375
376 for (auto *I : ReTags) {
377 int FI = I->getOperand(1).getIndex();
378 int Tag = I->getOperand(4).getImm();
379 Register Base = I->getOperand(3).getReg();
380 if (Tag == 0 && FI == BaseSlot) {
381 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
382 I->getOperand(0).getReg())
383 .addReg(Base);
384 I->eraseFromParent();
385 }
386 }
387
388 return true;
389}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 stack tagging pre ra
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI)
static cl::opt< UncheckedLdStMode > ClUncheckedLdSt("stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(UncheckedSafe), cl::desc("Unconditionally apply unchecked-ld-st optimization (even for large " "stack frames, or in the presence of variable sized allocas)."), cl::values(clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), clEnumValN(UncheckedSafe, "safe", "apply unchecked-ld-st when the target is definitely within range"), clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")))
static cl::opt< bool > ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), cl::desc("Apply first slot optimization for stack tagging " "(eliminate ADDG Rt, Rn, 0, 0)."))
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode)
AArch64 Stack Tagging
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:687
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:39
This file implements a set that has insertion order iteration characteristics.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition: Debug.h:119
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isObjectPreAllocated(int ObjectIdx) const
Return true if the object was pre-allocated into the local block.
@ SSPLK_None
Did not trigger a stack protector.
bool getUseLocalStackAllocationBlock() const
Get whether the local allocation blob should be allocated together or let PEI allocate the locals in ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:72
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:99
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
bool empty() const
Definition: SmallVector.h:82
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:712
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64StackTaggingPreRAPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
Definition: SPIRVUtils.cpp:976
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:595
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static bool isEqual(const SlotWithTag &A, const SlotWithTag &B)
static unsigned getHashValue(const SlotWithTag &V)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:54