//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
// The pass runs after the PrologEpilogInserter, where we emit the CFI
// instructions. To preserve the correctness of the unwind information, the
// pass must either not change the relative order of any two instructions, one
// of which has the FrameSetup/FrameDestroy flag, or apply an ad-hoc fix to the
// unwind information.
//
//===----------------------------------------------------------------------===//

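// As an illustrative example (register names arbitrary), the pass rewrites
// sequences such as
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// into the single paired instruction
//   ldp x0, x1, [x2]
// and similarly folds base-register updates into pre-/post-indexed forms,
// widens adjacent zero stores, and forwards stored values into loads.
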
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"
56
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store pairs generated from unscaled instructions");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformations "
                                   "that failed the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of constant offsets of index addresses folded");
68
69DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
70 "Controls which pairs are considered for renaming");
71
72// The LdStLimit limits how far we search for load/store pairs.
73static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
74 cl::init(20), cl::Hidden);
75
76// The UpdateLimit limits how far we search for update instructions when we form
77// pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

81// The LdStConstLimit limits how far we search for const offset instructions
82// when we form index address load/store instructions.
83static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
84 cl::init(10), cl::Hidden);
85
86// Enable register renaming to find additional store pairing opportunities.
87static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
88 cl::init(true), cl::Hidden);
89
90#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
91
92namespace {
93
94using LdStPairFlags = struct LdStPairFlags {
95 // If a matching instruction is found, MergeForward is set to true if the
96 // merge is to remove the first instruction and replace the second with
97 // a pair-wise insn, and false if the reverse is true.
98 bool MergeForward = false;
99
100 // SExtIdx gives the index of the result of the load pair that must be
101 // extended. The value of SExtIdx assumes that the paired load produces the
102 // value in this order: (I, returned iterator), i.e., -1 means no value has
103 // to be extended, 0 means I, and 1 means the returned iterator.
104 int SExtIdx = -1;
105
106 // If not none, RenameReg can be used to rename the result register of the
107 // first store in a pair. Currently this only works when merging stores
108 // forward.
109 std::optional<MCPhysReg> RenameReg;
110
111 LdStPairFlags() = default;
112
113 void setMergeForward(bool V = true) { MergeForward = V; }
114 bool getMergeForward() const { return MergeForward; }
115
116 void setSExtIdx(int V) { SExtIdx = V; }
117 int getSExtIdx() const { return SExtIdx; }
118
119 void setRenameReg(MCPhysReg R) { RenameReg = R; }
120 void clearRenameReg() { RenameReg = std::nullopt; }
121 std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
122};
123
struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}

  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  LiveRegUnits DefinedInBB;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
142
143 // Scan the instructions looking for a load/store that can be combined
144 // with the current instruction into a load/store pair.
145 // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Scan the instructions looking for a store that writes to the address from
  // which the current load instruction reads. Return true if one is found.
  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
                         MachineBasicBlock::iterator &StoreI);

  // Merge the two instructions indicated into a wider narrow store instruction.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Promote the load that reads directly from the address stored to.
  MachineBasicBlock::iterator
  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                       MachineBasicBlock::iterator StoreI);

173 // Scan the instruction list to find a base register update that can
174 // be combined with the current instruction (a load or store) using
175 // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);
179
180 // Scan the instruction list to find a register assigned with a const
181 // value that can be combined with the current instruction (a load or store)
182 // using base addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
                                  unsigned &Offset);
186
187 // Scan the instruction list to find a base register update that can
188 // be combined with the current instruction (a load or store) using
189 // pre or post indexed addressing with writeback. Scan backwards.
190 // `MergeEither` is set to true if the combined instruction may be placed
191 // either at the location of the load/store instruction or at the location of
192 // the update instruction.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
                                 bool &MergeEither);
196
197 // Find an instruction that updates the base register of the ld/st
198 // instruction.
199 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
200 unsigned BaseReg, int Offset);
201
202 bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
203 unsigned IndexReg, unsigned &Offset);
204
205 // Merge a pre- or post-index base register update into a ld/st instruction.
206 std::optional<MachineBasicBlock::iterator>
207 mergeUpdateInsn(MachineBasicBlock::iterator I,
208 MachineBasicBlock::iterator Update, bool IsForward,
209 bool IsPreIdx, bool MergeEither);
210
  MachineBasicBlock::iterator
  mergeConstOffsetInsn(MachineBasicBlock::iterator I,
                       MachineBasicBlock::iterator Update, unsigned Offset,
                       int Scale);
215
216 // Find and merge zero store instructions.
217 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
218
219 // Find and pair ldr/str instructions.
220 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
221
222 // Find and promote load instructions which read directly from store.
223 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
224
  // Find and merge base register updates before or after a ld/st instruction.
226 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
227
228 // Find and merge an index ldr/st instruction into a base ld/st instruction.
229 bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
230
231 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
232
233 bool runOnMachineFunction(MachineFunction &Fn) override;
234
235 MachineFunctionProperties getRequiredProperties() const override {
236 return MachineFunctionProperties().setNoVRegs();
237 }
238
239 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
240};
241
242char AArch64LoadStoreOpt::ID = 0;
243
244} // end anonymous namespace
245
246INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
247 AARCH64_LOAD_STORE_OPT_NAME, false, false)
248
249static bool isNarrowStore(unsigned Opc) {
250 switch (Opc) {
251 default:
252 return false;
253 case AArch64::STRBBui:
254 case AArch64::STURBBi:
255 case AArch64::STRHHui:
256 case AArch64::STURHHi:
257 return true;
258 }
259}
260
// These instructions set the memory tag and either keep the memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
263static bool isTagStore(const MachineInstr &MI) {
264 switch (MI.getOpcode()) {
265 default:
266 return false;
267 case AArch64::STGi:
268 case AArch64::STZGi:
269 case AArch64::ST2Gi:
270 case AArch64::STZ2Gi:
271 return true;
272 }
273}
274
275static unsigned getMatchingNonSExtOpcode(unsigned Opc,
276 bool *IsValidLdStrOpc = nullptr) {
277 if (IsValidLdStrOpc)
278 *IsValidLdStrOpc = true;
279 switch (Opc) {
280 default:
281 if (IsValidLdStrOpc)
282 *IsValidLdStrOpc = false;
283 return std::numeric_limits<unsigned>::max();
284 case AArch64::STRDui:
285 case AArch64::STURDi:
286 case AArch64::STRDpre:
287 case AArch64::STRQui:
288 case AArch64::STURQi:
289 case AArch64::STRQpre:
290 case AArch64::STRBBui:
291 case AArch64::STURBBi:
292 case AArch64::STRHHui:
293 case AArch64::STURHHi:
294 case AArch64::STRWui:
295 case AArch64::STRWpre:
296 case AArch64::STURWi:
297 case AArch64::STRXui:
298 case AArch64::STRXpre:
299 case AArch64::STURXi:
300 case AArch64::STR_ZXI:
301 case AArch64::LDRDui:
302 case AArch64::LDURDi:
303 case AArch64::LDRDpre:
304 case AArch64::LDRQui:
305 case AArch64::LDURQi:
306 case AArch64::LDRQpre:
307 case AArch64::LDRWui:
308 case AArch64::LDURWi:
309 case AArch64::LDRWpre:
310 case AArch64::LDRXui:
311 case AArch64::LDURXi:
312 case AArch64::LDRXpre:
313 case AArch64::STRSui:
314 case AArch64::STURSi:
315 case AArch64::STRSpre:
316 case AArch64::LDRSui:
317 case AArch64::LDURSi:
318 case AArch64::LDRSpre:
319 case AArch64::LDR_ZXI:
320 return Opc;
321 case AArch64::LDRSWui:
322 return AArch64::LDRWui;
323 case AArch64::LDURSWi:
324 return AArch64::LDURWi;
325 case AArch64::LDRSWpre:
326 return AArch64::LDRWpre;
327 }
328}
329
330static unsigned getMatchingWideOpcode(unsigned Opc) {
331 switch (Opc) {
332 default:
333 llvm_unreachable("Opcode has no wide equivalent!");
334 case AArch64::STRBBui:
335 return AArch64::STRHHui;
336 case AArch64::STRHHui:
337 return AArch64::STRWui;
338 case AArch64::STURBBi:
339 return AArch64::STURHHi;
340 case AArch64::STURHHi:
341 return AArch64::STURWi;
342 case AArch64::STURWi:
343 return AArch64::STURXi;
344 case AArch64::STRWui:
345 return AArch64::STRXui;
346 }
347}
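
// Illustrative example of the rewrite this mapping enables (registers and
// offsets arbitrary): two adjacent narrow zero stores
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
// can be replaced by the single wider store
//   str wzr, [x0]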
348
349static unsigned getMatchingPairOpcode(unsigned Opc) {
350 switch (Opc) {
351 default:
352 llvm_unreachable("Opcode has no pairwise equivalent!");
353 case AArch64::STRSui:
354 case AArch64::STURSi:
355 return AArch64::STPSi;
356 case AArch64::STRSpre:
357 return AArch64::STPSpre;
358 case AArch64::STRDui:
359 case AArch64::STURDi:
360 return AArch64::STPDi;
361 case AArch64::STRDpre:
362 return AArch64::STPDpre;
363 case AArch64::STRQui:
364 case AArch64::STURQi:
365 case AArch64::STR_ZXI:
366 return AArch64::STPQi;
367 case AArch64::STRQpre:
368 return AArch64::STPQpre;
369 case AArch64::STRWui:
370 case AArch64::STURWi:
371 return AArch64::STPWi;
372 case AArch64::STRWpre:
373 return AArch64::STPWpre;
374 case AArch64::STRXui:
375 case AArch64::STURXi:
376 return AArch64::STPXi;
377 case AArch64::STRXpre:
378 return AArch64::STPXpre;
379 case AArch64::LDRSui:
380 case AArch64::LDURSi:
381 return AArch64::LDPSi;
382 case AArch64::LDRSpre:
383 return AArch64::LDPSpre;
384 case AArch64::LDRDui:
385 case AArch64::LDURDi:
386 return AArch64::LDPDi;
387 case AArch64::LDRDpre:
388 return AArch64::LDPDpre;
389 case AArch64::LDRQui:
390 case AArch64::LDURQi:
391 case AArch64::LDR_ZXI:
392 return AArch64::LDPQi;
393 case AArch64::LDRQpre:
394 return AArch64::LDPQpre;
395 case AArch64::LDRWui:
396 case AArch64::LDURWi:
397 return AArch64::LDPWi;
398 case AArch64::LDRWpre:
399 return AArch64::LDPWpre;
400 case AArch64::LDRXui:
401 case AArch64::LDURXi:
402 return AArch64::LDPXi;
403 case AArch64::LDRXpre:
404 return AArch64::LDPXpre;
405 case AArch64::LDRSWui:
406 case AArch64::LDURSWi:
407 return AArch64::LDPSWi;
408 case AArch64::LDRSWpre:
409 return AArch64::LDPSWpre;
410 }
411}
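
// Illustrative example of the pairing this mapping enables (registers and
// offsets arbitrary):
//   str w8, [x0, #4]
//   str w9, [x0, #8]
// becomes
//   stp w8, w9, [x0, #4]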

static bool isMatchingStore(MachineInstr &LoadInst,
                            MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
417 switch (LdOpc) {
418 default:
419 llvm_unreachable("Unsupported load instruction!");
420 case AArch64::LDRBBui:
421 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
422 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
423 case AArch64::LDURBBi:
424 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
425 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
426 case AArch64::LDRHHui:
427 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
428 StOpc == AArch64::STRXui;
429 case AArch64::LDURHHi:
430 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
431 StOpc == AArch64::STURXi;
432 case AArch64::LDRWui:
433 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
434 case AArch64::LDURWi:
435 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
436 case AArch64::LDRXui:
437 return StOpc == AArch64::STRXui;
438 case AArch64::LDURXi:
439 return StOpc == AArch64::STURXi;
440 }
441}
442
443static unsigned getPreIndexedOpcode(unsigned Opc) {
444 // FIXME: We don't currently support creating pre-indexed loads/stores when
445 // the load or store is the unscaled version. If we decide to perform such an
446 // optimization in the future the cases for the unscaled loads/stores will
447 // need to be added here.
448 switch (Opc) {
449 default:
450 llvm_unreachable("Opcode has no pre-indexed equivalent!");
451 case AArch64::STRSui:
452 return AArch64::STRSpre;
453 case AArch64::STRDui:
454 return AArch64::STRDpre;
455 case AArch64::STRQui:
456 return AArch64::STRQpre;
457 case AArch64::STRBBui:
458 return AArch64::STRBBpre;
459 case AArch64::STRHHui:
460 return AArch64::STRHHpre;
461 case AArch64::STRWui:
462 return AArch64::STRWpre;
463 case AArch64::STRXui:
464 return AArch64::STRXpre;
465 case AArch64::LDRSui:
466 return AArch64::LDRSpre;
467 case AArch64::LDRDui:
468 return AArch64::LDRDpre;
469 case AArch64::LDRQui:
470 return AArch64::LDRQpre;
471 case AArch64::LDRBBui:
472 return AArch64::LDRBBpre;
473 case AArch64::LDRHHui:
474 return AArch64::LDRHHpre;
475 case AArch64::LDRWui:
476 return AArch64::LDRWpre;
477 case AArch64::LDRXui:
478 return AArch64::LDRXpre;
479 case AArch64::LDRSWui:
480 return AArch64::LDRSWpre;
481 case AArch64::LDPSi:
482 return AArch64::LDPSpre;
483 case AArch64::LDPSWi:
484 return AArch64::LDPSWpre;
485 case AArch64::LDPDi:
486 return AArch64::LDPDpre;
487 case AArch64::LDPQi:
488 return AArch64::LDPQpre;
489 case AArch64::LDPWi:
490 return AArch64::LDPWpre;
491 case AArch64::LDPXi:
492 return AArch64::LDPXpre;
493 case AArch64::STPSi:
494 return AArch64::STPSpre;
495 case AArch64::STPDi:
496 return AArch64::STPDpre;
497 case AArch64::STPQi:
498 return AArch64::STPQpre;
499 case AArch64::STPWi:
500 return AArch64::STPWpre;
501 case AArch64::STPXi:
502 return AArch64::STPXpre;
503 case AArch64::STGi:
504 return AArch64::STGPreIndex;
505 case AArch64::STZGi:
506 return AArch64::STZGPreIndex;
507 case AArch64::ST2Gi:
508 return AArch64::ST2GPreIndex;
509 case AArch64::STZ2Gi:
510 return AArch64::STZ2GPreIndex;
511 case AArch64::STGPi:
512 return AArch64::STGPpre;
513 }
514}
515
516static unsigned getBaseAddressOpcode(unsigned Opc) {
517 // TODO: Add more index address stores.
518 switch (Opc) {
519 default:
520 llvm_unreachable("Opcode has no base address equivalent!");
521 case AArch64::LDRBroX:
522 return AArch64::LDRBui;
523 case AArch64::LDRBBroX:
524 return AArch64::LDRBBui;
525 case AArch64::LDRSBXroX:
526 return AArch64::LDRSBXui;
527 case AArch64::LDRSBWroX:
528 return AArch64::LDRSBWui;
529 case AArch64::LDRHroX:
530 return AArch64::LDRHui;
531 case AArch64::LDRHHroX:
532 return AArch64::LDRHHui;
533 case AArch64::LDRSHXroX:
534 return AArch64::LDRSHXui;
535 case AArch64::LDRSHWroX:
536 return AArch64::LDRSHWui;
537 case AArch64::LDRWroX:
538 return AArch64::LDRWui;
539 case AArch64::LDRSroX:
540 return AArch64::LDRSui;
541 case AArch64::LDRSWroX:
542 return AArch64::LDRSWui;
543 case AArch64::LDRDroX:
544 return AArch64::LDRDui;
545 case AArch64::LDRXroX:
546 return AArch64::LDRXui;
547 case AArch64::LDRQroX:
548 return AArch64::LDRQui;
549 }
550}
551
552static unsigned getPostIndexedOpcode(unsigned Opc) {
553 switch (Opc) {
554 default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
556 case AArch64::STRSui:
557 case AArch64::STURSi:
558 return AArch64::STRSpost;
559 case AArch64::STRDui:
560 case AArch64::STURDi:
561 return AArch64::STRDpost;
562 case AArch64::STRQui:
563 case AArch64::STURQi:
564 return AArch64::STRQpost;
565 case AArch64::STRBBui:
566 return AArch64::STRBBpost;
567 case AArch64::STRHHui:
568 return AArch64::STRHHpost;
569 case AArch64::STRWui:
570 case AArch64::STURWi:
571 return AArch64::STRWpost;
572 case AArch64::STRXui:
573 case AArch64::STURXi:
574 return AArch64::STRXpost;
575 case AArch64::LDRSui:
576 case AArch64::LDURSi:
577 return AArch64::LDRSpost;
578 case AArch64::LDRDui:
579 case AArch64::LDURDi:
580 return AArch64::LDRDpost;
581 case AArch64::LDRQui:
582 case AArch64::LDURQi:
583 return AArch64::LDRQpost;
584 case AArch64::LDRBBui:
585 return AArch64::LDRBBpost;
586 case AArch64::LDRHHui:
587 return AArch64::LDRHHpost;
588 case AArch64::LDRWui:
589 case AArch64::LDURWi:
590 return AArch64::LDRWpost;
591 case AArch64::LDRXui:
592 case AArch64::LDURXi:
593 return AArch64::LDRXpost;
594 case AArch64::LDRSWui:
595 return AArch64::LDRSWpost;
596 case AArch64::LDPSi:
597 return AArch64::LDPSpost;
598 case AArch64::LDPSWi:
599 return AArch64::LDPSWpost;
600 case AArch64::LDPDi:
601 return AArch64::LDPDpost;
602 case AArch64::LDPQi:
603 return AArch64::LDPQpost;
604 case AArch64::LDPWi:
605 return AArch64::LDPWpost;
606 case AArch64::LDPXi:
607 return AArch64::LDPXpost;
608 case AArch64::STPSi:
609 return AArch64::STPSpost;
610 case AArch64::STPDi:
611 return AArch64::STPDpost;
612 case AArch64::STPQi:
613 return AArch64::STPQpost;
614 case AArch64::STPWi:
615 return AArch64::STPWpost;
616 case AArch64::STPXi:
617 return AArch64::STPXpost;
618 case AArch64::STGi:
619 return AArch64::STGPostIndex;
620 case AArch64::STZGi:
621 return AArch64::STZGPostIndex;
622 case AArch64::ST2Gi:
623 return AArch64::ST2GPostIndex;
624 case AArch64::STZ2Gi:
625 return AArch64::STZ2GPostIndex;
626 case AArch64::STGPi:
627 return AArch64::STGPpost;
628 }
629}
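
// Illustrative example of the update folding these mappings enable (registers
// and immediates arbitrary):
//   str x0, [x20]
//   add x20, x20, #32
// becomes the post-indexed form
//   str x0, [x20], #32
// while an add before the store instead becomes the pre-indexed form
//   str x0, [x20, #32]!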
630
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();
635
636 switch (OpcA) {
637 default:
638 return false;
639 case AArch64::STRSpre:
640 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
641 case AArch64::STRDpre:
642 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
643 case AArch64::STRQpre:
644 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
645 case AArch64::STRWpre:
646 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
647 case AArch64::STRXpre:
648 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
649 case AArch64::LDRSpre:
650 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
651 case AArch64::LDRDpre:
652 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
653 case AArch64::LDRQpre:
654 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
655 case AArch64::LDRWpre:
656 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
657 case AArch64::LDRXpre:
658 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
659 case AArch64::LDRSWpre:
660 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
661 }
662}
663
664// Returns the scale and offset range of pre/post indexed variants of MI.
665static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
666 int &MinOffset, int &MaxOffset) {
667 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
668 bool IsTagStore = isTagStore(MI);
669 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
670 // as in the "unsigned offset" variant.
671 // All other pre/post indexed ldst instructions are unscaled.
672 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
673
674 if (IsPaired) {
675 MinOffset = -64;
676 MaxOffset = 63;
677 } else {
678 MinOffset = -256;
679 MaxOffset = 255;
680 }
681}
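
// For example (worked out for one case): for LDPXi the scale returned by
// getPrePostIndexedMemOpInfo is 8, so the writeback immediate of the
// pre-/post-indexed form covers [-64 * 8, 63 * 8] = [-512, 504] bytes, in
// multiples of 8.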
682
static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
685 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
686 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
687 if (IsPreLdSt)
688 PairedRegOp += 1;
689 unsigned Idx =
690 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
691 return MI.getOperand(Idx);
692}
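
// For example (illustrative): in a pre-indexed store such as STRXpre the first
// operand is the written-back base register, so getLdStRegOp finds the stored
// register one operand index later than in the corresponding STRXui form.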
693

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
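
// For example (illustrative registers and offsets): isLdOffsetInRangeOfSt
// returns true for
//   str x1, [x0, #8]
//   ldrh w2, [x0, #10]
// because the load reads bytes [10, 12), which lie entirely within the stored
// bytes [8, 16), so the load can be satisfied from the stored value.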
711
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
714 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
715 isNarrowStore(Opc)) &&
716 getLdStRegOp(MI).getReg() == AArch64::WZR;
717}
718
static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
721 default:
722 return false;
723 // Scaled instructions.
724 case AArch64::LDRBBui:
725 case AArch64::LDRHHui:
726 case AArch64::LDRWui:
727 case AArch64::LDRXui:
728 // Unscaled instructions.
729 case AArch64::LDURBBi:
730 case AArch64::LDURHHi:
731 case AArch64::LDURWi:
732 case AArch64::LDURXi:
733 return true;
734 }
735}
736
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
  unsigned Opc = MI.getOpcode();
739 switch (Opc) {
740 default:
741 return false;
742 // Scaled instructions.
743 case AArch64::STRSui:
744 case AArch64::STRDui:
745 case AArch64::STRQui:
746 case AArch64::STRXui:
747 case AArch64::STRWui:
748 case AArch64::STRHHui:
749 case AArch64::STRBBui:
750 case AArch64::LDRSui:
751 case AArch64::LDRDui:
752 case AArch64::LDRQui:
753 case AArch64::LDRXui:
754 case AArch64::LDRWui:
755 case AArch64::LDRHHui:
756 case AArch64::LDRBBui:
757 case AArch64::STGi:
758 case AArch64::STZGi:
759 case AArch64::ST2Gi:
760 case AArch64::STZ2Gi:
761 case AArch64::STGPi:
762 // Unscaled instructions.
763 case AArch64::STURSi:
764 case AArch64::STURDi:
765 case AArch64::STURQi:
766 case AArch64::STURWi:
767 case AArch64::STURXi:
768 case AArch64::LDURSi:
769 case AArch64::LDURDi:
770 case AArch64::LDURQi:
771 case AArch64::LDURWi:
772 case AArch64::LDURXi:
773 // Paired instructions.
774 case AArch64::LDPSi:
775 case AArch64::LDPSWi:
776 case AArch64::LDPDi:
777 case AArch64::LDPQi:
778 case AArch64::LDPWi:
779 case AArch64::LDPXi:
780 case AArch64::STPSi:
781 case AArch64::STPDi:
782 case AArch64::STPQi:
783 case AArch64::STPWi:
784 case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
      return false;
788
789 // When using stack tagging, simple sp+imm loads and stores are not
790 // tag-checked, but pre- and post-indexed versions of them are, so we can't
791 // replace the former with the latter. This transformation would be valid
792 // if the load/store accesses an untagged stack slot, but we don't have
793 // that information available after frame indices have been eliminated.
794 if (AFI.isMTETagged() &&
795 AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
796 return false;
797
798 return true;
799 }
800}
801
802// Make sure this is a reg+reg Ld/St
803static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
804 unsigned Opc = MI.getOpcode();
805 switch (Opc) {
806 default:
807 return false;
808 // Scaled instructions.
809 // TODO: Add more index address stores.
810 case AArch64::LDRBroX:
811 case AArch64::LDRBBroX:
812 case AArch64::LDRSBXroX:
813 case AArch64::LDRSBWroX:
814 Scale = 1;
815 return true;
816 case AArch64::LDRHroX:
817 case AArch64::LDRHHroX:
818 case AArch64::LDRSHXroX:
819 case AArch64::LDRSHWroX:
820 Scale = 2;
821 return true;
822 case AArch64::LDRWroX:
823 case AArch64::LDRSroX:
824 case AArch64::LDRSWroX:
825 Scale = 4;
826 return true;
827 case AArch64::LDRDroX:
828 case AArch64::LDRXroX:
829 Scale = 8;
830 return true;
831 case AArch64::LDRQroX:
832 Scale = 16;
833 return true;
834 }
835}
836
837static bool isRewritableImplicitDef(unsigned Opc) {
838 switch (Opc) {
839 default:
840 return false;
841 case AArch64::ORRWrs:
842 case AArch64::ADDWri:
843 return true;
844 }
845}
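
// For example (illustrative MIR): in
//   $w1 = ORRWrs $wzr, $w0, 0, implicit-def $x1
// the implicit-def is simply the X super-register of the result, so when the
// result register is renamed the implicit-def can be rewritten consistently;
// for arbitrary opcodes no such rule is known, hence the short list above.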
846
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
857 // to skip one further. Either way we merge will invalidate the iterator,
858 // and we don't need to scan the new instruction, as it's a pairwise
859 // instruction, which we're not considering for further action anyway.
860 if (NextI == MergeMI)
861 NextI = next_nodbg(NextI, E);
862
863 unsigned Opc = I->getOpcode();
864 unsigned MergeMIOpc = MergeMI->getOpcode();
865 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
866 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
867 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
868 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
869
870 bool MergeForward = Flags.getMergeForward();
871 // Insert our new paired instruction after whichever of the paired
872 // instructions MergeForward indicates.
873 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
874 // Also based on MergeForward is from where we copy the base register operand
875 // so we get the flags compatible with the input code.
876 const MachineOperand &BaseRegOp =
877 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
878 : AArch64InstrInfo::getLdStBaseOp(*I);
879
880 // Which register is Rt and which is Rt2 depends on the offset order.
881 int64_t IOffsetInBytes =
882 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
  int64_t MIOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
      MergeMIOffsetStride;
886 // Select final offset based on the offset order.
887 int64_t OffsetImm;
888 if (IOffsetInBytes > MIOffsetInBytes)
889 OffsetImm = MIOffsetInBytes;
890 else
891 OffsetImm = IOffsetInBytes;
892
893 int NewOpcode = getMatchingWideOpcode(Opc);
894 // Adjust final offset on scaled stores because the new instruction
895 // has a different scale.
896 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
897 int NewOffsetStride = TII->getMemScale(NewOpcode);
898 assert(((OffsetImm % NewOffsetStride) == 0) &&
899 "Offset should be a multiple of the store memory scale");
900 OffsetImm = OffsetImm / NewOffsetStride;
901 }
902
903 // Construct the new instruction.
904 DebugLoc DL = I->getDebugLoc();
905 MachineBasicBlock *MBB = I->getParent();
906 MachineInstrBuilder MIB;
907 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
908 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
909 .add(BaseRegOp)
910 .addImm(OffsetImm)
911 .cloneMergedMemRefs({&*I, &*MergeMI})
912 .setMIFlags(I->mergeFlagsWith(*MergeMI));
913 (void)MIB;
914
915 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
916 LLVM_DEBUG(I->print(dbgs()));
917 LLVM_DEBUG(dbgs() << " ");
918 LLVM_DEBUG(MergeMI->print(dbgs()));
919 LLVM_DEBUG(dbgs() << " with instruction:\n ");
920 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
921 LLVM_DEBUG(dbgs() << "\n");
922
923 // Erase the old instructions.
924 I->eraseFromParent();
925 MergeMI->eraseFromParent();
926 return NextI;
927}
928
// Apply Fn to all instructions between MI and the beginning of the block, until
// a def for DefReg is reached. Returns true iff Fn returns true for all
// visited instructions. Stops after visiting Limit iterations.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
935 auto MBB = MI.getParent();
936 for (MachineInstr &I :
937 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
938 if (!Limit)
939 return false;
940 --Limit;
941
942 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
943 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
944 TRI->regsOverlap(MOP.getReg(), DefReg);
945 });
946 if (!Fn(I, isDef))
947 return false;
948 if (isDef)
949 break;
950 }
951 return true;
952}
953
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
956
957 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
958 if (MOP.isReg() && MOP.isKill())
959 Units.removeReg(MOP.getReg());
960
961 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
962 if (MOP.isReg() && !MOP.isKill())
963 Units.addReg(MOP.getReg());
964}
965
/// This function will add a new entry into the debugValueSubstitutions table
/// when two instructions have been merged into a new one represented by \p
/// MergedInstr.
static void addDebugSubstitutionsToTable(MachineFunction *MF,
                                         unsigned InstrNumToSet,
                                         MachineInstr &OriginalInstr,
                                         MachineInstr &MergedInstr) {
973
974 // Figure out the Operand Index of the destination register of the
975 // OriginalInstr in the new MergedInstr.
976 auto Reg = OriginalInstr.getOperand(0).getReg();
977 unsigned OperandNo = 0;
978 bool RegFound = false;
979 for (const auto Op : MergedInstr.operands()) {
980 if (Op.getReg() == Reg) {
981 RegFound = true;
982 break;
983 }
984 OperandNo++;
985 }
986
987 if (RegFound)
988 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0},
989 {InstrNumToSet, OperandNo});
990}
991
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
999 // to skip one further. Either way we merge will invalidate the iterator,
1000 // and we don't need to scan the new instruction, as it's a pairwise
1001 // instruction, which we're not considering for further action anyway.
1002 if (NextI == Paired)
1003 NextI = next_nodbg(NextI, E);
1004
1005 int SExtIdx = Flags.getSExtIdx();
1006 unsigned Opc =
1007 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1008 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
1009 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1010
1011 bool MergeForward = Flags.getMergeForward();
1012
1013 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1014 if (RenameReg) {
1015 MCRegister RegToRename = getLdStRegOp(*I).getReg();
1016 DefinedInBB.addReg(*RenameReg);
1017
1018 // Return the sub/super register for RenameReg, matching the size of
1019 // OriginalReg.
1020 auto GetMatchingSubReg =
1021 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1022 for (MCPhysReg SubOrSuper :
1023 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1024 if (C->contains(SubOrSuper))
1025 return SubOrSuper;
1026 }
1027 llvm_unreachable("Should have found matching sub or super register!");
1028 };
1029
1030 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1031 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1032 bool IsDef) {
1033 if (IsDef) {
1034 bool SeenDef = false;
1035 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1036 MachineOperand &MOP = MI.getOperand(OpIdx);
1037 // Rename the first explicit definition and all implicit
1038 // definitions matching RegToRename.
1039 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1040 (!MergeForward || !SeenDef ||
1041 (MOP.isDef() && MOP.isImplicit())) &&
1042 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1043 assert((MOP.isImplicit() ||
1044 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1045 "Need renamable operands");
1046 Register MatchingReg;
1047 if (const TargetRegisterClass *RC =
1048 MI.getRegClassConstraint(OpIdx, TII, TRI))
1049 MatchingReg = GetMatchingSubReg(RC);
1050 else {
1051 if (!isRewritableImplicitDef(MI.getOpcode()))
1052 continue;
1053 MatchingReg = GetMatchingSubReg(
1054 TRI->getMinimalPhysRegClass(MOP.getReg()));
1055 }
1056 MOP.setReg(MatchingReg);
1057 SeenDef = true;
1058 }
1059 }
1060 } else {
1061 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1062 MachineOperand &MOP = MI.getOperand(OpIdx);
1063 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1064 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1065 assert((MOP.isImplicit() ||
1066 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1067 "Need renamable operands");
1068 Register MatchingReg;
1069 if (const TargetRegisterClass *RC =
1070 MI.getRegClassConstraint(OpIdx, TII, TRI))
1071 MatchingReg = GetMatchingSubReg(RC);
1072 else
1073 MatchingReg = GetMatchingSubReg(
1074 TRI->getMinimalPhysRegClass(MOP.getReg()));
1075 assert(MatchingReg != AArch64::NoRegister &&
1076 "Cannot find matching regs for renaming");
1077 MOP.setReg(MatchingReg);
1078 }
1079 }
1080 }
1081 LLVM_DEBUG(dbgs() << "Renamed " << MI);
1082 return true;
1083 };
1084 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1085 TRI, UINT32_MAX, UpdateMIs);
1086
1087#if !defined(NDEBUG)
1088 // For forward merging store:
1089 // Make sure the register used for renaming is not used between the
1090 // paired instructions. That would trash the content before the new
1091 // paired instruction.
1092 MCPhysReg RegToCheck = *RenameReg;
1093 // For backward merging load:
1094 // Make sure the register being renamed is not used between the
1095 // paired instructions. That would trash the content after the new
1096 // paired instruction.
1097 if (!MergeForward)
1098 RegToCheck = RegToRename;
1099 for (auto &MI :
1100 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1101 MergeForward ? std::next(I) : I,
1102 MergeForward ? std::next(Paired) : Paired))
1103 assert(all_of(MI.operands(),
1104 [this, RegToCheck](const MachineOperand &MOP) {
1105 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1106 MOP.isUndef() ||
1107 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1108 }) &&
1109 "Rename register used between paired instruction, trashing the "
1110 "content");
1111#endif
1112 }
1113
1114 // Insert our new paired instruction after whichever of the paired
1115 // instructions MergeForward indicates.
1116 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1117 // Also based on MergeForward is from where we copy the base register operand
1118 // so we get the flags compatible with the input code.
1119 const MachineOperand &BaseRegOp =
1120 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1121 : AArch64InstrInfo::getLdStBaseOp(*I);
1122
  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1125 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1126 if (IsUnscaled != PairedIsUnscaled) {
1127 // We're trying to pair instructions that differ in how they are scaled. If
1128 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
1129 // the opposite (i.e., make Paired's offset unscaled).
1130 int MemSize = TII->getMemScale(*Paired);
1131 if (PairedIsUnscaled) {
1132 // If the unscaled offset isn't a multiple of the MemSize, we can't
1133 // pair the operations together.
1134 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1135 "Offset should be a multiple of the stride!");
1136 PairedOffset /= MemSize;
1137 } else {
1138 PairedOffset *= MemSize;
1139 }
1140 }
1141
1142 // Which register is Rt and which is Rt2 depends on the offset order.
1143 // However, for pre load/stores the Rt should be the one of the pre
1144 // load/store.
1145 MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
1149 Rt2MI = &*I;
1150 // Here we swapped the assumption made for SExtIdx.
1151 // I.e., we turn ldp I, Paired into ldp Paired, I.
1152 // Update the index accordingly.
1153 if (SExtIdx != -1)
1154 SExtIdx = (SExtIdx + 1) % 2;
1155 } else {
1156 RtMI = &*I;
1157 Rt2MI = &*Paired;
1158 }
1159 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1160 // Scale the immediate offset, if necessary.
1161 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1162 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1163 "Unscaled offset cannot be scaled.");
1164 OffsetImm /= TII->getMemScale(*RtMI);
1165 }
1166
1167 // Construct the new instruction.
1168 MachineInstrBuilder MIB;
1169 DebugLoc DL = I->getDebugLoc();
1170 MachineBasicBlock *MBB = I->getParent();
1171 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1172 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1173 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1174 // Kill flags may become invalid when moving stores for pairing.
1175 if (RegOp0.isUse()) {
1176 if (!MergeForward) {
1177 // Clear kill flags on store if moving upwards. Example:
1178 // STRWui kill %w0, ...
1179 // USE %w1
1180 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1181 // We are about to move the store of w1, so its kill flag may become
1182 // invalid; not the case for w0.
1183 // Since w1 is used between the stores, the kill flag on w1 is cleared
1184 // after merging.
1185 // STPWi kill %w0, %w1, ...
1186 // USE %w1
1187 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1188 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1189 PairedRegOp.setIsKill(false);
1190 } else {
1191 // Clear kill flags of the first stores register. Example:
1192 // STRWui %w1, ...
1193 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1194 // STRW %w0
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI :
1197 make_range(std::next(I->getIterator()), Paired->getIterator()))
1198 MI.clearRegisterKills(Reg, TRI);
1199 }
1200 }
1201
1202 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1203 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1204
1205 // Adds the pre-index operand for pre-indexed ld/st pairs.
1206 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1207 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1208
1209 MIB.add(RegOp0)
1210 .add(RegOp1)
1211 .add(BaseRegOp)
1212 .addImm(OffsetImm)
1213 .cloneMergedMemRefs({&*I, &*Paired})
1214 .setMIFlags(I->mergeFlagsWith(*Paired));
1215
1216 (void)MIB;
1217
1218 LLVM_DEBUG(
1219 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1220 LLVM_DEBUG(I->print(dbgs()));
1221 LLVM_DEBUG(dbgs() << " ");
1222 LLVM_DEBUG(Paired->print(dbgs()));
1223 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1224 if (SExtIdx != -1) {
1225 // Generate the sign extension for the proper result of the ldp.
1226 // I.e., with X1, that would be:
1227 // %w1 = KILL %w1, implicit-def %x1
1228 // %x1 = SBFMXri killed %x1, 0, 31
1229 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1230 // Right now, DstMO has the extended register, since it comes from an
1231 // extended opcode.
1232 Register DstRegX = DstMO.getReg();
1233 // Get the W variant of that register.
1234 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1235 // Update the result of LDP to use the W instead of the X variant.
1236 DstMO.setReg(DstRegW);
1237 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1238 LLVM_DEBUG(dbgs() << "\n");
1239 // Make the machine verifier happy by providing a definition for
1240 // the X register.
1241 // Insert this definition right after the generated LDP, i.e., before
1242 // InsertionPoint.
1243 MachineInstrBuilder MIBKill =
1244 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1245 .addReg(DstRegW)
1246 .addReg(DstRegX, RegState::Define);
1247 MIBKill->getOperand(2).setImplicit();
1248 // Create the sign extension.
1249 MachineInstrBuilder MIBSXTW =
1250 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1251 .addReg(DstRegX)
1252 .addImm(0)
1253 .addImm(31);
1254 (void)MIBSXTW;
1255
1256 // In the case of a sign-extend, where we have something like:
1257 // debugValueSubstitutions:[]
1258 // $w1 = LDRWui $x0, 1, debug-instr-number 1
1259 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1260 // $x0 = LDRSWui $x0, 0, debug-instr-number 2
1261 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1262
1263 // It will be converted to:
1264 // debugValueSubstitutions:[]
1265 // $w0, $w1 = LDPWi $x0, 0
1266 // $w0 = KILL $w0, implicit-def $x0
1267 // $x0 = SBFMXri $x0, 0, 31
1268 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1269 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1270
1271 // We want the final result to look like:
1272 // debugValueSubstitutions:
1273 // - { srcinst: 1, srcop: 0, dstinst: 4, dstop: 1, subreg: 0 }
1274 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1275 // $w0, $w1 = LDPWi $x0, 0, debug-instr-number 4
1276 // $w0 = KILL $w0, implicit-def $x0
1277 // $x0 = SBFMXri $x0, 0, 31, debug-instr-number 3
1278 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1279 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1280
    // $x0 is where the final value is stored, so the sign extend (SBFMXri)
    // instruction contains the final value we care about, and we give it a new
    // debug-instr-number 3. Likewise, $w1 contains the final value that we
    // care about, so the LDP instruction is also given a new
    // debug-instr-number 4. We have to add these substitutions to the
    // debugValueSubstitutions table. However, we also have to ensure that the
    // OpIndex that pointed to debug-instr-number 1 gets updated to 1, because
    // $w1 is the second operand of the LDP instruction.
1289
1290 if (I->peekDebugInstrNum()) {
1291 // If I is the instruction which got sign extended and has a
1292 // debug-instr-number, give the SBFMXri instruction a new
1293 // debug-instr-number, and update the debugValueSubstitutions table with
1294 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1295 // instruction a new debug-instr-number, and update the
1296 // debugValueSubstitutions table with the new debug-instr-number and
1297 // OpIndex pair.
1298 unsigned NewInstrNum;
1299 if (DstRegX == I->getOperand(0).getReg()) {
1300 NewInstrNum = MIBSXTW->getDebugInstrNum();
1301 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
1302 *MIBSXTW);
1303 } else {
1304 NewInstrNum = MIB->getDebugInstrNum();
1305 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
1306 }
1307 }
1308 if (Paired->peekDebugInstrNum()) {
1309 // If Paired is the instruction which got sign extended and has a
1310 // debug-instr-number, give the SBFMXri instruction a new
1311 // debug-instr-number, and update the debugValueSubstitutions table with
1312 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1313 // instruction a new debug-instr-number, and update the
1314 // debugValueSubstitutions table with the new debug-instr-number and
1315 // OpIndex pair.
1316 unsigned NewInstrNum;
1317 if (DstRegX == Paired->getOperand(0).getReg()) {
1318 NewInstrNum = MIBSXTW->getDebugInstrNum();
1319 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1320 *MIBSXTW);
1321 } else {
1322 NewInstrNum = MIB->getDebugInstrNum();
1323 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1324 *MIB);
1325 }
1326 }
1327
1328 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1329 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1330 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1331 // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1332 // variant of the registers.
1333 MachineOperand &MOp0 = MIB->getOperand(0);
1334 MachineOperand &MOp1 = MIB->getOperand(1);
1335 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1336 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1337 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1338 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1339 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1340 } else {
1341
1342 // In the case that the merge doesn't result in a sign-extend, if we have
1343 // something like:
1344 // debugValueSubstitutions:[]
1345 // $x1 = LDRXui $x0, 1, debug-instr-number 1
1346 // DBG_INSTR_REF !13, dbg-instr-ref(1, 0), debug-location !11
1347 // $x0 = LDRXui killed $x0, 0, debug-instr-number 2
1348 // DBG_INSTR_REF !14, dbg-instr-ref(2, 0), debug-location !11
1349
1350 // It will be converted to:
1351 // debugValueSubstitutions: []
1352 // $x0, $x1 = LDPXi $x0, 0
1353 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1354 // DBG_INSTR_REF !13, dbg-instr-ref(2, 0), debug-location !14
1355
1356 // We want the final result to look like:
1357 // debugValueSubstitutions:
1358 // - { srcinst: 1, srcop: 0, dstinst: 3, dstop: 1, subreg: 0 }
1359 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1360 // $x0, $x1 = LDPXi $x0, 0, debug-instr-number 3
1361 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1362 // DBG_INSTR_REF !12, dbg-instr-ref(2, 0), debug-location !14
1363
1364 // Here all that needs to be done is, that the LDP instruction needs to be
1365 // updated with a new debug-instr-number, we then need to add entries into
1366 // the debugSubstitutions table to map the old instr-refs to the new ones.
1367
1368 // Assign new DebugInstrNum to the Paired instruction.
1369 if (I->peekDebugInstrNum()) {
1370 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1371 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *I,
1372 *MIB);
1373 }
1374 if (Paired->peekDebugInstrNum()) {
1375 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1376 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *Paired,
1377 *MIB);
1378 }
1379
1380 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1381 }
1382 LLVM_DEBUG(dbgs() << "\n");
1383
1384 if (MergeForward)
1385 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1386 if (MOP.isReg() && MOP.isKill())
1387 DefinedInBB.addReg(MOP.getReg());
1388
1389 // Erase the old instructions.
1390 I->eraseFromParent();
1391 Paired->eraseFromParent();
1392
1393 return NextI;
1394}
1395
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());
1401
1402 int LoadSize = TII->getMemScale(*LoadI);
1403 int StoreSize = TII->getMemScale(*StoreI);
1404 Register LdRt = getLdStRegOp(*LoadI).getReg();
1405 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1406 Register StRt = getLdStRegOp(*StoreI).getReg();
1407 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1408
1409 assert((IsStoreXReg ||
1410 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1411 "Unexpected RegClass");
1412
1413 MachineInstr *BitExtMI;
1414 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if the destination register of the load is the same
    // register as the stored value.
1417 if (StRt == LdRt && LoadSize == 8) {
1418 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1419 LoadI->getIterator())) {
1420 if (MI.killsRegister(StRt, TRI)) {
1421 MI.clearRegisterKills(StRt, TRI);
1422 break;
1423 }
1424 }
1425 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1426 LLVM_DEBUG(LoadI->print(dbgs()));
1427 LLVM_DEBUG(dbgs() << "\n");
1428 LoadI->eraseFromParent();
1429 return NextI;
1430 }
    // Replace the load with a mov if the load and store are of the same size.
1432 BitExtMI =
1433 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1434 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1435 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
1439 } else {
1440 // FIXME: Currently we disable this transformation in big-endian targets as
1441 // performance and correctness are verified only in little-endian.
1442 if (!Subtarget->isLittleEndian())
1443 return NextI;
1444 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1445 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1446 "Unsupported ld/st match");
1447 assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1456 int Width = LoadSize * 8;
1457 Register DestReg =
1458 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1459 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1460 : LdRt;
1461
1462 assert((UnscaledLdOffset >= UnscaledStOffset &&
1463 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1464 "Invalid offset");
1465
1466 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1467 int Imms = Immr + Width - 1;
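    // Worked example (illustrative): if the store was "str x1, [x0]" and the
    // load was "ldrh w2, [x0, #2]", then LoadSize = 2, Width = 16,
    // Immr = 8 * 2 = 16 and Imms = 31, i.e. a UBFM extracting bits [31:16]
    // of the stored value (little-endian).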
1468 if (UnscaledLdOffset == UnscaledStOffset) {
1469 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1470 | ((Immr) << 6) // immr
1471 | ((Imms) << 0) // imms
1472 ;
1473
1474 BitExtMI =
1475 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1476 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1477 DestReg)
1478 .add(StMO)
1479 .addImm(AndMaskEncoded)
1480 .setMIFlags(LoadI->getFlags());
1481 } else if (IsStoreXReg && Imms == 31) {
1482 // Use the 32 bit variant of UBFM if it's the LSR alias of the
1483 // instruction.
1484 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1485 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1486 TII->get(AArch64::UBFMWri),
1487 TRI->getSubReg(DestReg, AArch64::sub_32))
1488 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1489 .addImm(Immr)
1490 .addImm(Imms)
1491 .setMIFlags(LoadI->getFlags());
1492 } else {
1493 BitExtMI =
1494 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1495 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1496 DestReg)
1497 .add(StMO)
1498 .addImm(Immr)
1499 .addImm(Imms)
1500 .setMIFlags(LoadI->getFlags());
1501 }
1502 }
1503
1504 // Clear kill flags between store and load.
1505 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1506 BitExtMI->getIterator()))
1507 if (MI.killsRegister(StRt, TRI)) {
1508 MI.clearRegisterKills(StRt, TRI);
1509 break;
1510 }
1511
1512 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1513 LLVM_DEBUG(StoreI->print(dbgs()));
1514 LLVM_DEBUG(dbgs() << " ");
1515 LLVM_DEBUG(LoadI->print(dbgs()));
1516 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1517 LLVM_DEBUG(StoreI->print(dbgs()));
1518 LLVM_DEBUG(dbgs() << " ");
1519 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1520 LLVM_DEBUG(dbgs() << "\n");
1521
1522 // Erase the old instructions.
1523 LoadI->eraseFromParent();
1524 return NextI;
1525}
1526
1527static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
1528 // Convert the byte-offset used by unscaled into an "element" offset used
1529 // by the scaled pair load/store instructions.
1530 if (IsUnscaled) {
1531 // If the byte-offset isn't a multiple of the stride, there's no point
1532 // trying to match it.
1533 if (Offset % OffsetStride)
1534 return false;
1535 Offset /= OffsetStride;
1536 }
1537 return Offset <= 63 && Offset >= -64;
1538}
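
// For example (worked out for one case): an unscaled STURXi with byte offset
// 40 and stride 8 maps to element offset 5, which inBoundsForPair accepts
// because it lies within the signed 7-bit range [-64, 63] of the paired form.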
1539
// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
1544static int alignTo(int Num, int PowOf2) {
1545 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1546}
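
// For example, alignTo(5, 4) == 8 and alignTo(-3, 4) == 0.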
1547
static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
1551 for (MachineInstr *MIb : MemInsns) {
1552 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
1553 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
1554 return true;
1555 }
1556 }
1557
1558 LLVM_DEBUG(dbgs() << "No aliases found\n");
1559 return false;
1560}
1561
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

1570 // If the load is the first instruction in the block, there's obviously
1571 // not any matching store.
1572 if (MBBI == B)
1573 return false;
1574
1575 // Track which register units have been modified and used between the first
1576 // insn and the second insn.
1577 ModifiedRegUnits.clear();
1578 UsedRegUnits.clear();
1579
1580 unsigned Count = 0;
1581 do {
1582 MBBI = prev_nodbg(MBBI, B);
1583 MachineInstr &MI = *MBBI;
1584
1585 // Don't count transient instructions towards the search limit since there
1586 // may be different numbers of them if e.g. debug information is present.
1587 if (!MI.isTransient())
1588 ++Count;
1589
1590 // If the load instruction reads directly from the address to which the
1591 // store instruction writes and the stored value is not modified, we can
1592 // promote the load. Since we do not handle stores with pre-/post-index,
1593 // it's unnecessary to check if BaseReg is modified by the store itself.
1594 // Also we can't handle stores without an immediate offset operand,
1595 // while the operand might be the address for a global variable.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1600 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1601 StoreI = MBBI;
1602 return true;
1603 }
1604
1605 if (MI.isCall())
1606 return false;
1607
1608 // Update modified / uses register units.
1609 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1610
1611 // Otherwise, if the base register is modified, we have no match, so
1612 // return early.
1613 if (!ModifiedRegUnits.available(BaseReg))
1614 return false;
1615
1616 // If we encounter a store aliased with the load, return early.
1617 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1618 return false;
1619 } while (MBBI != B && Count < Limit);
1620 return false;
1621}
1622
static bool needsWinCFI(const MachineFunction *MF) {
  return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         MF->getFunction().needsUnwindTableEntry();
}
1627
1628// Returns true if FirstMI and MI are candidates for merging or pairing.
1629// Otherwise, returns false.
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
1633 // If this is volatile or if pairing is suppressed, not a candidate.
1634 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1635 return false;
1636
1637 // We should have already checked FirstMI for pair suppression and volatility.
1638 assert(!FirstMI.hasOrderedMemoryRef() &&
1639 !TII->isLdStPairSuppressed(FirstMI) &&
1640 "FirstMI shouldn't get here if either of these checks are true.");
1641
  if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
                                  MI.getFlag(MachineInstr::FrameDestroy)))
    return false;
1645
1646 unsigned OpcA = FirstMI.getOpcode();
1647 unsigned OpcB = MI.getOpcode();
1648
1649 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1650 if (OpcA == OpcB)
1651 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1652
1653 // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1654 // allow pairing them with other instructions.
1655 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1656 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1657 return false;
1658
  // Two pre ld/st of different opcodes cannot be merged either.
  if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
    return false;
1662
1663 // Try to match a sign-extended load/store with a zero-extended load/store.
1664 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1665 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1666 assert(IsValidLdStrOpc &&
1667 "Given Opc should be a Load or Store with an immediate");
1668 // OpcA will be the first instruction in the pair.
1669 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1670 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1671 return true;
1672 }
1673
1674 // If the second instruction isn't even a mergeable/pairable load/store, bail
1675 // out.
1676 if (!PairIsValidLdStrOpc)
1677 return false;
1678
1679 // Narrow stores do not have matching pair opcodes, so constrain their
1680 // merging to zero stores.
1681 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1682 return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
1683 getLdStRegOp(MI).getReg() == AArch64::WZR &&
1684 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1685
1686 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1687 // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
1688 // are candidate pairs that can be merged.
1689 if (isPreLdStPairCandidate(FirstMI, MI))
1690 return true;
1691
1692 // Try to match an unscaled load/store with a scaled load/store.
1693 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1695
1696 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1697}
1698
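// Returns true if the operand MOP can safely be renamed: for register
// operands this rejects register classes whose registers consist of multiple
// disjunct sub-registers (e.g. LD3/ST3 tuple results) and implicit-defs that
// are not known to mirror the explicit result register; otherwise the operand
// must be implicit, or renamable and neither early-clobber nor tied.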
1699static bool canRenameMOP(const MachineOperand &MOP,
1700 const TargetRegisterInfo *TRI) {
1701 if (MOP.isReg()) {
1702 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1703 // Renaming registers with multiple disjunct sub-registers (e.g. the
1704 // result of a LD3) means that all sub-registers are renamed, potentially
1705 // impacting other instructions we did not check. Bail out.
1706 // Note that this relies on the structure of the AArch64 register file. In
1707 // particular, a subregister cannot be written without overwriting the
1708 // whole register.
1709 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1710 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1711 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1712 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1713 LLVM_DEBUG(
1714 dbgs()
1715 << " Cannot rename operands with multiple disjunct subregisters ("
1716 << MOP << ")\n");
1717 return false;
1718 }
1719
1720 // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
1721 // them must be known. For example, in ORRWrs the implicit-def
1722 // corresponds to the result register.
1723 if (MOP.isImplicit() && MOP.isDef()) {
1725 return false;
1726 return TRI->isSuperOrSubRegisterEq(
1727 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1728 }
1729 }
1730 return MOP.isImplicit() ||
1731 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1732}
1733
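// Returns true if the register written by the store FirstMI can be renamed in
// every instruction from FirstMI back to its closest preceding definition.
// This requires that the stored register is killed at FirstMI, that every
// intervening operand overlapping the register is renamable, and that the
// defining instruction is not a pseudo. The register classes needed for a
// rename register are collected in RequiredClasses.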
1734static bool
1737 const TargetRegisterInfo *TRI) {
1738 if (!FirstMI.mayStore())
1739 return false;
1740
1741 // Check if we can find an unused register which we can use to rename
1742 // the register used by the first load/store.
1743
1744 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1745 // For now, we only rename if the store operand gets killed at the store.
1746 if (!getLdStRegOp(FirstMI).isKill() &&
1747 !any_of(FirstMI.operands(),
1748 [TRI, RegToRename](const MachineOperand &MOP) {
1749 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1750 MOP.isImplicit() && MOP.isKill() &&
1751 TRI->regsOverlap(RegToRename, MOP.getReg());
1752 })) {
1753 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
1754 return false;
1755 }
1756
1757 bool FoundDef = false;
1758
1759 // For each instruction between FirstMI and the previous def for RegToRename,
1760 // we
1761 // * check if we can rename RegToRename in this instruction
1762 // * collect the registers used and required register classes for RegToRename.
1763 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1764 bool IsDef) {
1765 LLVM_DEBUG(dbgs() << "Checking " << MI);
1766 // Currently we do not try to rename across frame-setup instructions.
1767 if (MI.getFlag(MachineInstr::FrameSetup)) {
1768 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1769 << "currently\n");
1770 return false;
1771 }
1772
1773 UsedInBetween.accumulate(MI);
1774
1775 // For a definition, check that we can rename the definition and exit the
1776 // loop.
1777 FoundDef = IsDef;
1778
1779 // For defs, check if we can rename the first def of RegToRename.
1780 if (FoundDef) {
1781 // For some pseudo instructions, we might not generate code in the end
1782 // (e.g. KILL) and we would end up without a correct def for the rename
1783 // register.
1784 // TODO: This might be overly conservative and we could handle those cases
1785 // in multiple ways:
1786 // 1. Insert an extra copy, to materialize the def.
1787 // 2. Skip pseudo-defs until we find a non-pseudo def.
1788 if (MI.isPseudo()) {
1789 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
1790 return false;
1791 }
1792
1793 for (auto &MOP : MI.operands()) {
1794 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1795 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1796 continue;
1797 if (!canRenameMOP(MOP, TRI)) {
1798 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1799 return false;
1800 }
1801 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1802 }
1803 return true;
1804 } else {
1805 for (auto &MOP : MI.operands()) {
1806 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1807 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1808 continue;
1809
1810 if (!canRenameMOP(MOP, TRI)) {
1811 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1812 return false;
1813 }
1814 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1815 }
1816 }
1817 return true;
1818 };
1819
1820 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1821 return false;
1822
1823 if (!FoundDef) {
1824 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1825 return false;
1826 }
1827 return true;
1828}
1829
1830// We want to merge the second load into the first by rewriting the usages of
1831// the same reg between first (incl.) and second (excl.). We don't need to care
1832// about any insns before FirstLoad or after SecondLoad.
1833 // 1. The second load writes a new value into the same reg.
1834 // - The renaming cannot impact any later use of the reg.
1835 // - The second load always trashes the value written by the first load,
1836 // which means the reg must be killed before the second load.
1837// 2. The first load must be a def for the same reg so we don't need to look
1838// into anything before it.
1840 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1841 LiveRegUnits &UsedInBetween,
1843 const TargetRegisterInfo *TRI) {
1844 if (FirstLoad.isPseudo())
1845 return false;
1846
1847 UsedInBetween.accumulate(FirstLoad);
1848 auto RegToRename = getLdStRegOp(FirstLoad).getReg();
1849 bool Success = std::all_of(
1850 FirstLoad.getIterator(), SecondLoad.getIterator(),
1851 [&](MachineInstr &MI) {
1852 LLVM_DEBUG(dbgs() << "Checking " << MI);
1853 // Currently we do not try to rename across frame-setup instructions.
1854 if (MI.getFlag(MachineInstr::FrameSetup)) {
1855 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1856 << "currently\n");
1857 return false;
1858 }
1859
1860 for (auto &MOP : MI.operands()) {
1861 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1862 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1863 continue;
1864 if (!canRenameMOP(MOP, TRI)) {
1865 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1866 return false;
1867 }
1868 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1869 }
1870
1871 return true;
1872 });
1873 return Success;
1874}
1875
1876// Check if we can find a physical register for renaming \p Reg. This register
1877// must:
1878// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1879// defined registers up to the point where the renamed register will be used,
1880 // * not be used in \p UsedInBetween; UsedInBetween must contain all accessed
1881 // registers in the range the rename register will be used,
1882 // * be available in all used register classes (checked using RequiredClasses).
1883static std::optional<MCPhysReg> tryToFindRegisterToRename(
1884 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1885 LiveRegUnits &UsedInBetween,
1887 const TargetRegisterInfo *TRI) {
1889
1890 // Checks if any sub- or super-register of PR is callee saved.
1891 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1892 return any_of(TRI->sub_and_superregs_inclusive(PR),
1893 [&MF, TRI](MCPhysReg SubOrSuper) {
1894 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1895 });
1896 };
1897
1898 // Check if PR or one of its sub- or super-registers can be used for all
1899 // required register classes.
1900 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1901 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1902 return any_of(
1903 TRI->sub_and_superregs_inclusive(PR),
1904 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1905 });
1906 };
1907
1908 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1909 for (const MCPhysReg &PR : *RegClass) {
1910 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1911 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1912 CanBeUsedForAllClasses(PR)) {
1913 DefinedInBB.addReg(PR);
1914 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1915 << "\n");
1916 return {PR};
1917 }
1918 }
1919 LLVM_DEBUG(dbgs() << "No rename register found from "
1920 << TRI->getRegClassName(RegClass) << "\n");
1921 return std::nullopt;
1922}
1923
1924// For store pairs: returns a register from FirstMI to the beginning of the
1925// block that can be renamed.
1926// For load pairs: returns a register from FirstMI to MI that can be renamed.
1927static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1928 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1929 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1931 const TargetRegisterInfo *TRI) {
1932 std::optional<MCPhysReg> RenameReg;
1933 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1934 return RenameReg;
1935
1936 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1937 MachineFunction &MF = *FirstMI.getParent()->getParent();
1938 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1939 return RenameReg;
1940
1941 const bool IsLoad = FirstMI.mayLoad();
1942
1943 if (!MaybeCanRename) {
1944 if (IsLoad)
1945 MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1946 RequiredClasses, TRI)};
1947 else
1948 MaybeCanRename = {
1949 canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
1950 }
1951
1952 if (*MaybeCanRename) {
1953 RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
1954 RequiredClasses, TRI);
1955 }
1956 return RenameReg;
1957}
1958
1959/// Scan the instructions looking for a load/store that can be combined with the
1960/// current instruction into a wider equivalent or a load/store pair.
1962AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1963 LdStPairFlags &Flags, unsigned Limit,
1964 bool FindNarrowMerge) {
1965 MachineBasicBlock::iterator E = I->getParent()->end();
1967 MachineBasicBlock::iterator MBBIWithRenameReg;
1968 MachineInstr &FirstMI = *I;
1969 MBBI = next_nodbg(MBBI, E);
1970
1971 bool MayLoad = FirstMI.mayLoad();
1972 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1973 Register Reg = getLdStRegOp(FirstMI).getReg();
1976 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1977 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1978
1979 std::optional<bool> MaybeCanRename;
1980 if (!EnableRenaming)
1981 MaybeCanRename = {false};
1982
1983 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
1984 LiveRegUnits UsedInBetween;
1985 UsedInBetween.init(*TRI);
1986
1987 Flags.clearRenameReg();
1988
1989 // Track which register units have been modified and used between the first
1990 // insn (inclusive) and the second insn.
1991 ModifiedRegUnits.clear();
1992 UsedRegUnits.clear();
1993
1994 // Remember any instructions that read/write memory between FirstMI and MI.
1995 SmallVector<MachineInstr *, 4> MemInsns;
1996
1997 LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
1998 for (unsigned Count = 0; MBBI != E && Count < Limit;
1999 MBBI = next_nodbg(MBBI, E)) {
2000 MachineInstr &MI = *MBBI;
2001 LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
2002
2003 UsedInBetween.accumulate(MI);
2004
2005 // Don't count transient instructions towards the search limit since there
2006 // may be different numbers of them if e.g. debug information is present.
2007 if (!MI.isTransient())
2008 ++Count;
2009
2010 Flags.setSExtIdx(-1);
2011 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2013 assert(MI.mayLoadOrStore() && "Expected memory operation.");
2014 // If we've found another instruction with the same opcode, check to see
2015 // if the base and offset are compatible with our starting instruction.
2016 // These instructions all have scaled immediate operands, so we just
2017 // check for +1/-1. Make sure to check the new instruction offset is
2018 // actually an immediate and not a symbolic reference destined for
2019 // a relocation.
2022 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2023 if (IsUnscaled != MIIsUnscaled) {
2024 // We're trying to pair instructions that differ in how they are scaled.
2025 // If FirstMI is scaled then scale the offset of MI accordingly.
2026 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
2027 int MemSize = TII->getMemScale(MI);
2028 if (MIIsUnscaled) {
2029 // If the unscaled offset isn't a multiple of the MemSize, we can't
2030 // pair the operations together: bail and keep looking.
2031 if (MIOffset % MemSize) {
2032 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2033 UsedRegUnits, TRI);
2034 MemInsns.push_back(&MI);
2035 continue;
2036 }
2037 MIOffset /= MemSize;
2038 } else {
2039 MIOffset *= MemSize;
2040 }
2041 }
2042
2043 bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
2044
2045 if (BaseReg == MIBaseReg) {
2046 // If the offset of the second ld/st is not equal to the size of the
2047 // destination register it can’t be paired with a pre-index ld/st
2048 // pair. Additionally if the base reg is used or modified the operations
2049 // can't be paired: bail and keep looking.
2050 if (IsPreLdSt) {
2051 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2052 bool IsBaseRegUsed = !UsedRegUnits.available(
2054 bool IsBaseRegModified = !ModifiedRegUnits.available(
2056 // If the stored value and the address of the second instruction are
2057 // the same, it needs to be using the updated register and therefore
2058 // it must not be folded.
2059 bool IsMIRegTheSame =
2060 TRI->regsOverlap(getLdStRegOp(MI).getReg(),
2062 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2063 IsMIRegTheSame) {
2064 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2065 UsedRegUnits, TRI);
2066 MemInsns.push_back(&MI);
2067 continue;
2068 }
2069 } else {
2070 if ((Offset != MIOffset + OffsetStride) &&
2071 (Offset + OffsetStride != MIOffset)) {
2072 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2073 UsedRegUnits, TRI);
2074 MemInsns.push_back(&MI);
2075 continue;
2076 }
2077 }
2078
2079 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2080 if (FindNarrowMerge) {
2081 // If the alignment requirements of the scaled wide load/store
2082 // instruction can't express the offset of the scaled narrow input,
2083 // bail and keep looking. For promotable zero stores, allow only when
2084 // the stored value is the same (i.e., WZR).
2085 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2086 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2087 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2088 UsedRegUnits, TRI);
2089 MemInsns.push_back(&MI);
2090 continue;
2091 }
2092 } else {
2093 // Pairwise instructions have a 7-bit signed offset field. Single
2094 // insns have a 12-bit unsigned offset field. If the resultant
2095 // immediate offset of merging these instructions is out of range for
2096 // a pairwise instruction, bail and keep looking.
2097 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2098 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2099 UsedRegUnits, TRI);
2100 MemInsns.push_back(&MI);
2101 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2102 << "keep looking.\n");
2103 continue;
2104 }
2105 // If the alignment requirements of the paired (scaled) instruction
2106 // can't express the offset of the unscaled input, bail and keep
2107 // looking.
2108 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2109 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2110 UsedRegUnits, TRI);
2111 MemInsns.push_back(&MI);
2113 << "Offset doesn't fit due to alignment requirements, "
2114 << "keep looking.\n");
2115 continue;
2116 }
2117 }
2118
2119 // If the BaseReg has been modified, then we cannot do the optimization.
2120 // For example, in the following pattern
2121 // ldr x1, [x2]
2122 // ldr x2, [x3]
2123 // ldr x4, [x2, #8],
2124 // the first and third ldr cannot be converted to ldp x1, x4, [x2]
2125 if (!ModifiedRegUnits.available(BaseReg))
2126 return E;
2127
2128 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2130
2131 // If the Rt of the second instruction (destination register of the
2132 // load) was not modified or used between the two instructions and none
2133 // of the instructions between the second and first alias with the
2134 // second, we can combine the second into the first.
2135 bool RtNotModified =
2136 ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
2137 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2138 !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2139
2140 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2141 << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2142 << (RtNotModified ? "true" : "false") << "\n"
2143 << "Reg '" << getLdStRegOp(MI) << "' not used: "
2144 << (RtNotUsed ? "true" : "false") << "\n");
2145
2146 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2147 // For pairs loading into the same reg, try to find a renaming
2148 // opportunity to allow the renaming of Reg between FirstMI and MI
2149 // and combine MI into FirstMI; otherwise bail and keep looking.
2150 if (SameLoadReg) {
2151 std::optional<MCPhysReg> RenameReg =
2152 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2153 Reg, DefinedInBB, UsedInBetween,
2154 RequiredClasses, TRI);
2155 if (!RenameReg) {
2156 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2157 UsedRegUnits, TRI);
2158 MemInsns.push_back(&MI);
2159 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2160 << "keep looking.\n");
2161 continue;
2162 }
2163 Flags.setRenameReg(*RenameReg);
2164 }
2165
2166 Flags.setMergeForward(false);
2167 if (!SameLoadReg)
2168 Flags.clearRenameReg();
2169 return MBBI;
2170 }
2171
2172 // Likewise, if the Rt of the first instruction is not modified or used
2173 // between the two instructions and none of the instructions between the
2174 // first and the second alias with the first, we can combine the first
2175 // into the second.
2176 RtNotModified = !(
2177 MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2178
2179 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2180 << "Reg '" << getLdStRegOp(FirstMI)
2181 << "' not modified: "
2182 << (RtNotModified ? "true" : "false") << "\n");
2183
2184 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
2185 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
2186 Flags.setMergeForward(true);
2187 Flags.clearRenameReg();
2188 return MBBI;
2189 }
2190
2191 std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2192 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2193 RequiredClasses, TRI);
2194 if (RenameReg) {
2195 Flags.setMergeForward(true);
2196 Flags.setRenameReg(*RenameReg);
2197 MBBIWithRenameReg = MBBI;
2198 }
2199 }
2200 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2201 << "interference in between, keep looking.\n");
2202 }
2203 }
2204
2205 if (Flags.getRenameReg())
2206 return MBBIWithRenameReg;
2207
2208 // The instruction wasn't a matching load or store. Stop searching if we
2209 // encounter a call instruction that might modify memory.
2210 if (MI.isCall()) {
2211 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2212 return E;
2213 }
2214
2215 // Update modified / uses register units.
2216 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2217
2218 // Otherwise, if the base register is modified, we have no match, so
2219 // return early.
2220 if (!ModifiedRegUnits.available(BaseReg)) {
2221 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2222 return E;
2223 }
2224
2225 // Update list of instructions that read/write memory.
2226 if (MI.mayLoadOrStore())
2227 MemInsns.push_back(&MI);
2228 }
2229 return E;
2230}
2231
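// If the SP update MI (an ADDXri/SUBXri with a FrameSetup/FrameDestroy flag)
// is immediately followed by a CFA-related CFI instruction, return an
// iterator to that CFI so it can be kept adjacent to the merged instruction;
// otherwise return the end iterator.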
2234 assert((MI.getOpcode() == AArch64::SUBXri ||
2235 MI.getOpcode() == AArch64::ADDXri) &&
2236 "Expected a register update instruction");
2237 auto End = MI.getParent()->end();
2238 if (MaybeCFI == End ||
2239 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2240 !(MI.getFlag(MachineInstr::FrameSetup) ||
2241 MI.getFlag(MachineInstr::FrameDestroy)) ||
2242 MI.getOperand(0).getReg() != AArch64::SP)
2243 return End;
2244
2245 const MachineFunction &MF = *MI.getParent()->getParent();
2246 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2247 const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2248 switch (CFI.getOperation()) {
2251 return MaybeCFI;
2252 default:
2253 return End;
2254 }
2255}
2256
2257std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2259 bool IsForward, bool IsPreIdx, bool MergeEither) {
2260 assert((Update->getOpcode() == AArch64::ADDXri ||
2261 Update->getOpcode() == AArch64::SUBXri) &&
2262 "Unexpected base register update instruction to merge!");
2263 MachineBasicBlock::iterator E = I->getParent()->end();
2265
2266 // If updating the SP and the following instruction is a CFA-offset-related CFI,
2267 // make sure the CFI follows the SP update either by merging at the location
2268 // of the update or by moving the CFI after the merged instruction. If unable
2269 // to do so, bail.
2270 MachineBasicBlock::iterator InsertPt = I;
2271 if (IsForward) {
2272 assert(IsPreIdx);
2273 if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
2274 if (MergeEither) {
2275 InsertPt = Update;
2276 } else {
2277 // Take care not to reorder CFIs.
2278 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2279 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2280 }))
2281 return std::nullopt;
2282
2283 MachineBasicBlock *MBB = InsertPt->getParent();
2284 MBB->splice(std::next(InsertPt), MBB, CFI);
2285 }
2286 }
2287 }
2288
2289 // Return the instruction following the merged instruction, which is
2290 // the instruction following our unmerged load. Unless that's the add/sub
2291 // instruction we're merging, in which case it's the one after that.
2292 if (NextI == Update)
2293 NextI = next_nodbg(NextI, E);
2294
2295 int Value = Update->getOperand(2).getImm();
2296 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
2297 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2298 if (Update->getOpcode() == AArch64::SUBXri)
2299 Value = -Value;
2300
2301 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2303 MachineInstrBuilder MIB;
2304 int Scale, MinOffset, MaxOffset;
2305 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2307 // Non-paired instruction.
2308 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2309 TII->get(NewOpc))
2310 .add(Update->getOperand(0))
2311 .add(getLdStRegOp(*I))
2313 .addImm(Value / Scale)
2314 .setMemRefs(I->memoperands())
2315 .setMIFlags(I->mergeFlagsWith(*Update));
2316 } else {
2317 // Paired instruction.
2318 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2319 TII->get(NewOpc))
2320 .add(Update->getOperand(0))
2321 .add(getLdStRegOp(*I, 0))
2322 .add(getLdStRegOp(*I, 1))
2324 .addImm(Value / Scale)
2325 .setMemRefs(I->memoperands())
2326 .setMIFlags(I->mergeFlagsWith(*Update));
2327 }
2328
2329 if (IsPreIdx) {
2330 ++NumPreFolded;
2331 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2332 } else {
2333 ++NumPostFolded;
2334 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2335 }
2336 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2337 LLVM_DEBUG(I->print(dbgs()));
2338 LLVM_DEBUG(dbgs() << " ");
2339 LLVM_DEBUG(Update->print(dbgs()));
2340 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2341 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
2342 LLVM_DEBUG(dbgs() << "\n");
2343
2344 // Erase the old instructions for the block.
2345 I->eraseFromParent();
2346 Update->eraseFromParent();
2347
2348 return NextI;
2349}
2350
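// Fold the constant materialized by a MOVZWi/MOVKWi pair into the load/store
// at I: emit an ADDXri that adds the high part of Offset (imm12, lsl #12) to
// the base register, and rewrite the load/store to use the new base with the
// remaining low part as its scaled immediate. The original load/store and
// both mov instructions are removed.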
2352AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2354 unsigned Offset, int Scale) {
2355 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2356 "Unexpected const mov instruction to merge!");
2357 MachineBasicBlock::iterator E = I->getParent()->end();
2359 MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2360 MachineInstr &MemMI = *I;
2361 unsigned Mask = (1 << 12) * Scale - 1;
2362 unsigned Low = Offset & Mask;
2363 unsigned High = Offset - Low;
2366 MachineInstrBuilder AddMIB, MemMIB;
2367
2368 // Add IndexReg, BaseReg, High (the BaseReg may be SP)
2369 AddMIB =
2370 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2371 .addDef(IndexReg)
2372 .addUse(BaseReg)
2373 .addImm(High >> 12) // shifted value
2374 .addImm(12); // shift 12
2375 (void)AddMIB;
2376 // Ld/St DestReg, IndexReg, Imm12
2377 unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2378 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2379 .add(getLdStRegOp(MemMI))
2381 .addImm(Low / Scale)
2382 .setMemRefs(I->memoperands())
2383 .setMIFlags(I->mergeFlagsWith(*Update));
2384 (void)MemMIB;
2385
2386 ++NumConstOffsetFolded;
2387 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2388 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2389 LLVM_DEBUG(PrevI->print(dbgs()));
2390 LLVM_DEBUG(dbgs() << " ");
2391 LLVM_DEBUG(Update->print(dbgs()));
2392 LLVM_DEBUG(dbgs() << " ");
2393 LLVM_DEBUG(I->print(dbgs()));
2394 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2395 LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2396 LLVM_DEBUG(dbgs() << " ");
2397 LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2398 LLVM_DEBUG(dbgs() << "\n");
2399
2400 // Erase the old instructions for the block.
2401 I->eraseFromParent();
2402 PrevI->eraseFromParent();
2403 Update->eraseFromParent();
2404
2405 return NextI;
2406}
2407
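// Returns true if MI is an ADDXri/SUBXri that updates the base register of
// the memory instruction MemMI by an amount that can be folded into a
// pre-/post-indexed form: same source and destination register as BaseReg, a
// plain immediate (no relocation, no shifted 1<<12 form) that is a multiple
// of the memory scale, within the writeback immediate range, and equal to
// Offset when a specific offset is requested (Offset != 0).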
2408bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2409 MachineInstr &MI,
2410 unsigned BaseReg, int Offset) {
2411 switch (MI.getOpcode()) {
2412 default:
2413 break;
2414 case AArch64::SUBXri:
2415 case AArch64::ADDXri:
2416 // Make sure it's a vanilla immediate operand, not a relocation or
2417 // anything else we can't handle.
2418 if (!MI.getOperand(2).isImm())
2419 break;
2420 // Watch out for 1 << 12 shifted value.
2421 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2422 break;
2423
2424 // The update instruction source and destination register must be the
2425 // same as the load/store base register.
2426 if (MI.getOperand(0).getReg() != BaseReg ||
2427 MI.getOperand(1).getReg() != BaseReg)
2428 break;
2429
2430 int UpdateOffset = MI.getOperand(2).getImm();
2431 if (MI.getOpcode() == AArch64::SUBXri)
2432 UpdateOffset = -UpdateOffset;
2433
2434 // The immediate must be a multiple of the scaling factor of the pre/post
2435 // indexed instruction.
2436 int Scale, MinOffset, MaxOffset;
2437 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2438 if (UpdateOffset % Scale != 0)
2439 break;
2440
2441 // Scaled offset must fit in the instruction immediate.
2442 int ScaledOffset = UpdateOffset / Scale;
2443 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2444 break;
2445
2446 // If we have a non-zero Offset, we check that it matches the amount
2447 // we're adding to the register.
2448 if (!Offset || Offset == UpdateOffset)
2449 return true;
2450 break;
2451 }
2452 return false;
2453}
2454
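// Returns true if MI is the MOVKWi of a MOVZWi/MOVKWi pair that materializes
// the large constant held in the (killed) index register of MemMI, e.g.
//   movz w8, #0x1234
//   movk w8, #0x5, lsl #16
// The combined constant is returned in Offset and must fit in 24 bits so it
// can later be split into an ADDXri (imm12, lsl #12) plus the imm12 of the
// load/store.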
2455bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2456 MachineInstr &MI,
2457 unsigned IndexReg,
2458 unsigned &Offset) {
2459 // The update instruction source and destination register must be the
2460 // same as the load/store index register.
2461 if (MI.getOpcode() == AArch64::MOVKWi &&
2462 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2463
2464 // A movz + movk pair holds a large offset for a Ld/St instruction.
2465 MachineBasicBlock::iterator B = MI.getParent()->begin();
2467 // Bail out when MI is the first instruction of the block.
2468 if (MBBI == B)
2469 return false;
2470 MBBI = prev_nodbg(MBBI, B);
2471 MachineInstr &MovzMI = *MBBI;
2472 // Make sure the MOVKWi and MOVZWi set the same register.
2473 if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2474 MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
2475 unsigned Low = MovzMI.getOperand(1).getImm();
2476 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2477 Offset = High + Low;
2478 // 12-bit optionally shifted immediates are legal for adds.
2479 return Offset >> 24 == 0;
2480 }
2481 }
2482 return false;
2483}
2484
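// Scan forward from the memory instruction I (and, when liveness is tracked,
// into a chain of single-predecessor successors where the base register stays
// live) for an add/sub that post-increments the base register by
// UnscaledOffset, stopping if the base register is used or modified first.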
2485MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2486 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2487 MachineBasicBlock::iterator E = I->getParent()->end();
2488 MachineInstr &MemMI = *I;
2490
2492 int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2493 TII->getMemScale(MemMI);
2494
2495 // Scan forward looking for post-index opportunities. Updating instructions
2496 // can't be formed if the memory instruction doesn't have the offset we're
2497 // looking for.
2498 if (MIUnscaledOffset != UnscaledOffset)
2499 return E;
2500
2501 // If the base register overlaps a source/destination register, we can't
2502 // merge the update. This does not apply to tag store instructions which
2503 // ignore the address part of the source register.
2504 // Nor does this apply to STGPi, which, unlike normal stores, does not have
2505 // unpredictable behavior in this case and always performs writeback
2506 // after reading the source register value.
2507 if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
2508 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2509 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2510 Register DestReg = getLdStRegOp(MemMI, i).getReg();
2511 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2512 return E;
2513 }
2514 }
2515
2516 // Track which register units have been modified and used between the first
2517 // insn (inclusive) and the second insn.
2518 ModifiedRegUnits.clear();
2519 UsedRegUnits.clear();
2520 MBBI = next_nodbg(MBBI, E);
2521
2522 // We can't post-increment the stack pointer if any instruction between
2523 // the memory access (I) and the increment (MBBI) can access the memory
2524 // region defined by [SP, MBBI].
2525 const bool BaseRegSP = BaseReg == AArch64::SP;
2526 if (BaseRegSP && needsWinCFI(I->getMF())) {
2527 // FIXME: For now, we always block the optimization over SP in Windows
2528 // targets as it requires adjusting the unwind/debug info; messing up
2529 // the unwind info can actually cause a miscompile.
2530 return E;
2531 }
2532
2533 unsigned Count = 0;
2534 MachineBasicBlock *CurMBB = I->getParent();
2535 // The choice of the next block to visit is based on live-ins.
2536 bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2537
2538 while (true) {
2539 for (MachineBasicBlock::iterator CurEnd = CurMBB->end();
2540 MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) {
2541 MachineInstr &MI = *MBBI;
2542
2543 // Don't count transient instructions towards the search limit since there
2544 // may be different numbers of them if e.g. debug information is present.
2545 if (!MI.isTransient())
2546 ++Count;
2547
2548 // If we found a match, return it.
2549 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2550 return MBBI;
2551
2552 // Update the status of what the instruction clobbered and used.
2553 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2554 TRI);
2555
2556 // Otherwise, if the base register is used or modified, we have no match,
2557 // so return early. If we are optimizing SP, do not allow instructions
2558 // that may load or store in between the load and the optimized value
2559 // update.
2560 if (!ModifiedRegUnits.available(BaseReg) ||
2561 !UsedRegUnits.available(BaseReg) ||
2562 (BaseRegSP && MBBI->mayLoadOrStore()))
2563 return E;
2564 }
2565
2566 if (!VisitSucc || Limit <= Count)
2567 break;
2568
2569 // Try to go downward to successors along a CF path without side entries
2570 // such that BaseReg is live along it but not at its exits.
2571 MachineBasicBlock *SuccToVisit = nullptr;
2572 unsigned LiveSuccCount = 0;
2573 for (MachineBasicBlock *Succ : CurMBB->successors()) {
2574 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2575 if (Succ->isLiveIn(*AI)) {
2576 if (LiveSuccCount++)
2577 return E;
2578 if (Succ->pred_size() == 1)
2579 SuccToVisit = Succ;
2580 break;
2581 }
2582 }
2583 }
2584 if (!SuccToVisit)
2585 break;
2586 CurMBB = SuccToVisit;
2587 MBBI = CurMBB->begin();
2588 }
2589
2590 return E;
2591}
2592
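// Scan backward from the memory instruction I for an add/sub of the base
// register that can be folded as a pre-index writeback. MergeEither is set to
// false once an intervening memory access, side effect, or use/def of a
// destination register forces the merged instruction to be placed at the
// position of the memory instruction.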
2593MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2594 MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2595 MachineBasicBlock::iterator B = I->getParent()->begin();
2596 MachineBasicBlock::iterator E = I->getParent()->end();
2597 MachineInstr &MemMI = *I;
2599 MachineFunction &MF = *MemMI.getMF();
2600
2603
2604 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2605 Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2606 IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
2607 : AArch64::NoRegister};
2608
2609 // If the load/store is the first instruction in the block, there's obviously
2610 // not any matching update. Ditto if the memory offset isn't zero.
2611 if (MBBI == B || Offset != 0)
2612 return E;
2613 // If the base register overlaps a destination register, we can't
2614 // merge the update.
2615 if (!isTagStore(MemMI)) {
2616 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2617 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2618 return E;
2619 }
2620
2621 const bool BaseRegSP = BaseReg == AArch64::SP;
2622 if (BaseRegSP && needsWinCFI(I->getMF())) {
2623 // FIXME: For now, we always block the optimization over SP in Windows
2624 // targets as it requires adjusting the unwind/debug info; messing up
2625 // the unwind info can actually cause a miscompile.
2626 return E;
2627 }
2628
2629 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2630 unsigned RedZoneSize =
2631 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2632
2633 // Track which register units have been modified and used between the first
2634 // insn (inclusive) and the second insn.
2635 ModifiedRegUnits.clear();
2636 UsedRegUnits.clear();
2637 unsigned Count = 0;
2638 bool MemAccessBeforeSPPreInc = false;
2639 MergeEither = true;
2640 do {
2641 MBBI = prev_nodbg(MBBI, B);
2642 MachineInstr &MI = *MBBI;
2643
2644 // Don't count transient instructions towards the search limit since there
2645 // may be different numbers of them if e.g. debug information is present.
2646 if (!MI.isTransient())
2647 ++Count;
2648
2649 // If we found a match, return it.
2650 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2651 // Check that the update value is within our red zone limit (which may be
2652 // zero).
2653 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2654 return E;
2655 return MBBI;
2656 }
2657
2658 // Update the status of what the instruction clobbered and used.
2659 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2660
2661 // Otherwise, if the base register is used or modified, we have no match, so
2662 // return early.
2663 if (!ModifiedRegUnits.available(BaseReg) ||
2664 !UsedRegUnits.available(BaseReg))
2665 return E;
2666
2667 // If we have a destination register (i.e. a load instruction) and a
2668 // destination register is used or modified, then we can only merge forward,
2669 // i.e. the combined instruction is put in the place of the memory
2670 // instruction. Same applies if we see a memory access or side effects.
2671 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2672 (DestReg[0] != AArch64::NoRegister &&
2673 !(ModifiedRegUnits.available(DestReg[0]) &&
2674 UsedRegUnits.available(DestReg[0]))) ||
2675 (DestReg[1] != AArch64::NoRegister &&
2676 !(ModifiedRegUnits.available(DestReg[1]) &&
2677 UsedRegUnits.available(DestReg[1]))))
2678 MergeEither = false;
2679
2680 // Keep track of whether we have a memory access before an SP pre-increment;
2681 // in that case we need to validate later that the update amount respects
2682 // the red zone.
2683 if (BaseRegSP && MBBI->mayLoadOrStore())
2684 MemAccessBeforeSPPreInc = true;
2685 } while (MBBI != B && Count < Limit);
2686 return E;
2687}
2688
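// Scan backward from the memory instruction I for the MOVZWi/MOVKWi pair that
// materializes the constant in its (killed) index register, stopping if the
// index register is used or modified in between. The recovered constant is
// returned in Offset.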
2690AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2691 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2692 MachineBasicBlock::iterator B = I->getParent()->begin();
2693 MachineBasicBlock::iterator E = I->getParent()->end();
2694 MachineInstr &MemMI = *I;
2696
2697 // If the load is the first instruction in the block, there's obviously
2698 // not any matching load or store.
2699 if (MBBI == B)
2700 return E;
2701
2702 // Make sure the IndexReg is killed and the shift amount is zero.
2703 // TODO: Relax this restriction to handle extends; keep processing simple for now.
2704 if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2705 !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2707 return E;
2708
2710
2711 // Track which register units have been modified and used between the first
2712 // insn (inclusive) and the second insn.
2713 ModifiedRegUnits.clear();
2714 UsedRegUnits.clear();
2715 unsigned Count = 0;
2716 do {
2717 MBBI = prev_nodbg(MBBI, B);
2718 MachineInstr &MI = *MBBI;
2719
2720 // Don't count transient instructions towards the search limit since there
2721 // may be different numbers of them if e.g. debug information is present.
2722 if (!MI.isTransient())
2723 ++Count;
2724
2725 // If we found a match, return it.
2726 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2727 return MBBI;
2728 }
2729
2730 // Update the status of what the instruction clobbered and used.
2731 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2732
2733 // Otherwise, if the index register is used or modified, we have no match,
2734 // so return early.
2735 if (!ModifiedRegUnits.available(IndexReg) ||
2736 !UsedRegUnits.available(IndexReg))
2737 return E;
2738
2739 } while (MBBI != B && Count < Limit);
2740 return E;
2741}
2742
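// Try to replace the load at MBBI with the value of an earlier store to the
// same address (found within LdStLimit instructions), turning the load into a
// register move or a bitfield extract when the store is wider than the load.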
2743bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2745 MachineInstr &MI = *MBBI;
2746 // If this is a volatile load, don't mess with it.
2747 if (MI.hasOrderedMemoryRef())
2748 return false;
2749
2750 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2751 return false;
2752
2753 // Make sure this is a reg+imm.
2754 // FIXME: It is possible to extend it to handle reg+reg cases.
2756 return false;
2757
2758 // Look backward up to LdStLimit instructions.
2760 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2761 ++NumLoadsFromStoresPromoted;
2762 // Promote the load. Keeping the iterator straight is a
2763 // pain, so we let the merge routine tell us what the next instruction
2764 // is after it's done mucking about.
2765 MBBI = promoteLoadFromStore(MBBI, StoreI);
2766 return true;
2767 }
2768 return false;
2769}
2770
2771// Merge adjacent zero stores into a wider store.
2772bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2774 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2775 MachineInstr &MI = *MBBI;
2776 MachineBasicBlock::iterator E = MI.getParent()->end();
2777
2778 if (!TII->isCandidateToMergeOrPair(MI))
2779 return false;
2780
2781 // Look ahead up to LdStLimit instructions for a mergeable instruction.
2782 LdStPairFlags Flags;
2784 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2785 if (MergeMI != E) {
2786 ++NumZeroStoresPromoted;
2787
2788 // Keeping the iterator straight is a pain, so we let the merge routine tell
2789 // us what the next instruction is after it's done mucking about.
2790 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2791 return true;
2792 }
2793 return false;
2794}
2795
2796// Find loads and stores that can be merged into a single load or store pair
2797// instruction.
2798bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2799 MachineInstr &MI = *MBBI;
2800 MachineBasicBlock::iterator E = MI.getParent()->end();
2801
2802 if (!TII->isCandidateToMergeOrPair(MI))
2803 return false;
2804
2805 // If the disable-ldp feature is set, do not emit ldp.
2806 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2807 return false;
2808
2809 // If the disable-stp feature is set, do not emit stp.
2810 if (MI.mayStore() && Subtarget->hasDisableStp())
2811 return false;
2812
2813 // Early exit if the offset is not possible to match. (6 bits of positive
2814 // range, plus allow an extra one in case we find a later insn that matches
2815 // with Offset-1)
2816 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2818 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2819 // Allow one more for offset.
2820 if (Offset > 0)
2821 Offset -= OffsetStride;
2822 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2823 return false;
2824
2825 // Look ahead up to LdStLimit instructions for a pairable instruction.
2826 LdStPairFlags Flags;
2828 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2829 if (Paired != E) {
2830 // Keeping the iterator straight is a pain, so we let the merge routine tell
2831 // us what the next instruction is after it's done mucking about.
2832 auto Prev = std::prev(MBBI);
2833
2834 // Fetch the memoperand of the load/store that is a candidate for
2835 // combination.
2836 MachineMemOperand *MemOp =
2837 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2838
2839 // If a load/store arrives and the ldp/stp-aligned-only feature is set, check
2840 // that the alignment of the source pointer is at least double the alignment
2841 // of the type.
2842 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2843 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2844 // If there is no size/align information, cancel the transformation.
2845 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2846 NumFailedAlignmentCheck++;
2847 return false;
2848 }
2849
2850 // Get the needed alignments to check them if the
2851 // ldp-aligned-only/stp-aligned-only features are set.
2852 uint64_t MemAlignment = MemOp->getAlign().value();
2853 uint64_t TypeAlignment =
2854 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2855
2856 if (MemAlignment < 2 * TypeAlignment) {
2857 NumFailedAlignmentCheck++;
2858 return false;
2859 }
2860 }
2861
2862 ++NumPairCreated;
2863 if (TII->hasUnscaledLdStOffset(MI))
2864 ++NumUnscaledPairCreated;
2865
2866 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2867 // Collect liveness info for instructions between Prev and the new position
2868 // MBBI.
2869 for (auto I = std::next(Prev); I != MBBI; I++)
2870 updateDefinedRegisters(*I, DefinedInBB, TRI);
2871
2872 return true;
2873 }
2874 return false;
2875}
2876
2877bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2879 MachineInstr &MI = *MBBI;
2880 MachineBasicBlock::iterator E = MI.getParent()->end();
2882
2883 // Look forward to try to form a post-index instruction. For example,
2884 // ldr x0, [x20]
2885 // add x20, x20, #32
2886 // merged into:
2887 // ldr x0, [x20], #32
2888 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2889 if (Update != E) {
2890 // Merge the update into the ld/st.
2891 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2892 /*IsPreIdx=*/false,
2893 /*MergeEither=*/false)) {
2894 MBBI = *NextI;
2895 return true;
2896 }
2897 }
2898
2899 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2900 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2901 return false;
2902
2903 // Look back to try to find a pre-index instruction. For example,
2904 // add x0, x0, #8
2905 // ldr x1, [x0]
2906 // merged into:
2907 // ldr x1, [x0, #8]!
2908 bool MergeEither;
2909 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2910 if (Update != E) {
2911 // Merge the update into the ld/st.
2912 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
2913 /*IsPreIdx=*/true, MergeEither)) {
2914 MBBI = *NextI;
2915 return true;
2916 }
2917 }
2918
2919 // The immediate in the load/store is scaled by the size of the memory
2920 // operation. The immediate in the add we're looking for,
2921 // however, is not, so adjust here.
2922 int UnscaledOffset =
2924
2925 // Look forward to try to find a pre-index instruction. For example,
2926 // ldr x1, [x0, #64]
2927 // add x0, x0, #64
2928 // merged into:
2929 // ldr x1, [x0, #64]!
2930 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2931 if (Update != E) {
2932 // Merge the update into the ld/st.
2933 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2934 /*IsPreIdx=*/true,
2935 /*MergeEither=*/false)) {
2936 MBBI = *NextI;
2937 return true;
2938 }
2939 }
2940
2941 return false;
2942}
2943
2944bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
2945 int Scale) {
2946 MachineInstr &MI = *MBBI;
2947 MachineBasicBlock::iterator E = MI.getParent()->end();
2949
2950 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2951 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2952 return false;
2953
2954 // Look back to try to find a const offset for index LdSt instruction. For
2955 // example,
2956 // mov x8, #LargeImm ; = a * (1<<12) + imm12
2957 // ldr x1, [x0, x8]
2958 // merged into:
2959 // add x8, x0, a * (1<<12)
2960 // ldr x1, [x8, imm12]
2961 unsigned Offset;
2962 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
2963 if (Update != E && (Offset & (Scale - 1)) == 0) {
2964 // Merge the imm12 into the ld/st.
2965 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
2966 return true;
2967 }
2968
2969 return false;
2970}
2971
2972bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2973 bool EnableNarrowZeroStOpt) {
2974 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
2975
2976 bool Modified = false;
2977 // Five transformations to do here:
2978 // 1) Find loads that directly read from stores and promote them by
2979 // replacing with mov instructions. If the store is wider than the load,
2980 // the load will be replaced with a bitfield extract.
2981 // e.g.,
2982 // str w1, [x0, #4]
2983 // ldrh w2, [x0, #6]
2984 // ; becomes
2985 // str w1, [x0, #4]
2986 // lsr w2, w1, #16
2988 MBBI != E;) {
2989 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
2990 Modified = true;
2991 else
2992 ++MBBI;
2993 }
2994 // 2) Merge adjacent zero stores into a wider store.
2995 // e.g.,
2996 // strh wzr, [x0]
2997 // strh wzr, [x0, #2]
2998 // ; becomes
2999 // str wzr, [x0]
3000 // e.g.,
3001 // str wzr, [x0]
3002 // str wzr, [x0, #4]
3003 // ; becomes
3004 // str xzr, [x0]
3005 if (EnableNarrowZeroStOpt)
3007 MBBI != E;) {
3008 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3009 Modified = true;
3010 else
3011 ++MBBI;
3012 }
3013 // 3) Find loads and stores that can be merged into a single load or store
3014 // pair instruction.
3015 // When compiling for SVE 128, also try to combine SVE fill/spill
3016 // instructions into LDP/STP.
3017 // e.g.,
3018 // ldr x0, [x2]
3019 // ldr x1, [x2, #8]
3020 // ; becomes
3021 // ldp x0, x1, [x2]
3022 // e.g.,
3023 // ldr z0, [x2]
3024 // ldr z1, [x2, #1, mul vl]
3025 // ; becomes
3026 // ldp q0, q1, [x2]
3027
3029 DefinedInBB.clear();
3030 DefinedInBB.addLiveIns(MBB);
3031 }
3032
3034 MBBI != E;) {
3035 // Track currently live registers up to this point, to help with
3036 // searching for a rename register on demand.
3037 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3038 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3039 Modified = true;
3040 else
3041 ++MBBI;
3042 }
3043 // 4) Find base register updates that can be merged into the load or store
3044 // as a base-reg writeback.
3045 // e.g.,
3046 // ldr x0, [x2]
3047 // add x2, x2, #4
3048 // ; becomes
3049 // ldr x0, [x2], #4
3051 MBBI != E;) {
3052 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3053 Modified = true;
3054 else
3055 ++MBBI;
3056 }
3057
3058 // 5) Find a register assigned a const value that can be combined
3059 // into the load or store. e.g.,
3060 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3061 // ldr x1, [x0, x8]
3062 // ; becomes
3063 // add x8, x0, a * (1<<12)
3064 // ldr x1, [x8, imm12]
3066 MBBI != E;) {
3067 int Scale;
3068 if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
3069 Modified = true;
3070 else
3071 ++MBBI;
3072 }
3073
3074 return Modified;
3075}
3076
3077bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3078 if (skipFunction(Fn.getFunction()))
3079 return false;
3080
3081 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3082 TII = Subtarget->getInstrInfo();
3083 TRI = Subtarget->getRegisterInfo();
3084 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3085
3086 // Resize the modified and used register unit trackers. We do this once
3087 // per function and then clear the register units each time we optimize a load
3088 // or store.
3089 ModifiedRegUnits.init(*TRI);
3090 UsedRegUnits.init(*TRI);
3091 DefinedInBB.init(*TRI);
3092
3093 bool Modified = false;
3094 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3095 for (auto &MBB : Fn) {
3096 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3097 Modified |= M;
3098 }
3099
3100 return Modified;
3101}
3102
3103// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
3104// stores near one another? Note: The pre-RA instruction scheduler already has
3105// hooks to try and schedule pairable loads/stores together to improve pairing
3106// opportunities. Thus, pre-RA pairing pass may not be worth the effort.
3107
3108// FIXME: When pairing store instructions it's very possible for this pass to
3109// hoist a store with a KILL marker above another use (without a KILL marker).
3110// The resulting IR is invalid, but nothing uses the KILL markers after this
3111// pass, so it's never caused a problem in practice.
3112
3113/// createAArch64LoadStoreOptimizationPass - returns an instance of the
3114/// load / store optimization pass.
3116 return new AArch64LoadStoreOpt();
3117}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static bool isRewritableImplicitDef(unsigned Opc)
static unsigned getPostIndexedOpcode(unsigned Opc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:119
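A minimal sketch of how these debug-output helpers are typically combined; the message text is illustrative only, MI stands for any MachineInstr in scope, and output appears only in assertion-enabled builds when -debug-only=aarch64-ldst-opt is passed:
  // Illustrative only: print the instruction being considered for merging.
  LLVM_DEBUG(dbgs() << "Considering candidate for pairing:\n  ");
  LLVM_DEBUG(MI.dump());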
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-specific information for each MachineFunction.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
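A hedged sketch of how these accessors compose, assuming they are the static AArch64InstrInfo helpers and that MI is a scaled, unpaired load/store with an immediate offset (all names are illustrative):
  // Sketch: recover the effective byte offset of a scaled load/store.
  const MachineOperand &Base = AArch64InstrInfo::getLdStBaseOp(MI);
  const MachineOperand &Off = AArch64InstrInfo::getLdStOffsetOp(MI);
  int Scale = AArch64InstrInfo::getMemScale(MI.getOpcode());
  int64_t ByteOffset = Off.getImm() * Scale;
  bool Paired = AArch64InstrInfo::isPairedLdSt(MI); // pairs carry two data operands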
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
static bool shouldExecute(unsigned CounterName)
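A hedged sketch of the usual guard around an optional transformation; RegRenamingCounter is the counter this pass declares, and the surrounding function is assumed to return false to skip the rewrite:
  // Skip register renaming for this candidate when the debug counter says so,
  // which allows bisecting miscompiles from the command line.
  if (!DebugCounter::shouldExecute(RegRenamingCounter))
    return false;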
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:681
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered in ModifiedRegUnits.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
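A rough sketch of the scanning idiom these members support; MBBI, Limit, TRI and BaseReg (an MCRegister) are assumed to be in scope, and the loop is illustrative rather than the pass's exact search:
  // Track register units modified or used between two points in a block.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  for (MachineInstr &MI : make_range(MBBI, Limit)) {
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
    // Stop once any unit of the base register has been written or read.
    if (!ModifiedRegUnits.available(BaseReg) || !UsedRegUnits.available(BaseReg))
      break;
  }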
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
OpType getOperation() const
Definition MCDwarf.h:720
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
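A small sketch of the splice call as this kind of pass uses it to hoist a base-register update next to the memory access; MBB, MBBI and Update (an iterator to the update instruction) are assumed names:
  // Move the update instruction so it immediately precedes the load/store at
  // MBBI; both instructions stay inside the same function.
  MBB.splice(MBBI, Update->getParent(), Update);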
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution from one <instr,operand> value to a different, new value.
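A hedged sketch of how such a substitution is recorded when one instruction replaces another, in the spirit of addDebugSubstitutionsToTable above; OrigInstr, MergedInstr, MF and DefIdx are assumed names:
  // If the original instruction was tracked for debug info, point its users at
  // operand DefIdx of the merged instruction instead.
  if (unsigned OldInstrNum = OrigInstr.peekDebugInstrNum()) {
    unsigned NewInstrNum = MergedInstr.getDebugInstrNum();
    MF->makeDebugValueSubstitution({OldInstrNum, 0}, {NewInstrNum, DefIdx});
  }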
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
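A hedged sketch of chaining these builder methods to emit a writeback load/store; MBB, InsertPt, DL, TII, Opc, BaseReg, ValReg, Offset and MI (the instruction being replaced) are all assumed to be in scope:
  // Build <Opc> with a base-register writeback def, copying the memory
  // operands and flags of the instruction it replaces.
  MachineInstrBuilder MIB =
      BuildMI(MBB, InsertPt, DL, TII->get(Opc))
          .addDef(BaseReg)   // written-back base
          .addReg(ValReg)    // value loaded or stored
          .addReg(BaseReg)   // base address
          .addImm(Offset)    // immediate offset
          .setMemRefs(MI.memoperands())
          .setMIFlags(MI.getFlags());
  (void)MIB;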
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mop_range operands()
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
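A short sketch of the conservative queries typically made before an instruction is considered for pairing; MemInsns is an assumed list of memory instructions collected so far and the surrounding bool-returning helper is also an assumption:
  // Reject transient pseudos and anything with volatile/ordered memory
  // semantics, then remember plain memory accesses for later alias checks.
  if (MI.isPseudo() || MI.hasOrderedMemoryRef())
    return false;
  if (MI.mayLoad() || MI.mayStore())
    MemInsns.push_back(&MI);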
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e. it does not generate a value that is somehow read in a way that is not represented by the Machine IR.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
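A hedged sketch of rewriting operands during register renaming; RenameReg and NewReg are assumed registers and MI an assumed instruction:
  // Replace every register operand that refers to RenameReg and drop stale
  // kill flags, since NewReg's liveness may extend further.
  for (MachineOperand &MOP : MI.operands()) {
    if (!MOP.isReg() || MOP.getReg() != RenameReg)
      continue;
    MOP.setReg(NewReg);
    MOP.setIsKill(false);
  }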
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
self_iterator getIterator()
Definition ilist_node.h:134
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm is the 6-bit shift amount; shifter encodes the type: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl.
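A tiny sketch round-tripping a shift through this encoding (LSL #3 in this case):
  // Encode "LSL #3" as a shifter immediate, then extract the amount again.
  unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 3);
  unsigned Amount = AArch64_AM::getShiftValue(ShiftImm);
  assert(Amount == 3 && "round trip should preserve the shift amount");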
@ Define
Register definition.
initializer< Ty > init(const Ty &Val)
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
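A brief sketch of advancing a scan past debug instructions; MBBI and E (the block's end iterator) are assumed names, and the surrounding bool-returning search helper is also an assumption:
  // Step to the next real instruction, skipping DBG_VALUE and friends.
  MachineBasicBlock::iterator Next = next_nodbg(MBBI, E);
  if (Next == E)
    return false; // nothing left to pair with in this block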
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
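A hedged sketch combining any_of with the physical-operand range documented above; Units is an assumed LiveRegUnits set and MI an assumed instruction:
  // True if MI defines any physical register whose units are still live.
  bool DefsLiveReg =
      any_of(phys_regs_and_masks(MI), [&](const MachineOperand &MOP) {
        return MOP.isReg() && MOP.isDef() &&
               !Units.available(MOP.getReg().asMCReg());
      });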
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionPass * createAArch64LoadStoreOptimizationPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached, skipping any debug instructions.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
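For example, rounding a running byte offset up to the next 16-byte boundary (the natural slot size of a 64-bit register pair) looks like this:
  // 40 bytes rounds up to 48 with 16-byte alignment.
  uint64_t PairSlotOffset = alignTo(/*Size=*/40, Align(16)); // == 48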
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.