//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// \see mayCombineMisaligned()
static cl::opt<bool>
AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
    cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
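// Illustrative invocation (assumed; only the flag name is taken from the
// cl::opt above):
//   llc -mtriple=armv7a-none-eabi -arm-assume-misaligned-load-store foo.ll
// When enabled, FormCandidates() below drops any merge candidate that
// mayCombineMisaligned() cannot show is safe for misaligned pointers.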

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {

  /// Post-register-allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LiveRegUnits LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &Fn) override;

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().setNoVRegs();
    }

    StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.

      MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
          : MI(&MI), Offset(Offset), Position(Position) {}
    };
    using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;

      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;

      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;

      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;

      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;

      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(
        MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
        int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
        ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
        ArrayRef<std::pair<unsigned, bool>> Regs,
        ArrayRef<MachineInstr*> Instrs);
    MachineInstr *CreateLoadStoreDouble(
        MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
        int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
        ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
        ArrayRef<std::pair<unsigned, bool>> Regs,
        ArrayRef<MachineInstr*> Instrs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
    bool CombineMovBx(MachineBasicBlock &MBB);
  };

} // end anonymous namespace

char ARMLoadStoreOpt::ID = 0;

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
                false)

static bool definesCPSR(const MachineInstr &MI) {
  for (const auto &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

static int getMemoryOpOffset(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI.getDesc().getNumOperands();
  unsigned OffField = MI.getOperand(NumOperands - 3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
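// Worked example (illustrative): "tLDRi r0, [r1, #3]" holds a word-scaled
// immediate, so getMemoryOpOffset() returns 3 * 4 = 12 bytes; an AM3 LDRD
// whose offset field encodes sub with magnitude 8 yields -8.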

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
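// Illustrative: getImmScale() converts a word count into the immediate units
// of the given opcode. A 2-word (8-byte) base update is 2 units for tLDRi
// (word-scaled), 4 for tLDRHi (halfword-scaled) and 8 for tLDRBi
// (byte-scaled).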

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
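// Note (sketch of the accounting above): the LDM/STM register list is
// variadic, so the operand count beyond the instruction's fixed operands
// gives the number of transferred registers, each moving 4 bytes (8 for the
// D-register variants).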

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
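/// Illustrative example (not from the source): after forming
///   tSTMIA_UPD r4!, {r0, r1}     ; writeback advances r4 by 8 bytes
/// a following "tLDRi r2, [r4, #2]" (word-scaled, i.e. r4+8) is rebased to
/// "tLDRi r2, [r4, #0]"; if an adjusted offset would become negative, a
/// "SUBS r4, #8" is inserted instead to reset the base.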
void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, unsigned Base,
                                        unsigned WordOffset,
                                        ARMCC::CondCodes Pred,
                                        unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base, /*TRI=*/nullptr)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;
      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(*MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }
      } else {
        // Can't update the instruction.
        InsertSub = true;
      }
    } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
        .add(t1CondCodeOp(true))
        .addReg(Base)
        .addImm(WordOffset * 4)
        .addImm(Pred)
        .addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base, /*TRI=*/nullptr) ||
        MBBI->definesRegister(Base, /*TRI=*/nullptr))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (!MBB.succ_empty()) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
      .add(t1CondCodeOp(true))
      .addReg(Base)
      .addImm(WordOffset * 4)
      .addImm(Pred)
      .addReg(PredReg);
  }
}

/// Return the first allocatable register of class \p RegClass that is
/// currently free, i.e. not live and not reserved, or 0 if there is none.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (LiveRegs.available(Reg) && !MF->getRegInfo().isReserved(Reg))
      return Reg;
  return 0;
}

595/// Compute live registers just before instruction \p Before (in normal schedule
596/// direction). Computes backwards so multiple queries in the same block must
597/// come in reverse order.
598void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
600 // Initialize if we never queried in this block.
601 if (!LiveRegsValid) {
602 LiveRegs.init(*TRI);
603 LiveRegs.addLiveOuts(MBB);
604 LiveRegPos = MBB.end();
605 LiveRegsValid = true;
606 }
607 // Move backward just before the "Before" position.
608 while (LiveRegPos != Before) {
609 --LiveRegPos;
610 LiveRegs.stepBackward(*LiveRegPos);
611 }
612}

static bool ContainsReg(ArrayRef<std::pair<unsigned, bool>> Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if no transformation is done.
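/// Illustrative merge performed by this function (not from the source):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]      =>   ldmia r4, {r0, r1}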
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
    int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
    ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
    ArrayRef<std::pair<unsigned, bool>> Regs,
    ArrayRef<MachineInstr*> Instrs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi)
      Writeback = false;
    else if (Opcode == ARM::tSTRi)
      return nullptr;
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
                                                          : ARM::t2ADDri)
                  : (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi
                  : (isThumb1 && Offset < 8)      ? ARM::tADDi3
                  : isThumb1                      ? ARM::tADDi8
                                                  : ARM::ADDri;

    if (Offset < 0) {
      // FIXME: There are no Thumb1 load/store instructions with negative
      // offsets. So the Base != ARM::SP might be unnecessary.
      Offset = -Offset;
      BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
                                                        : ARM::t2SUBri)
                : (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3
                : isThumb1                                    ? ARM::tSUBi8
                                                              : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .add(predOps(Pred, PredReg));

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase))
          .addImm(Offset / 4)
          .add(predOps(Pred, PredReg));
      } else
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .add(t1CondCodeOp(true))
          .addReg(Base, getKillRegState(KillOldBase))
          .addImm(Offset)
          .add(predOps(Pred, PredReg));
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase))
        .addImm(Offset)
        .add(predOps(Pred, PredReg))
        .add(condCodeOp());
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only in Thumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  MIB.cloneMergedMemRefs(Instrs);

  return MIB.getInstr();
}

MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
    int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
    ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
    ArrayRef<std::pair<unsigned, bool>> Regs,
    ArrayRef<MachineInstr*> Instrs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  MIB.cloneMergedMemRefs(Instrs);
  return MIB.getInstr();
}

/// Merge the instructions in \p Cand into a single LDM/STM/LDRD/STRD
/// instruction, fixing up kill flags and implicit-def operands on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    Register Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        Register DefReg = MO.getReg();

        if (is_contained(ImpDefs, DefReg))
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg, /*TRI=*/nullptr))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  using iterator = MachineBasicBlock::iterator;

  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(*First);
  Register Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base, /*TRI=*/nullptr);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs,
                                   Cand.Instrs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: we are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
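// Illustrative: offsets -1020..1020 in steps of 4 are accepted, so 1020 and
// -8 pass, while 1024 (too large) and 6 (not a multiple of 4) are rejected.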

/// Return true for loads/stores that can be combined to a double/multi
/// operation without increasing the requirements for alignment.
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
                                 const MachineInstr &MI) {
  // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  // difference.
  unsigned Opcode = MI.getOpcode();
  if (!isi32Load(Opcode) && !isi32Store(Opcode))
    return true;

  // Stack pointer alignment is out of the programmer's control, so we can
  // trust SP-relative loads/stores.
  if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
      STI.getFrameLowering()->getTransientStackAlign() >= Align(4))
    return true;
  return false;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    Register PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
                                     : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm that start with an odd register number, as
    // that needs more uops than single vldrs.
    if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM either do not support it or
    // deprecate it; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Should we be conservative?
    if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // vldm / vstm limits are 32 for S variants, 16 for D variants.
    unsigned Limit;
    switch (Opcode) {
    default:
      Limit = UINT_MAX;
      break;
    case ARM::VLDRD:
    case ARM::VSTRD:
      Limit = 16;
      break;
    }

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      Register Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;
      if (Count == Limit)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
                                     : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR
/// flags generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg,
                                  ARMCC::CondCodes Pred, Register PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::t2SUBspImm:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::t2ADDspImm:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  Register MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}
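// Illustrative: "tSUBspi sp, sp, #2" has a word-scaled immediate, so with a
// matching register and predicate this returns 2 * -4 = -8 bytes; a
// mismatched register or predicate, or a live CPSR def, yields 0.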

/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg,
                 ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg,
                ARMCC::CondCodes Pred, Register PredReg, int &Offset,
                const TargetRegisterInfo *TRI) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  while (NextMBBI != EndMBBI) {
    // Skip debug values.
    while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
      ++NextMBBI;
    if (NextMBBI == EndMBBI)
      return EndMBBI;

    unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
    if (Off) {
      Offset = Off;
      return NextMBBI;
    }

    // SP can only be combined if it is the next instruction after the original
    // MBBI, otherwise we may be incrementing the stack pointer (invalidating
    // anything below the new pointer) when its frame elements are still in
    // use. Other registers can attempt to look further, until a different use
    // or def of the register is found.
    if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
        NextMBBI->definesRegister(Reg, TRI))
      return EndMBBI;

    ++NextMBBI;
  }
  return EndMBBI;
}
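// Illustrative search performed by the helpers above (not from the source):
//   ldmia r4, {r0, r1}     ; MBBI
//   add   r4, r4, #8       ; found by findIncDecAfter, Offset = 8
// An intervening read or write of r4 ends the search, and for SP only the
// instruction immediately after MBBI is ever considered.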

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;
  LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);

  const MachineOperand &BaseOP = MI->getOperand(0);
  Register Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
    if (MO.getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {

      // We couldn't find an inc/dec to merge. But if the base is dead, we
      // can still change to a writeback form as that will save us 2 bytes
      // of code size. It can create WAW hazards though, so only do it if
      // we're minimizing code size.
      if (!STI->hasMinSize() || !BaseKill)
        return false;

      bool HighRegsUsed = false;
      for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
        if (MO.getReg() >= ARM::R8) {
          HighRegsUsed = true;
          break;
        }

      if (!HighRegsUsed)
        MergeInstr = MBB.end();
      else
        return false;
    }
  }
  if (MergeInstr != MBB.end()) {
    LLVM_DEBUG(dbgs() << "  Erasing old increment: " << *MergeInstr);
    MBB.erase(MergeInstr);
  }

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
    MIB.add(MO);

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  LLVM_DEBUG(dbgs() << "  Added new load/store: " << *MIB);
  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2LDRBi8:
  case ARM::t2LDRBi12:
    return ARM::t2LDRB_POST;
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBi12:
    return ARM::t2LDRSB_POST;
  case ARM::t2LDRHi8:
  case ARM::t2LDRHi12:
    return ARM::t2LDRH_POST;
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHi12:
    return ARM::t2LDRSH_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  case ARM::t2STRBi8:
  case ARM::t2STRBi12:
    return ARM::t2STRB_POST;
  case ARM::t2STRHi8:
  case ARM::t2STRHi12:
    return ARM::t2STRH_POST;

  case ARM::MVE_VLDRBS16:
    return ARM::MVE_VLDRBS16_post;
  case ARM::MVE_VLDRBS32:
    return ARM::MVE_VLDRBS32_post;
  case ARM::MVE_VLDRBU16:
    return ARM::MVE_VLDRBU16_post;
  case ARM::MVE_VLDRBU32:
    return ARM::MVE_VLDRBU32_post;
  case ARM::MVE_VLDRHS32:
    return ARM::MVE_VLDRHS32_post;
  case ARM::MVE_VLDRHU32:
    return ARM::MVE_VLDRHU32_post;
  case ARM::MVE_VLDRBU8:
    return ARM::MVE_VLDRBU8_post;
  case ARM::MVE_VLDRHU16:
    return ARM::MVE_VLDRHU16_post;
  case ARM::MVE_VLDRWU32:
    return ARM::MVE_VLDRWU32_post;
  case ARM::MVE_VSTRB16:
    return ARM::MVE_VSTRB16_post;
  case ARM::MVE_VSTRB32:
    return ARM::MVE_VSTRB32_post;
  case ARM::MVE_VSTRH32:
    return ARM::MVE_VSTRH32_post;
  case ARM::MVE_VSTRBU8:
    return ARM::MVE_VSTRBU8_post;
  case ARM::MVE_VSTRHU16:
    return ARM::MVE_VSTRHU16_post;
  case ARM::MVE_VSTRWU32:
    return ARM::MVE_VSTRWU32_post;

  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
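/// Illustrative example (not from the source):
///   ldr r0, [r1]           =>   ldr r0, [r1], #4    (post-indexed)
///   add r1, r1, #4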
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;
  LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);

  Register Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
    if (MergeInstr == MBB.end())
      return false;

    NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    if ((isAM5 && Offset != Bytes) ||
        (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
      if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
        return false;
    }
  }
  LLVM_DEBUG(dbgs() << "  Erasing old increment: " << *MergeInstr);
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
                   .addReg(Base, getDefRegState(true)) // WB base register
                   .addReg(Base, getKillRegState(isLd ? BaseKill : false))
                   .addImm(Pred)
                   .addReg(PredReg)
                   .addReg(MO.getReg(), (isLd ? getDefRegState(true)
                                              : getKillRegState(MO.isKill())))
                   .cloneMemRefs(*MI);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        auto MIB =
            BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
                .addReg(Base, RegState::Define)
                .addReg(Base)
                .addImm(Offset)
                .addImm(Pred)
                .addReg(PredReg)
                .cloneMemRefs(*MI);
        (void)MIB;
        LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        auto MIB =
            BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
                .addReg(Base, RegState::Define)
                .addReg(Base)
                .addReg(0)
                .addImm(Imm)
                .add(predOps(Pred, PredReg))
                .cloneMemRefs(*MI);
        (void)MIB;
        LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      auto MIB =
          BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
              .addReg(Base, RegState::Define)
              .addReg(Base)
              .addImm(Offset)
              .add(predOps(Pred, PredReg))
              .cloneMemRefs(*MI);
      (void)MIB;
      LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
                     .addReg(MO.getReg(), getKillRegState(MO.isKill()))
                     .addReg(Base)
                     .addReg(0)
                     .addImm(Imm)
                     .add(predOps(Pred, PredReg))
                     .cloneMemRefs(*MI);
      (void)MIB;
      LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
    } else {
      // t2STR_PRE, t2STR_POST
      auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
                     .addReg(MO.getReg(), getKillRegState(MO.isKill()))
                     .addReg(Base)
                     .addImm(Offset)
                     .add(predOps(Pred, PredReg))
                     .cloneMemRefs(*MI);
      (void)MIB;
      LLVM_DEBUG(dbgs() << "  Added new instruction: " << *MIB);
    }
  }
  MBB.erase(MBBI);

  return true;
}

bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;
  LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  Register Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
    if (MergeInstr == MBB.end())
      return false;
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    if (!isLegalAddressImm(NewOpc, Offset, TII))
      return false;
  }
  LLVM_DEBUG(dbgs() << "  Erasing old increment: " << *MergeInstr);
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.add(MO);
  MIB.cloneMemRefs(MI);

  LLVM_DEBUG(dbgs() << "  Added new load/store: " << *MIB);
  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    break;
  default:
    return false;
  }
  if (!MI.getOperand(1).isReg())
    return false;

  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand &MMO = **MI.memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  // TODO: We could allow unordered and monotonic atomics here, but we need to
  // make sure the resulting ldm/stm is correctly marked as atomic.
  if (MMO.isVolatile() || MMO.isAtomic())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO.getAlign() < Align(4))
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI.getOperand(1).isUndef())
    return false;

  return true;
}
1729
1730 static void InsertLDR_STR(MachineBasicBlock &MBB,
1731 MachineBasicBlock::iterator &MBBI, int Offset,
1732 bool isDef, unsigned NewOpc, unsigned Reg,
1733 bool RegDeadKill, bool RegUndef, unsigned BaseReg,
1734 bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
1735 unsigned PredReg, const TargetInstrInfo *TII,
1736 MachineInstr *MI) {
1737 if (isDef) {
1738 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1739 TII->get(NewOpc))
1740 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1741 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1742 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1743 // FIXME: This is overly conservative; the new instruction accesses 4
1744 // bytes, not 8.
1745 MIB.cloneMemRefs(*MI);
1746 } else {
1747 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1748 TII->get(NewOpc))
1749 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1750 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1751 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1752 // FIXME: This is overly conservative; the new instruction accesses 4
1753 // bytes, not 8.
1754 MIB.cloneMemRefs(*MI);
1755 }
1756}
1757
1758bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1759 MachineBasicBlock::iterator &MBBI) {
1760 MachineInstr *MI = &*MBBI;
1761 unsigned Opcode = MI->getOpcode();
1762 // FIXME: Code/comments below check for Opcode == t2STRDi8, but this check
1763 // returns early when we see that opcode, so it is never handled below.
1764 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1765 return false;
1766
1767 const MachineOperand &BaseOp = MI->getOperand(2);
1768 Register BaseReg = BaseOp.getReg();
1769 Register EvenReg = MI->getOperand(0).getReg();
1770 Register OddReg = MI->getOperand(1).getReg();
1771 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1772 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1773
1774 // ARM errata 602117: LDRD with base in list may result in incorrect base
1775 // register when interrupted or faulted.
1776 bool Errata602117 = EvenReg == BaseReg &&
1777 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1778 // ARM LDRD/STRD needs consecutive registers.
1779 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1780 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1781
1782 if (!Errata602117 && !NonConsecutiveRegs)
1783 return false;
1784
1785 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1786 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1787 bool EvenDeadKill = isLd ?
1788 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1789 bool EvenUndef = MI->getOperand(0).isUndef();
1790 bool OddDeadKill = isLd ?
1791 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1792 bool OddUndef = MI->getOperand(1).isUndef();
1793 bool BaseKill = BaseOp.isKill();
1794 bool BaseUndef = BaseOp.isUndef();
1795 assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
1796 "register offset not handled below");
1797 int OffImm = getMemoryOpOffset(*MI);
1798 Register PredReg;
1799 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1800
1801 if (OddRegNum > EvenRegNum && OffImm == 0) {
1802 // Ascending register numbers and no offset. It's safe to change it to a
1803 // ldm or stm.
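    // e.g. (hypothetical registers): LDRD r2, r3, [r0] becomes
    // LDMIA r0, {r2, r3}, sidestepping LDRD's consecutive-register and
    // errata constraints.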
1804 unsigned NewOpc = (isLd)
1805 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1806 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1807 if (isLd) {
1808 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1809 .addReg(BaseReg, getKillRegState(BaseKill))
1810 .addImm(Pred).addReg(PredReg)
1811 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1812 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
1813 .cloneMemRefs(*MI);
1814 ++NumLDRD2LDM;
1815 } else {
1816 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1817 .addReg(BaseReg, getKillRegState(BaseKill))
1818 .addImm(Pred).addReg(PredReg)
1819 .addReg(EvenReg,
1820 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1821 .addReg(OddReg,
1822 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1823 .cloneMemRefs(*MI);
1824 ++NumSTRD2STM;
1825 }
1826 } else {
1827 // Split into two instructions.
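    // e.g. (hypothetical registers): LDRD r1, r2, [r3, #4] becomes
    //   LDR r1, [r3, #4]
    //   LDR r2, [r3, #8]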
1828 unsigned NewOpc = (isLd)
1829 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1830 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1831 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1832 // so adjust and use t2LDRi12 here for that.
1833 unsigned NewOpc2 = (isLd)
1834 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1835 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1836 // If this is a load, make sure the first load does not clobber the base
1837 // register before the second load reads it.
1838 if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
1839 assert(!TRI->regsOverlap(OddReg, BaseReg));
1840 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1841 false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
1842 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1843 false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1844 MI);
1845 } else {
1846 if (OddReg == EvenReg && EvenDeadKill) {
1847 // If the two source operands are the same, the kill marker is
1848 // probably on the first one. e.g.
1849 // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1850 EvenDeadKill = false;
1851 OddDeadKill = true;
1852 }
1853 // Never kill the base register in the first instruction.
1854 if (EvenReg == BaseReg)
1855 EvenDeadKill = false;
1856 InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
1857 EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
1858 MI);
1859 InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
1860 OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
1861 MI);
1862 }
1863 if (isLd)
1864 ++NumLDRD2LDR;
1865 else
1866 ++NumSTRD2STR;
1867 }
1868
1869 MBBI = MBB.erase(MBBI);
1870 return true;
1871}
1872
1873/// An optimization pass to turn multiple LDR / STR ops of the same base and
1874/// incrementing offset into LDM / STM ops.
1875bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1876 MemOpQueue MemOps;
1877 unsigned CurrBase = 0;
1878 unsigned CurrOpc = ~0u;
1879 ARMCC::CondCodes CurrPred = ARMCC::AL;
1880 unsigned Position = 0;
1881 assert(Candidates.size() == 0);
1882 assert(MergeBaseCandidates.size() == 0);
1883 LiveRegsValid = false;
1884
1885 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1886 I = MBBI) {
1887 // The instruction in front of the iterator is the one we look at.
1888 MBBI = std::prev(I);
1889 if (FixInvalidRegPairOp(MBB, MBBI))
1890 continue;
1891 ++Position;
1892
1893 if (isMemoryOp(*MBBI)) {
1894 unsigned Opcode = MBBI->getOpcode();
1895 const MachineOperand &MO = MBBI->getOperand(0);
1896 Register Reg = MO.getReg();
1897 Register Base = getLoadStoreBaseOp(*MBBI).getReg();
1898 Register PredReg;
1899 ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
1900 int Offset = getMemoryOpOffset(*MBBI);
1901 if (CurrBase == 0) {
1902 // Start of a new chain.
1903 CurrBase = Base;
1904 CurrOpc = Opcode;
1905 CurrPred = Pred;
1906 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1907 continue;
1908 }
1909 // Note: No need to match PredReg in the next if.
1910 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1911 // Watch out for:
1912 // r4 := ldr [r0, #8]
1913 // r4 := ldr [r0, #4]
1914 // or
1915 // r0 := ldr [r0]
1916 // If a load overrides the base register or a register loaded by
1917 // another load in our chain, we cannot take this instruction.
1918 bool Overlap = false;
1919 if (isLoadSingle(Opcode)) {
1920 Overlap = (Base == Reg);
1921 if (!Overlap) {
1922 for (const MemOpQueueEntry &E : MemOps) {
1923 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1924 Overlap = true;
1925 break;
1926 }
1927 }
1928 }
1929 }
1930
1931 if (!Overlap) {
1932 // Check offset and sort memory operation into the current chain.
1933 if (Offset > MemOps.back().Offset) {
1934 MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1935 continue;
1936 } else {
1937 MemOpQueue::iterator MI, ME;
1938 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1939 if (Offset < MI->Offset) {
1940 // Found a place to insert.
1941 break;
1942 }
1943 if (Offset == MI->Offset) {
1944 // Collision, abort.
1945 MI = ME;
1946 break;
1947 }
1948 }
1949 if (MI != MemOps.end()) {
1950 MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1951 continue;
1952 }
1953 }
1954 }
1955 }
1956
1957 // Don't advance the iterator; the op will start a new chain next.
1958 MBBI = I;
1959 --Position;
1960 // Fallthrough to look into existing chain.
1961 } else if (MBBI->isDebugInstr()) {
1962 continue;
1963 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1964 MBBI->getOpcode() == ARM::t2STRDi8) {
1965 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
1966 // remember them because we may still be able to merge add/sub into them.
1967 MergeBaseCandidates.push_back(&*MBBI);
1968 }
1969
1970 // If we are here then the chain is broken; extract candidates for a merge.
1971 if (MemOps.size() > 0) {
1972 FormCandidates(MemOps);
1973 // Reset for the next chain.
1974 CurrBase = 0;
1975 CurrOpc = ~0u;
1976 CurrPred = ARMCC::AL;
1977 MemOps.clear();
1978 }
1979 }
1980 if (MemOps.size() > 0)
1981 FormCandidates(MemOps);
1982
1983 // Sort candidates so they get processed from the end to the beginning of the
1984 // basic block later; this is necessary for liveness calculation.
1985 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1986 return M0->InsertPos < M1->InsertPos;
1987 };
1988 llvm::sort(Candidates, LessThan);
1989
1990 // Go through list of candidates and merge.
1991 bool Changed = false;
1992 for (const MergeCandidate *Candidate : Candidates) {
1993 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1994 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1995 // Merge preceding/trailing base inc/dec into the merged op.
1996 if (Merged) {
1997 Changed = true;
1998 unsigned Opcode = Merged->getOpcode();
1999 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
2000 MergeBaseUpdateLSDouble(*Merged);
2001 else
2002 MergeBaseUpdateLSMultiple(Merged);
2003 } else {
2004 for (MachineInstr *MI : Candidate->Instrs) {
2005 if (MergeBaseUpdateLoadStore(MI))
2006 Changed = true;
2007 }
2008 }
2009 } else {
2010 assert(Candidate->Instrs.size() == 1);
2011 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
2012 Changed = true;
2013 }
2014 }
2015 Candidates.clear();
2016 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
2017 for (MachineInstr *MI : MergeBaseCandidates)
2018 MergeBaseUpdateLSDouble(*MI);
2019 MergeBaseCandidates.clear();
2020
2021 return Changed;
2022}
2023
2024 /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
2025 /// into the preceding stack restore so it directly restores the value of LR
2026/// into pc.
2027/// ldmfd sp!, {..., lr}
2028/// bx lr
2029/// or
2030/// ldmfd sp!, {..., lr}
2031/// mov pc, lr
2032/// =>
2033/// ldmfd sp!, {..., pc}
2034bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
2035 // Thumb1 LDM doesn't allow high registers.
2036 if (isThumb1) return false;
2037 if (MBB.empty()) return false;
2038
2039 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
2040 if (MBBI != MBB.begin() && MBBI != MBB.end() &&
2041 (MBBI->getOpcode() == ARM::BX_RET ||
2042 MBBI->getOpcode() == ARM::tBX_RET ||
2043 MBBI->getOpcode() == ARM::MOVPCLR)) {
2044 MachineBasicBlock::iterator PrevI = std::prev(MBBI);
2045 // Ignore any debug instructions.
2046 while (PrevI->isDebugInstr() && PrevI != MBB.begin())
2047 --PrevI;
2048 MachineInstr &PrevMI = *PrevI;
2049 unsigned Opcode = PrevMI.getOpcode();
2050 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
2051 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
2052 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
2053 MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
2054 if (MO.getReg() != ARM::LR)
2055 return false;
2056 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
2057 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
2058 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
2059 PrevMI.setDesc(TII->get(NewOpc));
2060 MO.setReg(ARM::PC);
2061 PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
2062 MBB.erase(MBBI);
2063 return true;
2064 }
2065 }
2066 return false;
2067}
2068
2069bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
2070 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
2071 if (MBBI == MBB.begin() || MBBI == MBB.end() ||
2072 MBBI->getOpcode() != ARM::tBX_RET)
2073 return false;
2074
2075 MachineBasicBlock::iterator Prev = MBBI;
2076 --Prev;
2077 if (Prev->getOpcode() != ARM::tMOVr ||
2078 !Prev->definesRegister(ARM::LR, /*TRI=*/nullptr))
2079 return false;
2080
2081 for (auto Use : Prev->uses())
2082 if (Use.isKill()) {
2083 assert(STI->hasV4TOps());
2084 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
2085 .addReg(Use.getReg(), RegState::Kill)
2086 .add(predOps(ARMCC::AL))
2087 .copyImplicitOps(*MBBI);
2088 MBB.erase(MBBI);
2089 MBB.erase(Prev);
2090 return true;
2091 }
2092
2093 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
2094}
2095
2096bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2097 if (skipFunction(Fn.getFunction()))
2098 return false;
2099
2100 MF = &Fn;
2101 STI = &Fn.getSubtarget<ARMSubtarget>();
2102 TL = STI->getTargetLowering();
2103 AFI = Fn.getInfo<ARMFunctionInfo>();
2104 TII = STI->getInstrInfo();
2105 TRI = STI->getRegisterInfo();
2106
2107 RegClassInfoValid = false;
2108 isThumb2 = AFI->isThumb2Function();
2109 isThumb1 = AFI->isThumbFunction() && !isThumb2;
2110
2111 bool Modified = false, ModifiedLDMReturn = false;
2112 for (MachineBasicBlock &MBB : Fn) {
2113 Modified |= LoadStoreMultipleOpti(MBB);
2114 if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
2115 ModifiedLDMReturn |= MergeReturnIntoLDM(MBB);
2116 if (isThumb1)
2117 Modified |= CombineMovBx(MBB);
2118 }
2119 Modified |= ModifiedLDMReturn;
2120
2121 // If we merged a BX instruction into an LDM, we need to re-calculate whether
2122 // LR is restored. This check needs to consider the whole function, not just
2123 // the instruction(s) we changed, because there may be other BX returns which
2124 // still need LR to be restored.
2125 if (ModifiedLDMReturn)
2126 ARMFrameLowering::updateLRRestored(*MF);
2127
2128 Allocator.DestroyAll();
2129 return Modified;
2130}
2131
2132#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
2133 "ARM pre- register allocation load / store optimization pass"
2134
2135namespace {
2136
2137 /// Pre- register allocation pass that moves loads / stores from consecutive
2138 /// locations closer together to make it more likely they will be combined later.
2139 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
2140 static char ID;
2141
2142 AliasAnalysis *AA;
2143 const DataLayout *TD;
2144 const TargetInstrInfo *TII;
2145 const TargetRegisterInfo *TRI;
2146 const ARMSubtarget *STI;
2147 MachineRegisterInfo *MRI;
2148 MachineDominatorTree *DT;
2149 MachineFunction *MF;
2150
2151 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
2152
2153 bool runOnMachineFunction(MachineFunction &Fn) override;
2154
2155 StringRef getPassName() const override {
2156 return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
2157 }
2158
2159 void getAnalysisUsage(AnalysisUsage &AU) const override {
2160 AU.addRequired<AAResultsWrapperPass>();
2161 AU.addRequired<MachineDominatorTreeWrapperPass>();
2162 AU.addPreserved<MachineDominatorTreeWrapperPass>();
2163 MachineFunctionPass::getAnalysisUsage(AU);
2164 }
2165
2166 private:
2167 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2168 unsigned &NewOpc, Register &EvenReg, Register &OddReg,
2169 Register &BaseReg, int &Offset, Register &PredReg,
2170 ARMCC::CondCodes &Pred, bool &isT2);
2171 bool RescheduleOps(
2172 MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops,
2173 unsigned Base, bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2174 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap);
2175 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2176 bool DistributeIncrements();
2177 bool DistributeIncrements(Register Base);
2178 };
2179
2180} // end anonymous namespace
2181
2182char ARMPreAllocLoadStoreOpt::ID = 0;
2183
2184INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2185 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
2186 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
2187 INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
2188 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
2189
2190// Limit the number of instructions to be rescheduled.
2191// FIXME: tune this limit, and/or come up with some better heuristics.
2192static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
2193 cl::init(8), cl::Hidden);
2194
2195bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2196 if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
2197 return false;
2198
2199 TD = &Fn.getDataLayout();
2200 STI = &Fn.getSubtarget<ARMSubtarget>();
2201 TII = STI->getInstrInfo();
2202 TRI = STI->getRegisterInfo();
2203 MRI = &Fn.getRegInfo();
2204 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2205 MF = &Fn;
2206 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2207
2208 bool Modified = DistributeIncrements();
2209 for (MachineBasicBlock &MFI : Fn)
2210 Modified |= RescheduleLoadStoreInstrs(&MFI);
2211
2212 return Modified;
2213}
2214
2215static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2216 MachineBasicBlock::iterator I,
2217 MachineBasicBlock::iterator E,
2218 SmallPtrSetImpl<MachineInstr *> &MemOps,
2219 SmallSet<unsigned, 4> &MemRegs,
2220 const TargetRegisterInfo *TRI,
2221 AliasAnalysis *AA) {
2222 // Are there stores / loads / calls between them?
2223 SmallSet<unsigned, 4> AddedRegPressure;
2224 while (++I != E) {
2225 if (I->isDebugInstr() || MemOps.count(&*I))
2226 continue;
2227 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2228 return false;
2229 if (I->mayStore() || (!isLd && I->mayLoad()))
2230 for (MachineInstr *MemOp : MemOps)
2231 if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2232 return false;
2233 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2234 MachineOperand &MO = I->getOperand(j);
2235 if (!MO.isReg())
2236 continue;
2237 Register Reg = MO.getReg();
2238 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2239 return false;
2240 if (Reg != Base && !MemRegs.count(Reg))
2241 AddedRegPressure.insert(Reg);
2242 }
2243 }
2244
2245 // Estimate register pressure increase due to the transformation.
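  // Heuristic sketch: everything defined between the accesses stays live across
  // the new insertion point, so beyond a small cluster we only move when the
  // extra live values (AddedRegPressure) number at most twice the memory regs.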
2246 if (MemRegs.size() <= 4)
2247 // Ok if we are moving small number of instructions.
2248 return true;
2249 return AddedRegPressure.size() <= MemRegs.size() * 2;
2250}
2251
2252bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
2253 MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
2254 Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
2255 Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
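  // Intent, sketched with hypothetical operands: a pair such as
  //   LDRi12 r2, [r0, #0]
  //   LDRi12 r3, [r0, #4]
  // may become LDRD r2, r3, [r0] when the subtarget has v5TE ops, the access
  // is suitably aligned, and the combined offset fits the dual-access encoding.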
2256 // Make sure we're allowed to generate LDRD/STRD.
2257 if (!STI->hasV5TEOps())
2258 return false;
2259
2260 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2261 unsigned Scale = 1;
2262 unsigned Opcode = Op0->getOpcode();
2263 if (Opcode == ARM::LDRi12) {
2264 NewOpc = ARM::LDRD;
2265 } else if (Opcode == ARM::STRi12) {
2266 NewOpc = ARM::STRD;
2267 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2268 NewOpc = ARM::t2LDRDi8;
2269 Scale = 4;
2270 isT2 = true;
2271 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2272 NewOpc = ARM::t2STRDi8;
2273 Scale = 4;
2274 isT2 = true;
2275 } else {
2276 return false;
2277 }
2278
2279 // Make sure the base address satisfies i64 ld / st alignment requirement.
2280 // At the moment, we ignore the memory operand's value.
2281 // If we want to use AliasAnalysis, we should check it accordingly.
2282 if (!Op0->hasOneMemOperand() ||
2283 (*Op0->memoperands_begin())->isVolatile() ||
2284 (*Op0->memoperands_begin())->isAtomic())
2285 return false;
2286
2287 Align Alignment = (*Op0->memoperands_begin())->getAlign();
2288 Align ReqAlign = STI->getDualLoadStoreAlignment();
2289 if (Alignment < ReqAlign)
2290 return false;
2291
2292 // Then make sure the immediate offset fits.
2293 int OffImm = getMemoryOpOffset(*Op0);
2294 if (isT2) {
2295 int Limit = (1 << 8) * Scale;
2296 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2297 return false;
2298 Offset = OffImm;
2299 } else {
2300 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2301 if (OffImm < 0) {
2302 AddSub = ARM_AM::sub;
2303 OffImm = - OffImm;
2304 }
2305 int Limit = (1 << 8) * Scale;
2306 if (OffImm >= Limit || (OffImm & (Scale-1)))
2307 return false;
2308 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2309 }
2310 FirstReg = Op0->getOperand(0).getReg();
2311 SecondReg = Op1->getOperand(0).getReg();
2312 if (FirstReg == SecondReg)
2313 return false;
2314 BaseReg = Op0->getOperand(1).getReg();
2315 Pred = getInstrPredicate(*Op0, PredReg);
2316 dl = Op0->getDebugLoc();
2317 return true;
2318}
2319
2320bool ARMPreAllocLoadStoreOpt::RescheduleOps(
2321 MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
2322 bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
2323 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap) {
2324 bool RetVal = false;
2325
2326 // Sort by offset (in reverse order).
2327 llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
2328 int LOffset = getMemoryOpOffset(*LHS);
2329 int ROffset = getMemoryOpOffset(*RHS);
2330 assert(LHS == RHS || LOffset != ROffset);
2331 return LOffset > ROffset;
2332 });
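  // Note: with the descending sort above, Ops.back() holds the lowest offset,
  // so the pop_back-driven loops below consume the chain lowest-offset-first.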
2333
2334 // The loads / stores of the same base are in order. Scan them from first to
2335 // last and check for the following:
2336 // 1. Any def of base.
2337 // 2. Any gaps.
2338 while (Ops.size() > 1) {
2339 unsigned FirstLoc = ~0U;
2340 unsigned LastLoc = 0;
2341 MachineInstr *FirstOp = nullptr;
2342 MachineInstr *LastOp = nullptr;
2343 int LastOffset = 0;
2344 unsigned LastOpcode = 0;
2345 unsigned LastBytes = 0;
2346 unsigned NumMove = 0;
2347 for (MachineInstr *Op : llvm::reverse(Ops)) {
2348 // Make sure each operation has the same kind.
2349 unsigned LSMOpcode
2350 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2351 if (LastOpcode && LSMOpcode != LastOpcode)
2352 break;
2353
2354 // Check that we have a continuous set of offsets.
2355 int Offset = getMemoryOpOffset(*Op);
2356 unsigned Bytes = getLSMultipleTransferSize(Op);
2357 if (LastBytes) {
2358 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2359 break;
2360 }
2361
2362 // Don't try to reschedule too many instructions.
2363 if (NumMove == InstReorderLimit)
2364 break;
2365
2366 // Found a mergeable instruction; save information about it.
2367 ++NumMove;
2368 LastOffset = Offset;
2369 LastBytes = Bytes;
2370 LastOpcode = LSMOpcode;
2371
2372 unsigned Loc = MI2LocMap[Op];
2373 if (Loc <= FirstLoc) {
2374 FirstLoc = Loc;
2375 FirstOp = Op;
2376 }
2377 if (Loc >= LastLoc) {
2378 LastLoc = Loc;
2379 LastOp = Op;
2380 }
2381 }
2382
2383 if (NumMove <= 1)
2384 Ops.pop_back();
2385 else {
2386 SmallPtrSet<MachineInstr*, 4> MemOps;
2387 SmallSet<unsigned, 4> MemRegs;
2388 for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2389 MemOps.insert(Ops[i]);
2390 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2391 }
2392
2393 // Be conservative, if the instructions are too far apart, don't
2394 // move them. We want to limit the increase of register pressure.
2395 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2396 if (DoMove)
2397 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2398 MemOps, MemRegs, TRI, AA);
2399 if (!DoMove) {
2400 for (unsigned i = 0; i != NumMove; ++i)
2401 Ops.pop_back();
2402 } else {
2403 // This is the new location for the loads / stores.
2404 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2405 while (InsertPos != MBB->end() &&
2406 (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2407 ++InsertPos;
2408
2409 // If we are moving a pair of loads / stores, see if it makes sense
2410 // to try to allocate a pair of registers that can form register pairs.
2411 MachineInstr *Op0 = Ops.back();
2412 MachineInstr *Op1 = Ops[Ops.size()-2];
2413 Register FirstReg, SecondReg;
2414 Register BaseReg, PredReg;
2415 ARMCC::CondCodes Pred = ARMCC::AL;
2416 bool isT2 = false;
2417 unsigned NewOpc = 0;
2418 int Offset = 0;
2419 DebugLoc dl;
2420 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2421 FirstReg, SecondReg, BaseReg,
2422 Offset, PredReg, Pred, isT2)) {
2423 Ops.pop_back();
2424 Ops.pop_back();
2425
2426 const MCInstrDesc &MCID = TII->get(NewOpc);
2427 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2428 MRI->constrainRegClass(FirstReg, TRC);
2429 MRI->constrainRegClass(SecondReg, TRC);
2430
2431 // Form the pair instruction.
2432 if (isLd) {
2433 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2434 .addReg(FirstReg, RegState::Define)
2435 .addReg(SecondReg, RegState::Define)
2436 .addReg(BaseReg);
2437 // FIXME: We're converting from LDRi12 to an insn that still
2438 // uses addrmode2, so we need an explicit offset reg. It should
2439 // always be reg0 since we're transforming LDRi12s.
2440 if (!isT2)
2441 MIB.addReg(0);
2442 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2443 MIB.cloneMergedMemRefs({Op0, Op1});
2444 LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2445 ++NumLDRDFormed;
2446 } else {
2447 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2448 .addReg(FirstReg)
2449 .addReg(SecondReg)
2450 .addReg(BaseReg);
2451 // FIXME: We're converting from LDRi12 to an insn that still
2452 // uses addrmode2, so we need an explicit offset reg. It should
2453 // always be reg0 since we're transforming STRi12s.
2454 if (!isT2)
2455 MIB.addReg(0);
2456 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2457 MIB.cloneMergedMemRefs({Op0, Op1});
2458 LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2459 ++NumSTRDFormed;
2460 }
2461 MBB->erase(Op0);
2462 MBB->erase(Op1);
2463
2464 if (!isT2) {
2465 // Add register allocation hints to form register pairs.
2466 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2467 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2468 }
2469 } else {
2470 for (unsigned i = 0; i != NumMove; ++i) {
2471 MachineInstr *Op = Ops.pop_back_val();
2472 if (isLd) {
2473 // Populate RegisterMap with all Registers defined by loads.
2474 Register Reg = Op->getOperand(0).getReg();
2475 RegisterMap[Reg];
2476 }
2477
2478 MBB->splice(InsertPos, MBB, Op);
2479 }
2480 }
2481
2482 NumLdStMoved += NumMove;
2483 RetVal = true;
2484 }
2485 }
2486 }
2487
2488 return RetVal;
2489}
2490
2491 static void forEachDbgRegOperand(MachineInstr *MI,
2492 std::function<void(MachineOperand &)> Fn) {
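  // Operand layout assumed here: a plain DBG_VALUE carries its value in
  // operand 0, while a DBG_VALUE_LIST's location operands start at index 2,
  // after the variable and expression operands.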
2493 if (MI->isNonListDebugValue()) {
2494 auto &Op = MI->getOperand(0);
2495 if (Op.isReg())
2496 Fn(Op);
2497 } else {
2498 for (unsigned I = 2; I < MI->getNumOperands(); I++) {
2499 auto &Op = MI->getOperand(I);
2500 if (Op.isReg())
2501 Fn(Op);
2502 }
2503 }
2504}
2505
2506// Update the RegisterMap with the instruction that was moved because a
2507// DBG_VALUE_LIST may need to be moved again.
2508 static void updateRegisterMapForDbgValueListAfterMove(
2509 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap,
2510 MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace) {
2511
2512 forEachDbgRegOperand(DbgValueListInstr, [&](MachineOperand &Op) {
2513 auto RegIt = RegisterMap.find(Op.getReg());
2514 if (RegIt == RegisterMap.end())
2515 return;
2516 auto &InstrVec = RegIt->getSecond();
2517 llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
2518 });
2519}
2520
2521 static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI) {
2522 auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
2523 MI->getDebugLoc()->getInlinedAt());
2524 return DbgVar;
2525}
2526
2527bool
2528ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2529 bool RetVal = false;
2530
2531 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2532 using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
2533 using BaseVec = SmallVector<unsigned, 4>;
2534 Base2InstMap Base2LdsMap;
2535 Base2InstMap Base2StsMap;
2536 BaseVec LdBases;
2537 BaseVec StBases;
2538 // This map is used to track the relationship between the virtual
2539 // register that is the result of a load that is moved and the DBG_VALUE
2540 // MachineInstr pointer that uses that virtual register.
2541 SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
2542
2543 unsigned Loc = 0;
2544 MachineBasicBlock::iterator MBBI = MBB->begin();
2545 MachineBasicBlock::iterator E = MBB->end();
2546 while (MBBI != E) {
2547 for (; MBBI != E; ++MBBI) {
2548 MachineInstr &MI = *MBBI;
2549 if (MI.isCall() || MI.isTerminator()) {
2550 // Stop at barriers.
2551 ++MBBI;
2552 break;
2553 }
2554
2555 if (!MI.isDebugInstr())
2556 MI2LocMap[&MI] = ++Loc;
2557
2558 if (!isMemoryOp(MI))
2559 continue;
2560 Register PredReg;
2561 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2562 continue;
2563
2564 int Opc = MI.getOpcode();
2565 bool isLd = isLoadSingle(Opc);
2566 Register Base = MI.getOperand(1).getReg();
2567 int Offset = getMemoryOpOffset(MI);
2568 bool StopHere = false;
2569 auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
2570 auto [BI, Inserted] = Base2Ops.try_emplace(Base);
2571 if (Inserted) {
2572 BI->second.push_back(&MI);
2573 Bases.push_back(Base);
2574 return;
2575 }
2576 for (const MachineInstr *MI : BI->second) {
2577 if (Offset == getMemoryOpOffset(*MI)) {
2578 StopHere = true;
2579 break;
2580 }
2581 }
2582 if (!StopHere)
2583 BI->second.push_back(&MI);
2584 };
2585
2586 if (isLd)
2587 FindBases(Base2LdsMap, LdBases);
2588 else
2589 FindBases(Base2StsMap, StBases);
2590
2591 if (StopHere) {
2592 // Found a duplicate (a base+offset combination that's seen earlier).
2593 // Backtrack.
2594 --Loc;
2595 break;
2596 }
2597 }
2598
2599 // Re-schedule loads.
2600 for (unsigned Base : LdBases) {
2601 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2602 if (Lds.size() > 1)
2603 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
2604 }
2605
2606 // Re-schedule stores.
2607 for (unsigned Base : StBases) {
2608 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2609 if (Sts.size() > 1)
2610 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
2611 }
2612
2613 if (MBBI != E) {
2614 Base2LdsMap.clear();
2615 Base2StsMap.clear();
2616 LdBases.clear();
2617 StBases.clear();
2618 }
2619 }
2620
2621 // Reschedule DBG_VALUEs to match any loads that were moved. When a load is
2622 // sunk beyond a DBG_VALUE that is referring to it, the DBG_VALUE becomes a
2623 // use-before-def, resulting in a loss of debug info.
2624
2625 // Example:
2626 // Before the Pre Register Allocation Load Store Pass
2627 // inst_a
2628 // %2 = ld ...
2629 // inst_b
2630 // DBG_VALUE %2, "x", ...
2631 // %3 = ld ...
2632
2633 // After the Pass:
2634 // inst_a
2635 // inst_b
2636 // DBG_VALUE %2, "x", ...
2637 // %2 = ld ...
2638 // %3 = ld ...
2639
2640 // The code below addresses this by moving the DBG_VALUE to the position
2641 // immediately after the load.
2642
2643 // Example:
2644 // After the code below:
2645 // inst_a
2646 // inst_b
2647 // %2 = ld ...
2648 // DBG_VALUE %2, "x", ...
2649 // %3 = ld ...
2650
2651 // The algorithm works in two phases: First, RescheduleOps() populates the
2652 // RegisterMap with the registers that were moved as keys; no value is
2653 // inserted yet. In the next phase, every MachineInstr in a basic block is
2654 // iterated over. If it is a valid DBG_VALUE or DBG_VALUE_LIST and it uses one
2655 // or more registers in the RegisterMap, the RegisterMap and InstrMap are
2656 // populated with the MachineInstr. If the DBG_VALUE or DBG_VALUE_LIST
2657 // describes debug information for a variable that already exists in the
2658 // DbgValueSinkCandidates, the MachineInstr in the DbgValueSinkCandidates must
2659 // be set to undef. If the current MachineInstr is a load that was moved,
2660 // undef the corresponding DBG_VALUE or DBG_VALUE_LIST and clone it to below
2661 // the load.
2662
2663 // To illustrate the above algorithm visually let's take this example.
2664
2665 // Before the Pre Register Allocation Load Store Pass:
2666 // %2 = ld ...
2667 // DBG_VALUE %2, A, .... # X
2668 // DBG_VALUE 0, A, ... # Y
2669 // %3 = ld ...
2670 // DBG_VALUE %3, A, ..., # Z
2671 // %4 = ld ...
2672
2673 // After Pre Register Allocation Load Store Pass:
2674 // DBG_VALUE %2, A, .... # X
2675 // DBG_VALUE 0, A, ... # Y
2676 // DBG_VALUE %3, A, ..., # Z
2677 // %2 = ld ...
2678 // %3 = ld ...
2679 // %4 = ld ...
2680
2681 // The algorithm below does the following:
2682
2683 // In the beginning, the RegisterMap will have been populated with the virtual
2684 // registers %2, and %3, the DbgValueSinkCandidates and the InstrMap will be
2685 // empty. DbgValueSinkCandidates = {}, RegisterMap = {2 -> {}, 3 -> {}},
2686 // InstrMap {}
2687 // -> DBG_VALUE %2, A, .... # X
2688 // DBG_VALUE 0, A, ... # Y
2689 // DBG_VALUE %3, A, ..., # Z
2690 // %2 = ld ...
2691 // %3 = ld ...
2692 // %4 = ld ...
2693
2694 // After the first DBG_VALUE (denoted with an X) is processed, the
2695 // DbgValueSinkCandidates and InstrMap will be populated and the RegisterMap
2696 // entry for %2 will be populated as well. DbgValueSinkCandidates = {A -> X},
2697 // RegisterMap = {2 -> {X}, 3 -> {}}, InstrMap {X -> 2}
2698 // DBG_VALUE %2, A, .... # X
2699 // -> DBG_VALUE 0, A, ... # Y
2700 // DBG_VALUE %3, A, ..., # Z
2701 // %2 = ld ...
2702 // %3 = ld ...
2703 // %4 = ld ...
2704
2705 // After the DBG_VALUE Y is processed, the DbgValueSinkCandidates is updated
2706 // to now hold Y for A and the RegisterMap is also updated to remove X from
2707 // %2; this is because both X and Y describe the same debug variable A. X is
2708 // also updated to have a $noreg as the first operand.
2709 // DbgValueSinkCandidates = {A -> {Y}}, RegisterMap = {2 -> {}, 3 -> {}},
2710 // InstrMap = {X-> 2}
2711 // DBG_VALUE $noreg, A, .... # X
2712 // DBG_VALUE 0, A, ... # Y
2713 // -> DBG_VALUE %3, A, ..., # Z
2714 // %2 = ld ...
2715 // %3 = ld ...
2716 // %4 = ld ...
2717
2718 // After DBG_VALUE Z is processed, the DbgValueSinkCandidates is updated to
2719 // hold Z for A, the RegisterMap is updated to hold Z for %3, and the InstrMap
2720 // is updated to have Z mapped to %3. This is again because Z describes the
2721 // debug variable A; Y is not updated to have $noreg as first operand because
2722 // its first operand is an immediate, not a register.
2723 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2724 // InstrMap = {X -> 2, Z -> 3}
2725 // DBG_VALUE $noreg, A, .... # X
2726 // DBG_VALUE 0, A, ... # Y
2727 // DBG_VALUE %3, A, ..., # Z
2728 // -> %2 = ld ...
2729 // %3 = ld ...
2730 // %4 = ld ...
2731
2732 // Nothing happens here since the RegisterMap for %2 contains no value.
2733 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2734 // InstrMap = {X -> 2, Z -> 3}
2735 // DBG_VALUE $noreg, A, .... # X
2736 // DBG_VALUE 0, A, ... # Y
2737 // DBG_VALUE %3, A, ..., # Z
2738 // %2 = ld ...
2739 // -> %3 = ld ...
2740 // %4 = ld ...
2741
2742 // Since the RegisterMap contains Z as a value for %3, the MachineInstr
2743 // pointer Z is copied to come after the load for %3 and the old Z's first
2744 // operand is changed to $noreg; the Basic Block iterator is moved to after the
2745 // DBG_VALUE Z's new position.
2746 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2747 // InstrMap = {X -> 2, Z -> 3}
2748 // DBG_VALUE $noreg, A, .... # X
2749 // DBG_VALUE 0, A, ... # Y
2750 // DBG_VALUE $noreg, A, ..., # Old Z
2751 // %2 = ld ...
2752 // %3 = ld ...
2753 // DBG_VALUE %3, A, ..., # Z
2754 // -> %4 = ld ...
2755
2756 // Nothing happens for %4 and the algorithm exits having processed the entire
2757 // Basic Block.
2758 // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
2759 // InstrMap = {X -> 2, Z -> 3}
2760 // DBG_VALUE $noreg, A, .... # X
2761 // DBG_VALUE 0, A, ... # Y
2762 // DBG_VALUE $noreg, A, ..., # Old Z
2763 // %2 = ld ...
2764 // %3 = ld ...
2765 // DBG_VALUE %3, A, ..., # Z
2766 // %4 = ld ...
2767
2768 // This map is used to track the relationship between
2769 // a Debug Variable and the DBG_VALUE MachineInstr pointer that describes the
2770 // debug information for that Debug Variable.
2772 // This map is used to track the relationship between a DBG_VALUE or
2773 // DBG_VALUE_LIST MachineInstr pointer and Registers that it uses.
2775 for (MBBI = MBB->begin(), E = MBB->end(); MBBI != E; ++MBBI) {
2776 MachineInstr &MI = *MBBI;
2777
2778 auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
2779 auto RegIt = RegisterMap.find(Reg);
2780 if (RegIt == RegisterMap.end())
2781 return;
2782 auto &InstrVec = RegIt->getSecond();
2783 InstrVec.push_back(&MI);
2784 InstrMap[&MI].push_back(Reg);
2785 };
2786
2787 if (MI.isDebugValue()) {
2788 assert(MI.getDebugVariable() &&
2789 "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
2790
2791 auto DbgVar = createDebugVariableFromMachineInstr(&MI);
2792 // If the first operand is a register and it exists in the RegisterMap, we
2793 // know this is a DBG_VALUE that uses the result of a load that was moved,
2794 // and is therefore a candidate to also be moved, add it to the
2795 // RegisterMap and InstrMap.
2796 forEachDbgRegOperand(&MI, [&](MachineOperand &Op) {
2797 PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
2798 });
2799
2800 // If the current DBG_VALUE describes the same variable as one of the
2801 // in-flight DBG_VALUEs, remove the candidate from the list and set it to
2802 // undef. Moving one DBG_VALUE past another would result in the variable's
2803 // value going back in time when stepping through the block in the
2804 // debugger.
2805 auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
2806 if (InstrIt != DbgValueSinkCandidates.end()) {
2807 auto *Instr = InstrIt->getSecond();
2808 auto RegIt = InstrMap.find(Instr);
2809 if (RegIt != InstrMap.end()) {
2810 const auto &RegVec = RegIt->getSecond();
2811 // For every Register in the RegVec, remove the MachineInstr in the
2812 // RegisterMap that describes the DbgVar.
2813 for (auto &Reg : RegVec) {
2814 auto RegIt = RegisterMap.find(Reg);
2815 if (RegIt == RegisterMap.end())
2816 continue;
2817 auto &InstrVec = RegIt->getSecond();
2818 auto IsDbgVar = [&](MachineInstr *I) -> bool {
2819 auto Var = createDebugVariableFromMachineInstr(I);
2820 return Var == DbgVar;
2821 };
2822
2823 llvm::erase_if(InstrVec, IsDbgVar);
2824 }
2825 forEachDbgRegOperand(Instr,
2826 [&](MachineOperand &Op) { Op.setReg(0); });
2827 }
2828 }
2829 DbgValueSinkCandidates[DbgVar] = &MI;
2830 } else {
2831 // If the first operand of a load matches with a DBG_VALUE in RegisterMap,
2832 // then move that DBG_VALUE to below the load.
2833 auto Opc = MI.getOpcode();
2834 if (!isLoadSingle(Opc))
2835 continue;
2836 auto Reg = MI.getOperand(0).getReg();
2837 auto RegIt = RegisterMap.find(Reg);
2838 if (RegIt == RegisterMap.end())
2839 continue;
2840 auto &DbgInstrVec = RegIt->getSecond();
2841 if (!DbgInstrVec.size())
2842 continue;
2843 for (auto *DbgInstr : DbgInstrVec) {
2844 MachineBasicBlock::iterator InsertPos = std::next(MBBI);
2845 auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
2846 MBB->insert(InsertPos, ClonedMI);
2847 MBBI++;
2848 // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
2849 // that was moved.
2850 auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
2851 // Erase DbgVar from DbgValueSinkCandidates if still present. If the
2852 // instruction is a DBG_VALUE_LIST, it may have already been erased from
2853 // DbgValueSinkCandidates.
2854 DbgValueSinkCandidates.erase(DbgVar);
2855 // Zero out original dbg instr
2856 forEachDbgRegOperand(DbgInstr,
2857 [&](MachineOperand &Op) { Op.setReg(0); });
2858 // Update RegisterMap with ClonedMI because it might have to be moved
2859 // again.
2860 if (DbgInstr->isDebugValueList())
2861 updateRegisterMapForDbgValueListAfterMove(RegisterMap, ClonedMI,
2862 DbgInstr);
2863 }
2864 }
2865 }
2866 return RetVal;
2867}
2868
2869 // Get the Base register operand index from the memory access MachineInstr if
2870 // we should attempt to distribute postinc on it. Return -1 if not a valid
2871 // instruction type. If it returns an index, it is assumed that the instruction
2872 // uses an r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
2873 static int getBaseOperandIndex(MachineInstr &MI) {
2874 switch (MI.getOpcode()) {
2875 case ARM::MVE_VLDRBS16:
2876 case ARM::MVE_VLDRBS32:
2877 case ARM::MVE_VLDRBU16:
2878 case ARM::MVE_VLDRBU32:
2879 case ARM::MVE_VLDRHS32:
2880 case ARM::MVE_VLDRHU32:
2881 case ARM::MVE_VLDRBU8:
2882 case ARM::MVE_VLDRHU16:
2883 case ARM::MVE_VLDRWU32:
2884 case ARM::MVE_VSTRB16:
2885 case ARM::MVE_VSTRB32:
2886 case ARM::MVE_VSTRH32:
2887 case ARM::MVE_VSTRBU8:
2888 case ARM::MVE_VSTRHU16:
2889 case ARM::MVE_VSTRWU32:
2890 case ARM::t2LDRHi8:
2891 case ARM::t2LDRHi12:
2892 case ARM::t2LDRSHi8:
2893 case ARM::t2LDRSHi12:
2894 case ARM::t2LDRBi8:
2895 case ARM::t2LDRBi12:
2896 case ARM::t2LDRSBi8:
2897 case ARM::t2LDRSBi12:
2898 case ARM::t2STRBi8:
2899 case ARM::t2STRBi12:
2900 case ARM::t2STRHi8:
2901 case ARM::t2STRHi12:
2902 return 1;
2903 case ARM::MVE_VLDRBS16_post:
2904 case ARM::MVE_VLDRBS32_post:
2905 case ARM::MVE_VLDRBU16_post:
2906 case ARM::MVE_VLDRBU32_post:
2907 case ARM::MVE_VLDRHS32_post:
2908 case ARM::MVE_VLDRHU32_post:
2909 case ARM::MVE_VLDRBU8_post:
2910 case ARM::MVE_VLDRHU16_post:
2911 case ARM::MVE_VLDRWU32_post:
2912 case ARM::MVE_VSTRB16_post:
2913 case ARM::MVE_VSTRB32_post:
2914 case ARM::MVE_VSTRH32_post:
2915 case ARM::MVE_VSTRBU8_post:
2916 case ARM::MVE_VSTRHU16_post:
2917 case ARM::MVE_VSTRWU32_post:
2918 case ARM::MVE_VLDRBS16_pre:
2919 case ARM::MVE_VLDRBS32_pre:
2920 case ARM::MVE_VLDRBU16_pre:
2921 case ARM::MVE_VLDRBU32_pre:
2922 case ARM::MVE_VLDRHS32_pre:
2923 case ARM::MVE_VLDRHU32_pre:
2924 case ARM::MVE_VLDRBU8_pre:
2925 case ARM::MVE_VLDRHU16_pre:
2926 case ARM::MVE_VLDRWU32_pre:
2927 case ARM::MVE_VSTRB16_pre:
2928 case ARM::MVE_VSTRB32_pre:
2929 case ARM::MVE_VSTRH32_pre:
2930 case ARM::MVE_VSTRBU8_pre:
2931 case ARM::MVE_VSTRHU16_pre:
2932 case ARM::MVE_VSTRWU32_pre:
2933 return 2;
2934 }
2935 return -1;
2936}
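// Note on the indices returned above: pre/post-indexed forms carry an extra
// writeback def, shifting the base register from operand 1 to operand 2; in
// both cases the immediate offset is the operand that follows the base.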
2937
2938 static bool isPostIndex(MachineInstr &MI) {
2939 switch (MI.getOpcode()) {
2940 case ARM::MVE_VLDRBS16_post:
2941 case ARM::MVE_VLDRBS32_post:
2942 case ARM::MVE_VLDRBU16_post:
2943 case ARM::MVE_VLDRBU32_post:
2944 case ARM::MVE_VLDRHS32_post:
2945 case ARM::MVE_VLDRHU32_post:
2946 case ARM::MVE_VLDRBU8_post:
2947 case ARM::MVE_VLDRHU16_post:
2948 case ARM::MVE_VLDRWU32_post:
2949 case ARM::MVE_VSTRB16_post:
2950 case ARM::MVE_VSTRB32_post:
2951 case ARM::MVE_VSTRH32_post:
2952 case ARM::MVE_VSTRBU8_post:
2953 case ARM::MVE_VSTRHU16_post:
2954 case ARM::MVE_VSTRWU32_post:
2955 return true;
2956 }
2957 return false;
2958}
2959
2960 static bool isPreIndex(MachineInstr &MI) {
2961 switch (MI.getOpcode()) {
2962 case ARM::MVE_VLDRBS16_pre:
2963 case ARM::MVE_VLDRBS32_pre:
2964 case ARM::MVE_VLDRBU16_pre:
2965 case ARM::MVE_VLDRBU32_pre:
2966 case ARM::MVE_VLDRHS32_pre:
2967 case ARM::MVE_VLDRHU32_pre:
2968 case ARM::MVE_VLDRBU8_pre:
2969 case ARM::MVE_VLDRHU16_pre:
2970 case ARM::MVE_VLDRWU32_pre:
2971 case ARM::MVE_VSTRB16_pre:
2972 case ARM::MVE_VSTRB32_pre:
2973 case ARM::MVE_VSTRH32_pre:
2974 case ARM::MVE_VSTRBU8_pre:
2975 case ARM::MVE_VSTRHU16_pre:
2976 case ARM::MVE_VSTRWU32_pre:
2977 return true;
2978 }
2979 return false;
2980}
2981
2982 // Given a memory access Opcode, check that the given Imm would be a valid
2983 // Offset for this instruction (same as isLegalAddressImm), or that the
2984 // instruction could easily be converted to one where it is valid; for example,
2985 // converting t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction
2986 // with AdjustBaseAndOffset below.
2987static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
2988 const TargetInstrInfo *TII,
2989 int &CodesizeEstimate) {
2990 if (isLegalAddressImm(Opcode, Imm, TII))
2991 return true;
2992
2993 // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
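  // e.g. an offset of -4 is not encodable by t2LDRBi12 but is by t2LDRBi8; the
  // CodesizeEstimate bump below reflects that the i8 form can no longer shrink
  // to a 16-bit Thumb encoding (tLDRBi) later.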
2994 const MCInstrDesc &Desc = TII->get(Opcode);
2995 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2996 switch (AddrMode) {
2997 case ARMII::AddrModeT2_i12:
2998 CodesizeEstimate += 1;
2999 return Imm < 0 && -Imm < ((1 << 8) * 1);
3000 }
3001 return false;
3002}
3003
3004 // Given an MI, adjust its address BaseReg to use NewBaseReg and its address
3005 // offset by -Offset. This can either happen in-place or be a replacement as
3006 // MI is converted to another instruction type.
3007 static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
3008 int Offset, const TargetInstrInfo *TII,
3009 const TargetRegisterInfo *TRI) {
3010 // Set the Base reg
3011 unsigned BaseOp = getBaseOperandIndex(*MI);
3012 MI->getOperand(BaseOp).setReg(NewBaseReg);
3013 // and constrain the reg class to that required by the instruction.
3014 MachineFunction *MF = MI->getMF();
3015 MachineRegisterInfo &MRI = MF->getRegInfo();
3016 const MCInstrDesc &MCID = TII->get(MI->getOpcode());
3017 const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI, *MF);
3018 MRI.constrainRegClass(NewBaseReg, TRC);
3019
3020 int OldOffset = MI->getOperand(BaseOp + 1).getImm();
3021 if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
3022 MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
3023 else {
3024 unsigned ConvOpcode;
3025 switch (MI->getOpcode()) {
3026 case ARM::t2LDRHi12:
3027 ConvOpcode = ARM::t2LDRHi8;
3028 break;
3029 case ARM::t2LDRSHi12:
3030 ConvOpcode = ARM::t2LDRSHi8;
3031 break;
3032 case ARM::t2LDRBi12:
3033 ConvOpcode = ARM::t2LDRBi8;
3034 break;
3035 case ARM::t2LDRSBi12:
3036 ConvOpcode = ARM::t2LDRSBi8;
3037 break;
3038 case ARM::t2STRHi12:
3039 ConvOpcode = ARM::t2STRHi8;
3040 break;
3041 case ARM::t2STRBi12:
3042 ConvOpcode = ARM::t2STRBi8;
3043 break;
3044 default:
3045 llvm_unreachable("Unhandled convertable opcode");
3046 }
3047 assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
3048 "Illegal Address Immediate after convert!");
3049
3050 const MCInstrDesc &MCID = TII->get(ConvOpcode);
3051 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3052 .add(MI->getOperand(0))
3053 .add(MI->getOperand(1))
3054 .addImm(OldOffset - Offset)
3055 .add(MI->getOperand(3))
3056 .add(MI->getOperand(4))
3057 .cloneMemRefs(*MI);
3058 MI->eraseFromParent();
3059 }
3060}
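// e.g. (hypothetical): after a postinc of #16 is formed on the base, a later
//   t2LDRBi12 rD, rBase, #20   becomes   t2LDRBi12 rD, rNewBase, #4,
// or a t2LDRBi8 with a negative offset once the adjusted value goes below zero.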
3061
3062 static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
3063 Register NewReg,
3064 const TargetInstrInfo *TII,
3065 const TargetRegisterInfo *TRI) {
3066 MachineFunction *MF = MI->getMF();
3067 MachineRegisterInfo &MRI = MF->getRegInfo();
3068
3069 unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
3070 MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
3071
3072 const MCInstrDesc &MCID = TII->get(NewOpcode);
3073 // Constrain the def register class
3074 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
3075 MRI.constrainRegClass(NewReg, TRC);
3076 // And do the same for the base operand
3077 TRC = TII->getRegClass(MCID, 2, TRI, *MF);
3078 MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
3079
3080 unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
3081 switch (AddrMode) {
3082 case ARMII::AddrModeT2_i7:
3083 case ARMII::AddrModeT2_i7s2:
3084 case ARMII::AddrModeT2_i7s4:
3085 // Any MVE load/store
3086 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3087 .addReg(NewReg, RegState::Define)
3088 .add(MI->getOperand(0))
3089 .add(MI->getOperand(1))
3090 .addImm(Offset)
3091 .add(MI->getOperand(3))
3092 .add(MI->getOperand(4))
3093 .add(MI->getOperand(5))
3094 .cloneMemRefs(*MI);
3095 case ARMII::AddrModeT2_i8:
3096 if (MI->mayLoad()) {
3097 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3098 .add(MI->getOperand(0))
3099 .addReg(NewReg, RegState::Define)
3100 .add(MI->getOperand(1))
3101 .addImm(Offset)
3102 .add(MI->getOperand(3))
3103 .add(MI->getOperand(4))
3104 .cloneMemRefs(*MI);
3105 } else {
3106 return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
3107 .addReg(NewReg, RegState::Define)
3108 .add(MI->getOperand(0))
3109 .add(MI->getOperand(1))
3110 .addImm(Offset)
3111 .add(MI->getOperand(3))
3112 .add(MI->getOperand(4))
3113 .cloneMemRefs(*MI);
3114 }
3115 default:
3116 llvm_unreachable("Unhandled createPostIncLoadStore");
3117 }
3118}
3119
3120 // Given a Base Register, optimise the load/store uses to attempt to create
3121 // more post-inc accesses and fewer register moves. We do this by taking zero
3122 // offset loads/stores with an add, and converting them to a postinc load/store
3123 // of the same type. Any subsequent accesses will be adjusted to use, and
3124 // account for, the post-inc value.
3125// For example:
3126// LDR #0 LDR_POSTINC #16
3127// LDR #4 LDR #-12
3128// LDR #8 LDR #-8
3129// LDR #12 LDR #-4
3130// ADD #16
3131//
3132// At the same time if we do not find an increment but do find an existing
3133// pre/post inc instruction, we can still adjust the offsets of subsequent
3134// instructions to save the register move that would otherwise be needed for the
3135// in-place increment.
3136bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
3137 // We are looking for:
3138 // One zero offset load/store that can become postinc
3139 MachineInstr *BaseAccess = nullptr;
3140 MachineInstr *PrePostInc = nullptr;
3141 // An increment that can be folded in
3142 MachineInstr *Increment = nullptr;
3143 // Other accesses after BaseAccess that will need to be updated to use the
3144 // postinc value.
3145 SmallPtrSet<MachineInstr *, 8> OtherAccesses;
3146 for (auto &Use : MRI->use_nodbg_instructions(Base)) {
3147 if (!Increment && getAddSubImmediate(Use) != 0) {
3148 Increment = &Use;
3149 continue;
3150 }
3151
3152 int BaseOp = getBaseOperandIndex(Use);
3153 if (BaseOp == -1)
3154 return false;
3155
3156 if (!Use.getOperand(BaseOp).isReg() ||
3157 Use.getOperand(BaseOp).getReg() != Base)
3158 return false;
3159 if (isPreIndex(Use) || isPostIndex(Use))
3160 PrePostInc = &Use;
3161 else if (Use.getOperand(BaseOp + 1).getImm() == 0)
3162 BaseAccess = &Use;
3163 else
3164 OtherAccesses.insert(&Use);
3165 }
3166
3167 int IncrementOffset;
3168 Register NewBaseReg;
3169 if (BaseAccess && Increment) {
3170 if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
3171 return false;
3172 Register PredReg;
3173 if (Increment->definesRegister(ARM::CPSR, /*TRI=*/nullptr) ||
3174 getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
3175 return false;
3176
3177 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
3178 << Base.virtRegIndex() << "\n");
3179
3180 // Make sure that Increment has no uses before BaseAccess that are not PHI
3181 // uses.
3182 for (MachineInstr &Use :
3183 MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
3184 if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
3185 !DT->dominates(BaseAccess, &Use))) {
3186 LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
3187 return false;
3188 }
3189 }
3190
3191 // Make sure that Increment can be folded into Base
3192 IncrementOffset = getAddSubImmediate(*Increment);
3193 unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
3194 BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
3195 if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
3196 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
3197 return false;
3198 }
3199 }
3200 else if (PrePostInc) {
3201 // If we already have a pre/post index load/store then set BaseAccess,
3202 // IncrementOffset and NewBaseReg to the values it already produces,
3203 // allowing us to update any subsequent uses of the BaseOp reg with the
3204 // incremented value.
3205 if (Increment)
3206 return false;
3207
3208 LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
3209 << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
3210 int BaseOp = getBaseOperandIndex(*PrePostInc);
3211 IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
3212 BaseAccess = PrePostInc;
3213 NewBaseReg = PrePostInc->getOperand(0).getReg();
3214 }
3215 else
3216 return false;
3217
3218 // And make sure that the negative value of increment can be added to all
3219 // other offsets after the BaseAccess. We rely on either
3220 // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
3221 // to keep things simple.
3222 // This also adds a simple codesize metric, to detect if an instruction (like
3223 // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
3224 // cannot because it is converted to something else (t2LDRBi8). We start this
3225 // at -1 for the gain from removing the increment.
3226 SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
3227 int CodesizeEstimate = -1;
3228 for (auto *Use : OtherAccesses) {
3229 if (DT->dominates(BaseAccess, Use)) {
3230 SuccessorAccesses.insert(Use);
3231 unsigned BaseOp = getBaseOperandIndex(*Use);
3232 if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
3233 Use->getOperand(BaseOp + 1).getImm() -
3234 IncrementOffset,
3235 TII, CodesizeEstimate)) {
3236 LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
3237 return false;
3238 }
3239 } else if (!DT->dominates(Use, BaseAccess)) {
3240 LLVM_DEBUG(
3241 dbgs() << " Unknown dominance relation between Base and Use\n");
3242 return false;
3243 }
3244 }
3245 if (STI->hasMinSize() && CodesizeEstimate > 0) {
3246 LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
3247 return false;
3248 }
3249
3250 if (!PrePostInc) {
3251 // Replace BaseAccess with a post inc
3252 LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
3253 LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
3254 NewBaseReg = Increment->getOperand(0).getReg();
3255 MachineInstr *BaseAccessPost =
3256 createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
3257 BaseAccess->eraseFromParent();
3258 Increment->eraseFromParent();
3259 (void)BaseAccessPost;
3260 LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
3261 }
3262
3263 for (auto *Use : SuccessorAccesses) {
3264 LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
3265 AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
3266 LLVM_DEBUG(dbgs() << " To : "; Use->dump());
3267 }
3268
3269 // Remove the kill flag from all uses of NewBaseReg, in case any old uses
3270 // remain.
3271 for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
3272 Op.setIsKill(false);
3273 return true;
3274}
3275
3276bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
3277 bool Changed = false;
3278 SmallSetVector<Register, 4> Visited;
3279 for (auto &MBB : *MF) {
3280 for (auto &MI : MBB) {
3281 int BaseOp = getBaseOperandIndex(MI);
3282 if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
3283 continue;
3284
3285 Register Base = MI.getOperand(BaseOp).getReg();
3286 if (!Base.isVirtual())
3287 continue;
3288
3289 Visited.insert(Base);
3290 }
3291 }
3292
3293 for (auto Base : Visited)
3294 Changed |= DistributeIncrements(Base);
3295
3296 return Changed;
3297}
3298
3299/// Returns an instance of the load / store optimization pass.
3300 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
3301 if (PreAlloc)
3302 return new ARMPreAllocLoadStoreOpt();
3303 return new ARMLoadStoreOpt();
3304}
Definition: DenseSet.h:263
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:31
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:199
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:590
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:813
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:798
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
A description of a memory reference used in the backend.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
size_type size() const
Definition: SmallSet.h:171
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:390
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Align getTransientStackAlign() const
getTransientStackAlignment - This method returns the number of bytes to which the stack pointer must ...
TargetInstrInfo - Interface to description of machine instruction set.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetFrameLowering * getFrameLowering() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
LLVM Value Representation.
Definition: Value.h:75
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5465
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:174
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
AddrOpc getAM5Op(unsigned AM5Opc)
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned char getAM5Offset(unsigned AM5Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Define
Register definition.
@ Kill
The last use of a register.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition: SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1563
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
unsigned getDeadRegState(bool B)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLoadStoreOptimizationPass(bool PreAlloc=false)
Returns an instance of the load / store optimization pass.
unsigned M1(unsigned Val)
Definition: VE.h:377
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1879
unsigned getKillRegState(bool B)
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:376
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
int getAddSubImmediate(MachineInstr &MI)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.