ARMBaseInstrInfo.cpp
1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
19#include "ARMSubtarget.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/Module.h"
53#include "llvm/MC/MCAsmInfo.h"
54#include "llvm/MC/MCInstrDesc.h"
59#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <new>
68#include <utility>
69#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
78/// ARM_MLxEntry - Record information about MLA / MLS instructions.
79struct ARM_MLxEntry {
80 uint16_t MLxOpc; // MLA / MLS opcode
81 uint16_t MulOpc; // Expanded multiplication opcode
82 uint16_t AddSubOpc; // Expanded add / sub opcode
83 bool NegAcc; // True if the acc is negated before the add / sub.
84 bool HasLane; // True if instruction has an extra "lane" operand.
85};
86
87static const ARM_MLxEntry ARM_MLxTable[] = {
88 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
89 // fp scalar ops
90 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
91 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
92 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
93 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
94 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
95 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
96 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
97 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
98
99 // fp SIMD ops
100 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
101 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
102 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
103 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
104 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
105 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
106 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
107 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
108};
109
110ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
111 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
112 Subtarget(STI) {
113 for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
114 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
115 llvm_unreachable("Duplicated entries?");
116 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
117 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
118 }
119}
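
// Editorial sketch (not part of the upstream file): how the MLA/MLS table
// built in the constructor above is meant to be consumed. Given the opcode of
// a fused multiply-accumulate, MLxEntryMap yields an index into ARM_MLxTable,
// whose entry names the two-instruction expansion. Variable names below are
// illustrative only.
//
//   unsigned Idx = MLxEntryMap.lookup(ARM::VMLAS);   // Idx == 0
//   const ARM_MLxEntry &E = ARM_MLxTable[Idx];
//   // E.MulOpc == ARM::VMULS, E.AddSubOpc == ARM::VADDS, E.NegAcc == false:
//   // the fused VMLAS can be split into a VMULS followed by a VADDS.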
120
121// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
122// currently defaults to no prepass hazard recognizer.
123ScheduleHazardRecognizer *
124ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
125 const ScheduleDAG *DAG) const {
126 if (usePreRAHazardRecognizer()) {
127 const InstrItineraryData *II =
128 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
129 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130 }
131 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
132}
133
134// Called during:
135// - pre-RA scheduling
136// - post-RA scheduling when FeatureUseMISched is set
137ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
138 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
139 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
140
141 // We would like to restrict this hazard recognizer to only
142 // post-RA scheduling; we can tell that we're post-RA because we don't
143 // track VRegLiveness.
144 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
145 // banks banked on bit 2. Assume that TCMs are in use.
146 if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
147 MHR->AddHazardRecognizer(
148 std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
149
150 // Not inserting ARMHazardRecognizerFPMLx because that would change
151 // legacy behavior
152
153 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
154 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
155 return MHR;
156}
157
158// Called during post-RA scheduling when FeatureUseMISched is not set
159ScheduleHazardRecognizer *ARMBaseInstrInfo::
160CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
161 const ScheduleDAG *DAG) const {
162 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
163
164 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
165 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
166
167 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
168 if (BHR)
169 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
170 return MHR;
171}
172
173// Branch analysis.
174// Cond vector output format:
175// 0 elements indicates an unconditional branch
176// 2 elements indicates a conditional branch; the elements are
177// the condition to check and the CPSR.
178// 3 elements indicates a hardware loop end; the elements
179// are the opcode, the operand value to test, and a dummy
180// operand used to pad out to 3 operands.
181bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
182 MachineBasicBlock *&TBB,
183 MachineBasicBlock *&FBB,
184 SmallVectorImpl<MachineOperand> &Cond,
185 bool AllowModify) const {
186 TBB = nullptr;
187 FBB = nullptr;
188
189 MachineBasicBlock::instr_iterator I = MBB.instr_end();
190 if (I == MBB.instr_begin())
191 return false; // Empty blocks are easy.
192 --I;
193
194 // Walk backwards from the end of the basic block until the branch is
195 // analyzed or we give up.
196 while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
197 // Flag to be raised on unanalyzeable instructions. This is useful in cases
198 // where we want to clean up on the end of the basic block before we bail
199 // out.
200 bool CantAnalyze = false;
201
202 // Skip over DEBUG values, predicated nonterminators and speculation
203 // barrier terminators.
204 while (I->isDebugInstr() || !I->isTerminator() ||
205 isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
206 I->getOpcode() == ARM::t2DoLoopStartTP){
207 if (I == MBB.instr_begin())
208 return false;
209 --I;
210 }
211
212 if (isIndirectBranchOpcode(I->getOpcode()) ||
213 isJumpTableBranchOpcode(I->getOpcode())) {
214 // Indirect branches and jump tables can't be analyzed, but we still want
215 // to clean up any instructions at the tail of the basic block.
216 CantAnalyze = true;
217 } else if (isUncondBranchOpcode(I->getOpcode())) {
218 TBB = I->getOperand(0).getMBB();
219 } else if (isCondBranchOpcode(I->getOpcode())) {
220 // Bail out if we encounter multiple conditional branches.
221 if (!Cond.empty())
222 return true;
223
224 assert(!FBB && "FBB should have been null.");
225 FBB = TBB;
226 TBB = I->getOperand(0).getMBB();
227 Cond.push_back(I->getOperand(1));
228 Cond.push_back(I->getOperand(2));
229 } else if (I->isReturn()) {
230 // Returns can't be analyzed, but we should run cleanup.
231 CantAnalyze = true;
232 } else if (I->getOpcode() == ARM::t2LoopEnd &&
233 MBB.getParent()
236 if (!Cond.empty())
237 return true;
238 FBB = TBB;
239 TBB = I->getOperand(1).getMBB();
240 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
241 Cond.push_back(I->getOperand(0));
242 Cond.push_back(MachineOperand::CreateImm(0));
243 } else {
244 // We encountered an unrecognized terminator. Bail out immediately.
245 return true;
246 }
247
248 // Cleanup code - to be run for unpredicated unconditional branches and
249 // returns.
250 if (!isPredicated(*I) &&
251 (isUncondBranchOpcode(I->getOpcode()) ||
252 isIndirectBranchOpcode(I->getOpcode()) ||
253 isJumpTableBranchOpcode(I->getOpcode()) ||
254 I->isReturn())) {
255 // Forget any previous condition branch information - it no longer applies.
256 Cond.clear();
257 FBB = nullptr;
258
259 // If we can modify the function, delete everything below this
260 // unconditional branch.
261 if (AllowModify) {
262 MachineBasicBlock::iterator DI = std::next(I);
263 while (DI != MBB.instr_end()) {
264 MachineInstr &InstToDelete = *DI;
265 ++DI;
266 // Speculation barriers must not be deleted.
267 if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
268 continue;
269 InstToDelete.eraseFromParent();
270 }
271 }
272 }
273
274 if (CantAnalyze) {
275 // We may not be able to analyze the block, but we could still have
276 // an unconditional branch as the last instruction in the block, which
277 // just branches to layout successor. If this is the case, then just
278 // remove it if we're allowed to make modifications.
279 if (AllowModify && !isPredicated(MBB.back()) &&
280 isUncondBranchOpcode(MBB.back().getOpcode()) &&
281 TBB && MBB.isLayoutSuccessor(TBB))
282 removeBranch(MBB);
283 return true;
284 }
285
286 if (I == MBB.instr_begin())
287 return false;
288
289 --I;
290 }
291
292 // We made it past the terminators without bailing out - we must have
293 // analyzed this branch successfully.
294 return false;
295}
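
// Editorial example (not upstream code): the Cond encodings produced above, as
// a caller such as the if-converter or branch folder would see them. Block
// names and registers are made up for illustration.
//
//   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
//   SmallVector<MachineOperand, 4> Cond;
//   if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
//     // "t2Bcc %bb.2, 0 /* eq */, $cpsr; t2B %bb.3" yields
//     //   TBB = %bb.2, FBB = %bb.3, Cond = { imm(ARMCC::EQ), reg($cpsr) }
//     // a hardware loop end "t2LoopEnd $lr, %bb.1" (when analyzed) yields
//     //   TBB = %bb.1, Cond = { imm(ARM::t2LoopEnd), reg($lr), imm(0) }
//   }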
296
297unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
298 int *BytesRemoved) const {
299 assert(!BytesRemoved && "code size not handled");
300
301 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
302 if (I == MBB.end())
303 return 0;
304
305 if (!isUncondBranchOpcode(I->getOpcode()) &&
306 !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
307 return 0;
308
309 // Remove the branch.
310 I->eraseFromParent();
311
312 I = MBB.end();
313
314 if (I == MBB.begin()) return 1;
315 --I;
316 if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
317 return 1;
318
319 // Remove the branch.
320 I->eraseFromParent();
321 return 2;
322}
323
324unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
325 MachineBasicBlock *TBB,
326 MachineBasicBlock *FBB,
327 ArrayRef<MachineOperand> Cond,
328 const DebugLoc &DL,
329 int *BytesAdded) const {
330 assert(!BytesAdded && "code size not handled");
331 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
332 int BOpc = !AFI->isThumbFunction()
333 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
334 int BccOpc = !AFI->isThumbFunction()
335 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
336 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
337
338 // Shouldn't be a fall through.
339 assert(TBB && "insertBranch must not be told to insert a fallthrough");
340 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
341 "ARM branch conditions have two or three components!");
342
343 // For conditional branches, we use addOperand to preserve CPSR flags.
344
345 if (!FBB) {
346 if (Cond.empty()) { // Unconditional branch?
347 if (isThumb)
348 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
349 else
350 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
351 } else if (Cond.size() == 2) {
352 BuildMI(&MBB, DL, get(BccOpc))
353 .addMBB(TBB)
354 .addImm(Cond[0].getImm())
355 .add(Cond[1]);
356 } else
357 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
358 return 1;
359 }
360
361 // Two-way conditional branch.
362 if (Cond.size() == 2)
363 BuildMI(&MBB, DL, get(BccOpc))
364 .addMBB(TBB)
365 .addImm(Cond[0].getImm())
366 .add(Cond[1]);
367 else if (Cond.size() == 3)
368 BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
369 if (isThumb)
370 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
371 else
372 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
373 return 2;
374}
375
376bool ARMBaseInstrInfo::
377reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
378 if (Cond.size() == 2) {
379 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
380 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
381 return false;
382 }
383 return true;
384}
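
// Editorial sketch (not upstream code): the reverse-then-reinsert pattern that
// generic passes apply on top of the three hooks above, for example to make a
// block fall through to its original true destination.
//
//   SmallVector<MachineOperand, 4> Cond;            // filled by analyzeBranch
//   if (!TII->reverseBranchCondition(Cond)) {       // EQ -> NE, GE -> LT, ...
//     TII->removeBranch(MBB);
//     TII->insertBranch(MBB, /*TBB=*/FBB, /*FBB=*/nullptr, Cond, DL);
//   }
//   // The 3-operand hardware-loop form cannot be reversed, so the call
//   // returns true and the caller must keep the original branch structure.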
385
386bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
387 if (MI.isBundle()) {
388 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
389 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
390 while (++I != E && I->isInsideBundle()) {
391 int PIdx = I->findFirstPredOperandIdx();
392 if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
393 return true;
394 }
395 return false;
396 }
397
398 int PIdx = MI.findFirstPredOperandIdx();
399 return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
400}
401
403 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
404 const TargetRegisterInfo *TRI) const {
405
406 // First, let's see if there is a generic comment for this operand
407 std::string GenericComment =
409 if (!GenericComment.empty())
410 return GenericComment;
411
412 // If not, check if we have an immediate operand.
413 if (!Op.isImm())
414 return std::string();
415
416 // And print its corresponding condition code if the immediate is a
417 // predicate.
418 int FirstPredOp = MI.findFirstPredOperandIdx();
419 if (FirstPredOp != (int) OpIdx)
420 return std::string();
421
422 std::string CC = "CC::";
423 CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
424 return CC;
425}
426
429 unsigned Opc = MI.getOpcode();
432 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
433 .addImm(Pred[0].getImm())
434 .addReg(Pred[1].getReg());
435 return true;
436 }
437
438 int PIdx = MI.findFirstPredOperandIdx();
439 if (PIdx != -1) {
440 MachineOperand &PMO = MI.getOperand(PIdx);
441 PMO.setImm(Pred[0].getImm());
442 MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
443
444 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
445 // IT block. This affects how they are printed.
446 const MCInstrDesc &MCID = MI.getDesc();
448 assert(MCID.operands()[1].isOptionalDef() &&
449 "CPSR def isn't expected operand");
450 assert((MI.getOperand(1).isDead() ||
451 MI.getOperand(1).getReg() != ARM::CPSR) &&
452 "if conversion tried to stop defining used CPSR");
453 MI.getOperand(1).setReg(ARM::NoRegister);
454 }
455
456 return true;
457 }
458 return false;
459}
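
// Editorial example (not upstream code): what predication does to an ordinary
// ARM-mode move, shown as before/after MIR. The operands written by
// PredicateInstruction are exactly the predicate immediate and CPSR use that
// isPredicated() inspects above. Registers are illustrative.
//
//   before:  $r0 = MOVr $r1, 14 /* ARMCC::AL */, $noreg, $noreg
//   Pred  =  { imm(ARMCC::EQ), reg($cpsr) }
//   after:   $r0 = MOVr $r1,  0 /* ARMCC::EQ */, $cpsr,  $noreg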
460
462 ArrayRef<MachineOperand> Pred2) const {
463 if (Pred1.size() > 2 || Pred2.size() > 2)
464 return false;
465
466 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
467 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
468 if (CC1 == CC2)
469 return true;
470
471 switch (CC1) {
472 default:
473 return false;
474 case ARMCC::AL:
475 return true;
476 case ARMCC::HS:
477 return CC2 == ARMCC::HI;
478 case ARMCC::LS:
479 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
480 case ARMCC::GE:
481 return CC2 == ARMCC::GT;
482 case ARMCC::LE:
483 return CC2 == ARMCC::LT;
484 }
485}
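
// Editorial example (not upstream code): "subsumes" means the first predicate
// is true whenever the second one is, so an instruction guarded by Pred1 may
// safely stand in for one guarded by Pred2.
//
//   SubsumesPredicate({imm(ARMCC::GE), reg($cpsr)},
//                     {imm(ARMCC::GT), reg($cpsr)})  -> true  (GT implies GE)
//   SubsumesPredicate({imm(ARMCC::GT), reg($cpsr)},
//                     {imm(ARMCC::GE), reg($cpsr)})  -> false (GE does not imply GT)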
486
488 std::vector<MachineOperand> &Pred,
489 bool SkipDead) const {
490 bool Found = false;
491 for (const MachineOperand &MO : MI.operands()) {
492 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
493 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
494 if (ClobbersCPSR || IsCPSR) {
495
496 // Filter out T1 instructions that have a dead CPSR,
497 // allowing IT blocks to be generated containing T1 instructions
498 const MCInstrDesc &MCID = MI.getDesc();
499 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
500 SkipDead)
501 continue;
502
503 Pred.push_back(MO);
504 Found = true;
505 }
506 }
507
508 return Found;
509}
510
512 for (const auto &MO : MI.operands())
513 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
514 return true;
515 return false;
516}
517
519 switch (MI->getOpcode()) {
520 default: return true;
521 case ARM::tADC: // ADC (register) T1
522 case ARM::tADDi3: // ADD (immediate) T1
523 case ARM::tADDi8: // ADD (immediate) T2
524 case ARM::tADDrr: // ADD (register) T1
525 case ARM::tAND: // AND (register) T1
526 case ARM::tASRri: // ASR (immediate) T1
527 case ARM::tASRrr: // ASR (register) T1
528 case ARM::tBIC: // BIC (register) T1
529 case ARM::tEOR: // EOR (register) T1
530 case ARM::tLSLri: // LSL (immediate) T1
531 case ARM::tLSLrr: // LSL (register) T1
532 case ARM::tLSRri: // LSR (immediate) T1
533 case ARM::tLSRrr: // LSR (register) T1
534 case ARM::tMUL: // MUL T1
535 case ARM::tMVN: // MVN (register) T1
536 case ARM::tORR: // ORR (register) T1
537 case ARM::tROR: // ROR (register) T1
538 case ARM::tRSB: // RSB (immediate) T1
539 case ARM::tSBC: // SBC (register) T1
540 case ARM::tSUBi3: // SUB (immediate) T1
541 case ARM::tSUBi8: // SUB (immediate) T2
542 case ARM::tSUBrr: // SUB (register) T1
543 return !ARMBaseInstrInfo::isCPSRDefined(*MI);
544 }
545}
546
547/// isPredicable - Return true if the specified instruction can be predicated.
548/// By default, this returns true for every instruction with a
549/// PredicateOperand.
551 if (!MI.isPredicable())
552 return false;
553
554 if (MI.isBundle())
555 return false;
556
558 return false;
559
560 const MachineFunction *MF = MI.getParent()->getParent();
561 const ARMFunctionInfo *AFI =
563
564 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
565 // In their ARM encoding, they can't be encoded in a conditional form.
566 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
567 return false;
568
569 // Make indirect control flow changes unpredicable when SLS mitigation is
570 // enabled.
571 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
572 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
573 return false;
574 if (ST.hardenSlsBlr() && isIndirectCall(MI))
575 return false;
576
577 if (AFI->isThumb2Function()) {
578 if (getSubtarget().restrictIT())
579 return isV8EligibleForIT(&MI);
580 }
581
582 return true;
583}
584
585namespace llvm {
586
587template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
588 for (const MachineOperand &MO : MI->operands()) {
589 if (!MO.isReg() || MO.isUndef() || MO.isUse())
590 continue;
591 if (MO.getReg() != ARM::CPSR)
592 continue;
593 if (!MO.isDead())
594 return false;
595 }
596 // all definitions of CPSR are dead
597 return true;
598}
599
600} // end namespace llvm
601
602/// GetInstSize - Return the size of the specified MachineInstr.
603///
605 const MachineBasicBlock &MBB = *MI.getParent();
606 const MachineFunction *MF = MBB.getParent();
607 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
608
609 const MCInstrDesc &MCID = MI.getDesc();
610
611 switch (MI.getOpcode()) {
612 default:
613 // Return the size specified in .td file. If there's none, return 0, as we
614 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
615 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
616 // contrast to AArch64 instructions which have a default size of 4 bytes for
617 // example.
618 return MCID.getSize();
619 case TargetOpcode::BUNDLE:
620 return getInstBundleLength(MI);
621 case ARM::CONSTPOOL_ENTRY:
622 case ARM::JUMPTABLE_INSTS:
623 case ARM::JUMPTABLE_ADDRS:
624 case ARM::JUMPTABLE_TBB:
625 case ARM::JUMPTABLE_TBH:
626 // If this machine instr is a constant pool entry, its size is recorded as
627 // operand #2.
628 return MI.getOperand(2).getImm();
629 case ARM::SPACE:
630 return MI.getOperand(1).getImm();
631 case ARM::INLINEASM:
632 case ARM::INLINEASM_BR: {
633 // If this machine instr is an inline asm, measure it.
634 unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
636 Size = alignTo(Size, 4);
637 return Size;
638 }
639 }
640}
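
// Editorial example (not upstream code): typical results of the size query
// above. Sizes come from the Size field in the .td files, except for the
// pseudo instructions handled explicitly in the switch.
//
//   tADDi3 (Thumb1)            -> 2
//   t2ADDri / ADDri (T2 / ARM) -> 4
//   CONSTPOOL_ENTRY            -> value of operand #2 (the entry's byte size)
//   INLINEASM                  -> estimated from the asm string via
//                                 getInlineAsmLength()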
641
642unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
643 unsigned Size = 0;
645 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
646 while (++I != E && I->isInsideBundle()) {
647 assert(!I->isBundle() && "No nested bundle!");
649 }
650 return Size;
651}
652
655 MCRegister DestReg, bool KillSrc,
656 const ARMSubtarget &Subtarget) const {
657 unsigned Opc = Subtarget.isThumb()
658 ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
659 : ARM::MRS;
660
662 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
663
664 // There is only 1 A/R class MRS instruction, and it always refers to
665 // APSR. However, there are lots of other possibilities on M-class cores.
666 if (Subtarget.isMClass())
667 MIB.addImm(0x800);
668
669 MIB.add(predOps(ARMCC::AL))
670 .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
671}
672
675 MCRegister SrcReg, bool KillSrc,
676 const ARMSubtarget &Subtarget) const {
677 unsigned Opc = Subtarget.isThumb()
678 ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
679 : ARM::MSR;
680
681 MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
682
683 if (Subtarget.isMClass())
684 MIB.addImm(0x800);
685 else
686 MIB.addImm(8);
687
688 MIB.addReg(SrcReg, getKillRegState(KillSrc))
691}
692
694 MIB.addImm(ARMVCC::None);
695 MIB.addReg(0);
696 MIB.addReg(0); // tp_reg
697}
698
700 Register DestReg) {
702 MIB.addReg(DestReg, RegState::Undef);
703}
704
706 MIB.addImm(Cond);
707 MIB.addReg(ARM::VPR, RegState::Implicit);
708 MIB.addReg(0); // tp_reg
709}
710
712 unsigned Cond, unsigned Inactive) {
714 MIB.addReg(Inactive);
715}
716
719 const DebugLoc &DL, Register DestReg,
720 Register SrcReg, bool KillSrc,
721 bool RenamableDest,
722 bool RenamableSrc) const {
723 bool GPRDest = ARM::GPRRegClass.contains(DestReg);
724 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
725
726 if (GPRDest && GPRSrc) {
727 BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
728 .addReg(SrcReg, getKillRegState(KillSrc))
730 .add(condCodeOp());
731 return;
732 }
733
734 bool SPRDest = ARM::SPRRegClass.contains(DestReg);
735 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
736
737 unsigned Opc = 0;
738 if (SPRDest && SPRSrc)
739 Opc = ARM::VMOVS;
740 else if (GPRDest && SPRSrc)
741 Opc = ARM::VMOVRS;
742 else if (SPRDest && GPRSrc)
743 Opc = ARM::VMOVSR;
744 else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
745 Opc = ARM::VMOVD;
746 else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
747 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
748
749 if (Opc) {
750 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
751 MIB.addReg(SrcReg, getKillRegState(KillSrc));
752 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
753 MIB.addReg(SrcReg, getKillRegState(KillSrc));
754 if (Opc == ARM::MVE_VORR)
755 addUnpredicatedMveVpredROp(MIB, DestReg);
756 else if (Opc != ARM::MQPRCopy)
757 MIB.add(predOps(ARMCC::AL));
758 return;
759 }
760
761 // Handle register classes that require multiple instructions.
762 unsigned BeginIdx = 0;
763 unsigned SubRegs = 0;
764 int Spacing = 1;
765
766 // Use VORRq when possible.
767 if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
768 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
769 BeginIdx = ARM::qsub_0;
770 SubRegs = 2;
771 } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
772 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
773 BeginIdx = ARM::qsub_0;
774 SubRegs = 4;
775 // Fall back to VMOVD.
776 } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
777 Opc = ARM::VMOVD;
778 BeginIdx = ARM::dsub_0;
779 SubRegs = 2;
780 } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
781 Opc = ARM::VMOVD;
782 BeginIdx = ARM::dsub_0;
783 SubRegs = 3;
784 } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
785 Opc = ARM::VMOVD;
786 BeginIdx = ARM::dsub_0;
787 SubRegs = 4;
788 } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
789 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
790 BeginIdx = ARM::gsub_0;
791 SubRegs = 2;
792 } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
793 Opc = ARM::VMOVD;
794 BeginIdx = ARM::dsub_0;
795 SubRegs = 2;
796 Spacing = 2;
797 } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
798 Opc = ARM::VMOVD;
799 BeginIdx = ARM::dsub_0;
800 SubRegs = 3;
801 Spacing = 2;
802 } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
803 Opc = ARM::VMOVD;
804 BeginIdx = ARM::dsub_0;
805 SubRegs = 4;
806 Spacing = 2;
807 } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
808 !Subtarget.hasFP64()) {
809 Opc = ARM::VMOVS;
810 BeginIdx = ARM::ssub_0;
811 SubRegs = 2;
812 } else if (SrcReg == ARM::CPSR) {
813 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
814 return;
815 } else if (DestReg == ARM::CPSR) {
816 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
817 return;
818 } else if (DestReg == ARM::VPR) {
819 assert(ARM::GPRRegClass.contains(SrcReg));
820 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
821 .addReg(SrcReg, getKillRegState(KillSrc))
823 return;
824 } else if (SrcReg == ARM::VPR) {
825 assert(ARM::GPRRegClass.contains(DestReg));
826 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
827 .addReg(SrcReg, getKillRegState(KillSrc))
829 return;
830 } else if (DestReg == ARM::FPSCR_NZCV) {
831 assert(ARM::GPRRegClass.contains(SrcReg));
832 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
833 .addReg(SrcReg, getKillRegState(KillSrc))
835 return;
836 } else if (SrcReg == ARM::FPSCR_NZCV) {
837 assert(ARM::GPRRegClass.contains(DestReg));
838 BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
839 .addReg(SrcReg, getKillRegState(KillSrc))
841 return;
842 }
843
844 assert(Opc && "Impossible reg-to-reg copy");
845
848
849 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
850 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
851 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
852 Spacing = -Spacing;
853 }
854#ifndef NDEBUG
855 SmallSet<unsigned, 4> DstRegs;
856#endif
857 for (unsigned i = 0; i != SubRegs; ++i) {
858 Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
859 Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
860 assert(Dst && Src && "Bad sub-register");
861#ifndef NDEBUG
862 assert(!DstRegs.count(Src) && "destructive vector copy");
863 DstRegs.insert(Dst);
864#endif
865 Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
866 // VORR (NEON or MVE) takes two source operands.
867 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
868 Mov.addReg(Src);
869 }
870 // MVE VORR takes predicate operands in place of an ordinary condition.
871 if (Opc == ARM::MVE_VORR)
873 else
874 Mov = Mov.add(predOps(ARMCC::AL));
875 // MOVr can set CC.
876 if (Opc == ARM::MOVr)
877 Mov = Mov.add(condCodeOp());
878 }
879 // Add implicit super-register defs and kills to the last instruction.
880 Mov->addRegisterDefined(DestReg, TRI);
881 if (KillSrc)
882 Mov->addRegisterKilled(SrcReg, TRI);
883}
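
// Editorial example (not upstream code): a multi-instruction copy produced by
// the tuple-handling path above. Copying one pair of consecutive Q registers
// to another on a NEON subtarget expands to two VORRq instructions; the
// implicit operands on the last one keep liveness of the full tuple visible
// to the verifier. Register names are illustrative (shown as q0_q1 -> q2_q3).
//
//   $q2 = VORRq $q0, $q0, 14 /* AL */, $noreg
//   $q3 = VORRq $q1, $q1, 14 /* AL */, $noreg,
//         implicit-def $q2_q3, implicit killed $q0_q1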
884
885std::optional<DestSourcePair>
886ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
887 // VMOVRRD is also a copy instruction, but it requires a
888 // special way of handling: it is a more complex form of copy, so we do not
889 // consider it here. To recognize such an instruction, the
890 // isExtractSubregLike MI interface function
891 // could be used.
892 // VORRq is considered a move only if its two inputs are
893 // the same register.
894 if (!MI.isMoveReg() ||
895 (MI.getOpcode() == ARM::VORRq &&
896 MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
897 return std::nullopt;
898 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
899}
900
901std::optional<ParamLoadedValue>
903 Register Reg) const {
904 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
905 Register DstReg = DstSrcPair->Destination->getReg();
906
907 // TODO: We don't handle cases where the forwarding reg is narrower/wider
908 // than the copy registers. Consider for example:
909 //
910 // s16 = VMOVS s0
911 // s17 = VMOVS s1
912 // call @callee(d0)
913 //
914 // We'd like to describe the call site value of d0 as d8, but this requires
915 // gathering and merging the descriptions for the two VMOVS instructions.
916 //
917 // We also don't handle the reverse situation, where the forwarding reg is
918 // narrower than the copy destination:
919 //
920 // d8 = VMOVD d0
921 // call @callee(s1)
922 //
923 // We need to produce a fragment description (the call site value of s1 is
924 // /not/ just d8).
925 if (DstReg != Reg)
926 return std::nullopt;
927 }
929}
930
933 unsigned SubIdx, unsigned State,
934 const TargetRegisterInfo *TRI) const {
935 if (!SubIdx)
936 return MIB.addReg(Reg, State);
937
939 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
940 return MIB.addReg(Reg, State, SubIdx);
941}
942
945 Register SrcReg, bool isKill, int FI,
946 const TargetRegisterClass *RC,
947 const TargetRegisterInfo *TRI,
948 Register VReg,
949 MachineInstr::MIFlag Flags) const {
951 MachineFrameInfo &MFI = MF.getFrameInfo();
952 Align Alignment = MFI.getObjectAlign(FI);
953
956 MFI.getObjectSize(FI), Alignment);
957
958 switch (TRI->getSpillSize(*RC)) {
959 case 2:
960 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
961 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
962 .addReg(SrcReg, getKillRegState(isKill))
963 .addFrameIndex(FI)
964 .addImm(0)
965 .addMemOperand(MMO)
967 } else
968 llvm_unreachable("Unknown reg class!");
969 break;
970 case 4:
971 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
972 BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
973 .addReg(SrcReg, getKillRegState(isKill))
974 .addFrameIndex(FI)
975 .addImm(0)
976 .addMemOperand(MMO)
978 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
979 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
980 .addReg(SrcReg, getKillRegState(isKill))
981 .addFrameIndex(FI)
982 .addImm(0)
983 .addMemOperand(MMO)
985 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
986 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
987 .addReg(SrcReg, getKillRegState(isKill))
988 .addFrameIndex(FI)
989 .addImm(0)
990 .addMemOperand(MMO)
992 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
993 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
994 .addReg(SrcReg, getKillRegState(isKill))
995 .addFrameIndex(FI)
996 .addImm(0)
997 .addMemOperand(MMO)
999 } else
1000 llvm_unreachable("Unknown reg class!");
1001 break;
1002 case 8:
1003 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1004 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
1005 .addReg(SrcReg, getKillRegState(isKill))
1006 .addFrameIndex(FI)
1007 .addImm(0)
1008 .addMemOperand(MMO)
1010 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1011 if (Subtarget.hasV5TEOps()) {
1012 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
1013 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1014 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1015 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1017 } else {
1018 // Fallback to STM instruction, which has existed since the dawn of
1019 // time.
1020 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
1021 .addFrameIndex(FI)
1022 .addMemOperand(MMO)
1024 AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
1025 AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
1026 }
1027 } else
1028 llvm_unreachable("Unknown reg class!");
1029 break;
1030 case 16:
1031 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1032 // Use aligned spills if the stack can be realigned.
1033 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1034 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
1035 .addFrameIndex(FI)
1036 .addImm(16)
1037 .addReg(SrcReg, getKillRegState(isKill))
1038 .addMemOperand(MMO)
1040 } else {
1041 BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
1042 .addReg(SrcReg, getKillRegState(isKill))
1043 .addFrameIndex(FI)
1044 .addMemOperand(MMO)
1046 }
1047 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1048 Subtarget.hasMVEIntegerOps()) {
1049 auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
1050 MIB.addReg(SrcReg, getKillRegState(isKill))
1051 .addFrameIndex(FI)
1052 .addImm(0)
1053 .addMemOperand(MMO);
1055 } else
1056 llvm_unreachable("Unknown reg class!");
1057 break;
1058 case 24:
1059 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1060 // Use aligned spills if the stack can be realigned.
1061 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1062 Subtarget.hasNEON()) {
1063 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
1064 .addFrameIndex(FI)
1065 .addImm(16)
1066 .addReg(SrcReg, getKillRegState(isKill))
1067 .addMemOperand(MMO)
1069 } else {
1071 get(ARM::VSTMDIA))
1072 .addFrameIndex(FI)
1074 .addMemOperand(MMO);
1075 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1076 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1077 AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1078 }
1079 } else
1080 llvm_unreachable("Unknown reg class!");
1081 break;
1082 case 32:
1083 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1084 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1085 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1086 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1087 Subtarget.hasNEON()) {
1088 // FIXME: It's possible to only store part of the QQ register if the
1089 // spilled def has a sub-register index.
1090 BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
1091 .addFrameIndex(FI)
1092 .addImm(16)
1093 .addReg(SrcReg, getKillRegState(isKill))
1094 .addMemOperand(MMO)
1096 } else if (Subtarget.hasMVEIntegerOps()) {
1097 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
1098 .addReg(SrcReg, getKillRegState(isKill))
1099 .addFrameIndex(FI)
1100 .addMemOperand(MMO);
1101 } else {
1103 get(ARM::VSTMDIA))
1104 .addFrameIndex(FI)
1106 .addMemOperand(MMO);
1107 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1108 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1109 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1110 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1111 }
1112 } else
1113 llvm_unreachable("Unknown reg class!");
1114 break;
1115 case 64:
1116 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1117 Subtarget.hasMVEIntegerOps()) {
1118 BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
1119 .addReg(SrcReg, getKillRegState(isKill))
1120 .addFrameIndex(FI)
1121 .addMemOperand(MMO);
1122 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1123 MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
1124 .addFrameIndex(FI)
1126 .addMemOperand(MMO);
1127 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
1128 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
1129 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
1130 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
1131 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
1132 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
1133 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
1134 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
1135 } else
1136 llvm_unreachable("Unknown reg class!");
1137 break;
1138 default:
1139 llvm_unreachable("Unknown reg class!");
1140 }
1141}
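
// Editorial summary (not upstream code): representative spill instructions
// chosen by the switch above, keyed on the register class spill size.
// Registers and stack slots are illustrative.
//
//   GPR   (4 bytes)  -> STRi12 killed $r0, %stack.0, 0, 14, $noreg
//   SPR   (4 bytes)  -> VSTRS  killed $s0, %stack.0, 0, 14, $noreg
//   DPR   (8 bytes)  -> VSTRD  killed $d0, %stack.0, 0, 14, $noreg
//   DPair (16 bytes) -> VST1q64 on NEON when the slot can be 16-byte aligned,
//                       otherwise VSTMQIA; MVE uses MVE_VSTRWU32 instead.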
1142
1144 int &FrameIndex) const {
1145 switch (MI.getOpcode()) {
1146 default: break;
1147 case ARM::STRrs:
1148 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1149 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1150 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1151 MI.getOperand(3).getImm() == 0) {
1152 FrameIndex = MI.getOperand(1).getIndex();
1153 return MI.getOperand(0).getReg();
1154 }
1155 break;
1156 case ARM::STRi12:
1157 case ARM::t2STRi12:
1158 case ARM::tSTRspi:
1159 case ARM::VSTRD:
1160 case ARM::VSTRS:
1161 case ARM::VSTRH:
1162 case ARM::VSTR_P0_off:
1163 case ARM::VSTR_FPSCR_NZCVQC_off:
1164 case ARM::MVE_VSTRWU32:
1165 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1166 MI.getOperand(2).getImm() == 0) {
1167 FrameIndex = MI.getOperand(1).getIndex();
1168 return MI.getOperand(0).getReg();
1169 }
1170 break;
1171 case ARM::VST1q64:
1172 case ARM::VST1d64TPseudo:
1173 case ARM::VST1d64QPseudo:
1174 if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
1175 FrameIndex = MI.getOperand(0).getIndex();
1176 return MI.getOperand(2).getReg();
1177 }
1178 break;
1179 case ARM::VSTMQIA:
1180 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1181 FrameIndex = MI.getOperand(1).getIndex();
1182 return MI.getOperand(0).getReg();
1183 }
1184 break;
1185 case ARM::MQQPRStore:
1186 case ARM::MQQQQPRStore:
1187 if (MI.getOperand(1).isFI()) {
1188 FrameIndex = MI.getOperand(1).getIndex();
1189 return MI.getOperand(0).getReg();
1190 }
1191 break;
1192 }
1193
1194 return 0;
1195}
1196
1198 int &FrameIndex) const {
1200 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1201 Accesses.size() == 1) {
1202 FrameIndex =
1203 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1204 ->getFrameIndex();
1205 return true;
1206 }
1207 return false;
1208}
1209
1212 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
1213 Register VReg, MachineInstr::MIFlag Flags) const {
1214 DebugLoc DL;
1215 if (I != MBB.end()) DL = I->getDebugLoc();
1216 MachineFunction &MF = *MBB.getParent();
1217 MachineFrameInfo &MFI = MF.getFrameInfo();
1218 const Align Alignment = MFI.getObjectAlign(FI);
1221 MFI.getObjectSize(FI), Alignment);
1222
1223 switch (TRI->getSpillSize(*RC)) {
1224 case 2:
1225 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1226 BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
1227 .addFrameIndex(FI)
1228 .addImm(0)
1229 .addMemOperand(MMO)
1231 } else
1232 llvm_unreachable("Unknown reg class!");
1233 break;
1234 case 4:
1235 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1236 BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
1237 .addFrameIndex(FI)
1238 .addImm(0)
1239 .addMemOperand(MMO)
1241 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1242 BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
1243 .addFrameIndex(FI)
1244 .addImm(0)
1245 .addMemOperand(MMO)
1247 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1248 BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
1249 .addFrameIndex(FI)
1250 .addImm(0)
1251 .addMemOperand(MMO)
1253 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1254 BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1255 .addFrameIndex(FI)
1256 .addImm(0)
1257 .addMemOperand(MMO)
1259 } else
1260 llvm_unreachable("Unknown reg class!");
1261 break;
1262 case 8:
1263 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1264 BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1265 .addFrameIndex(FI)
1266 .addImm(0)
1267 .addMemOperand(MMO)
1269 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1271
1272 if (Subtarget.hasV5TEOps()) {
1273 MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1274 AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1275 AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1276 MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
1278 } else {
1279 // Fallback to LDM instruction, which has existed since the dawn of
1280 // time.
1281 MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
1282 .addFrameIndex(FI)
1283 .addMemOperand(MMO)
1285 MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1286 MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1287 }
1288
1289 if (DestReg.isPhysical())
1290 MIB.addReg(DestReg, RegState::ImplicitDefine);
1291 } else
1292 llvm_unreachable("Unknown reg class!");
1293 break;
1294 case 16:
1295 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1296 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1297 BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1298 .addFrameIndex(FI)
1299 .addImm(16)
1300 .addMemOperand(MMO)
1302 } else {
1303 BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1304 .addFrameIndex(FI)
1305 .addMemOperand(MMO)
1307 }
1308 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1309 Subtarget.hasMVEIntegerOps()) {
1310 auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
1311 MIB.addFrameIndex(FI)
1312 .addImm(0)
1313 .addMemOperand(MMO);
1315 } else
1316 llvm_unreachable("Unknown reg class!");
1317 break;
1318 case 24:
1319 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1320 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1321 Subtarget.hasNEON()) {
1322 BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1323 .addFrameIndex(FI)
1324 .addImm(16)
1325 .addMemOperand(MMO)
1327 } else {
1328 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1329 .addFrameIndex(FI)
1330 .addMemOperand(MMO)
1332 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1333 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1334 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1335 if (DestReg.isPhysical())
1336 MIB.addReg(DestReg, RegState::ImplicitDefine);
1337 }
1338 } else
1339 llvm_unreachable("Unknown reg class!");
1340 break;
1341 case 32:
1342 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1343 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1344 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1345 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1346 Subtarget.hasNEON()) {
1347 BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1348 .addFrameIndex(FI)
1349 .addImm(16)
1350 .addMemOperand(MMO)
1352 } else if (Subtarget.hasMVEIntegerOps()) {
1353 BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
1354 .addFrameIndex(FI)
1355 .addMemOperand(MMO);
1356 } else {
1357 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1358 .addFrameIndex(FI)
1360 .addMemOperand(MMO);
1361 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1362 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1363 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1364 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1365 if (DestReg.isPhysical())
1366 MIB.addReg(DestReg, RegState::ImplicitDefine);
1367 }
1368 } else
1369 llvm_unreachable("Unknown reg class!");
1370 break;
1371 case 64:
1372 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1373 Subtarget.hasMVEIntegerOps()) {
1374 BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
1375 .addFrameIndex(FI)
1376 .addMemOperand(MMO);
1377 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1378 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1379 .addFrameIndex(FI)
1381 .addMemOperand(MMO);
1382 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1383 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1384 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1385 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1386 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1387 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1388 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1389 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1390 if (DestReg.isPhysical())
1391 MIB.addReg(DestReg, RegState::ImplicitDefine);
1392 } else
1393 llvm_unreachable("Unknown reg class!");
1394 break;
1395 default:
1396 llvm_unreachable("Unknown regclass!");
1397 }
1398}
1399
1401 int &FrameIndex) const {
1402 switch (MI.getOpcode()) {
1403 default: break;
1404 case ARM::LDRrs:
1405 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1406 if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
1407 MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
1408 MI.getOperand(3).getImm() == 0) {
1409 FrameIndex = MI.getOperand(1).getIndex();
1410 return MI.getOperand(0).getReg();
1411 }
1412 break;
1413 case ARM::LDRi12:
1414 case ARM::t2LDRi12:
1415 case ARM::tLDRspi:
1416 case ARM::VLDRD:
1417 case ARM::VLDRS:
1418 case ARM::VLDRH:
1419 case ARM::VLDR_P0_off:
1420 case ARM::VLDR_FPSCR_NZCVQC_off:
1421 case ARM::MVE_VLDRWU32:
1422 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
1423 MI.getOperand(2).getImm() == 0) {
1424 FrameIndex = MI.getOperand(1).getIndex();
1425 return MI.getOperand(0).getReg();
1426 }
1427 break;
1428 case ARM::VLD1q64:
1429 case ARM::VLD1d8TPseudo:
1430 case ARM::VLD1d16TPseudo:
1431 case ARM::VLD1d32TPseudo:
1432 case ARM::VLD1d64TPseudo:
1433 case ARM::VLD1d8QPseudo:
1434 case ARM::VLD1d16QPseudo:
1435 case ARM::VLD1d32QPseudo:
1436 case ARM::VLD1d64QPseudo:
1437 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1438 FrameIndex = MI.getOperand(1).getIndex();
1439 return MI.getOperand(0).getReg();
1440 }
1441 break;
1442 case ARM::VLDMQIA:
1443 if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
1444 FrameIndex = MI.getOperand(1).getIndex();
1445 return MI.getOperand(0).getReg();
1446 }
1447 break;
1448 case ARM::MQQPRLoad:
1449 case ARM::MQQQQPRLoad:
1450 if (MI.getOperand(1).isFI()) {
1451 FrameIndex = MI.getOperand(1).getIndex();
1452 return MI.getOperand(0).getReg();
1453 }
1454 break;
1455 }
1456
1457 return 0;
1458}
1459
1461 int &FrameIndex) const {
1463 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1464 Accesses.size() == 1) {
1465 FrameIndex =
1466 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1467 ->getFrameIndex();
1468 return true;
1469 }
1470 return false;
1471}
1472
1473/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1474/// depending on whether the result is used.
1475void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1476 bool isThumb1 = Subtarget.isThumb1Only();
1477 bool isThumb2 = Subtarget.isThumb2();
1478 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1479
1480 DebugLoc dl = MI->getDebugLoc();
1481 MachineBasicBlock *BB = MI->getParent();
1482
1483 MachineInstrBuilder LDM, STM;
1484 if (isThumb1 || !MI->getOperand(1).isDead()) {
1485 MachineOperand LDWb(MI->getOperand(1));
1486 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
1487 : isThumb1 ? ARM::tLDMIA_UPD
1488 : ARM::LDMIA_UPD))
1489 .add(LDWb);
1490 } else {
1491 LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1492 }
1493
1494 if (isThumb1 || !MI->getOperand(0).isDead()) {
1495 MachineOperand STWb(MI->getOperand(0));
1496 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
1497 : isThumb1 ? ARM::tSTMIA_UPD
1498 : ARM::STMIA_UPD))
1499 .add(STWb);
1500 } else {
1501 STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1502 }
1503
1504 MachineOperand LDBase(MI->getOperand(3));
1505 LDM.add(LDBase).add(predOps(ARMCC::AL));
1506
1507 MachineOperand STBase(MI->getOperand(2));
1508 STM.add(STBase).add(predOps(ARMCC::AL));
1509
1510 // Sort the scratch registers into ascending order.
1512 SmallVector<unsigned, 6> ScratchRegs;
1513 for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
1514 ScratchRegs.push_back(MO.getReg());
1515 llvm::sort(ScratchRegs,
1516 [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1517 return TRI.getEncodingValue(Reg1) <
1518 TRI.getEncodingValue(Reg2);
1519 });
1520
1521 for (const auto &Reg : ScratchRegs) {
1522 LDM.addReg(Reg, RegState::Define);
1523 STM.addReg(Reg, RegState::Kill);
1524 }
1525
1526 BB->erase(MI);
1527}
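
// Editorial example (not upstream code): the MEMCPY pseudo and one possible
// expansion produced above, for an ARM-mode function whose writeback results
// are used. Operand layout and register choices are illustrative.
//
//   before:  $r0, $r1 = MEMCPY $r0, $r1, 4, def $r4, def $r5, def $r6, def $r7
//   after:   $r1 = LDMIA_UPD $r1, 14, $noreg, def $r4, ..., def $r7
//            $r0 = STMIA_UPD $r0, 14, $noreg, killed $r4, ..., killed $r7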
1528
1530 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1531 expandLoadStackGuard(MI);
1532 MI.getParent()->erase(MI);
1533 return true;
1534 }
1535
1536 if (MI.getOpcode() == ARM::MEMCPY) {
1537 expandMEMCPY(MI);
1538 return true;
1539 }
1540
1541 // This hook gets to expand COPY instructions before they become
1542 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1543 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1544 // changed into a VORR that can go down the NEON pipeline.
1545 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1546 return false;
1547
1548 // Look for a copy between even S-registers. That is where we keep floats
1549 // when using NEON v2f32 instructions for f32 arithmetic.
1550 Register DstRegS = MI.getOperand(0).getReg();
1551 Register SrcRegS = MI.getOperand(1).getReg();
1552 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1553 return false;
1554
1556 MCRegister DstRegD =
1557 TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, &ARM::DPRRegClass);
1558 MCRegister SrcRegD =
1559 TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, &ARM::DPRRegClass);
1560 if (!DstRegD || !SrcRegD)
1561 return false;
1562
1563 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1564 // legal if the COPY already defines the full DstRegD, and it isn't a
1565 // sub-register insertion.
1566 if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
1567 return false;
1568
1569 // A dead copy shouldn't show up here, but reject it just in case.
1570 if (MI.getOperand(0).isDead())
1571 return false;
1572
1573 // All clear, widen the COPY.
1574 LLVM_DEBUG(dbgs() << "widening: " << MI);
1575 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1576
1577 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1578 // or some other super-register.
1579 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
1580 if (ImpDefIdx != -1)
1581 MI.removeOperand(ImpDefIdx);
1582
1583 // Change the opcode and operands.
1584 MI.setDesc(get(ARM::VMOVD));
1585 MI.getOperand(0).setReg(DstRegD);
1586 MI.getOperand(1).setReg(SrcRegD);
1587 MIB.add(predOps(ARMCC::AL));
1588
1589 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1590 // register scavenger and machine verifier, so we need to indicate that we
1591 // are reading an undefined value from SrcRegD, but a proper value from
1592 // SrcRegS.
1593 MI.getOperand(1).setIsUndef();
1594 MIB.addReg(SrcRegS, RegState::Implicit);
1595
1596 // SrcRegD may actually contain an unrelated value in the ssub_1
1597 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1598 if (MI.getOperand(1).isKill()) {
1599 MI.getOperand(1).setIsKill(false);
1600 MI.addRegisterKilled(SrcRegS, TRI, true);
1601 }
1602
1603 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1604 return true;
1605}
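
// Editorial example (not upstream code): the VMOVS -> VMOVD widening performed
// above, as before/after MIR. The undef marker on the D source and the
// implicit read/kill of the original S source are exactly the bookkeeping
// described in the comments; registers are illustrative ($s0/$s1 live in $d0,
// $s2/$s3 in $d1).
//
//   before:  $s0 = COPY killed $s2
//   after:   $d0 = VMOVD undef $d1, 14 /* AL */, $noreg, implicit killed $s2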
1606
1607/// Create a copy of a const pool value. Update CPI to the new index and return
1608/// the label UID.
1609static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1612
1613 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1614 assert(MCPE.isMachineConstantPoolEntry() &&
1615 "Expecting a machine constantpool entry!");
1616 ARMConstantPoolValue *ACPV =
1617 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1618
1619 unsigned PCLabelId = AFI->createPICLabelUId();
1620 ARMConstantPoolValue *NewCPV = nullptr;
1621
1622 // FIXME: The below assumes PIC relocation model and that the function
1623 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1624 // zero for non-PIC in ARM or Thumb. The callers all create thumb LDR
1625 // instructions, so that's probably OK, but is PIC always correct when
1626 // we get here?
1627 if (ACPV->isGlobalValue())
1629 cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
1630 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
1631 else if (ACPV->isExtSymbol())
1632 NewCPV = ARMConstantPoolSymbol::
1633 Create(MF.getFunction().getContext(),
1634 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1635 else if (ACPV->isBlockAddress())
1636 NewCPV = ARMConstantPoolConstant::
1637 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1639 else if (ACPV->isLSDA())
1640 NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
1641 ARMCP::CPLSDA, 4);
1642 else if (ACPV->isMachineBasicBlock())
1643 NewCPV = ARMConstantPoolMBB::
1644 Create(MF.getFunction().getContext(),
1645 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1646 else
1647 llvm_unreachable("Unexpected ARM constantpool value type!!");
1648 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
1649 return PCLabelId;
1650}
1651
1654 Register DestReg, unsigned SubIdx,
1655 const MachineInstr &Orig,
1656 const TargetRegisterInfo &TRI) const {
1657 unsigned Opcode = Orig.getOpcode();
1658 switch (Opcode) {
1659 default: {
1661 MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1662 MBB.insert(I, MI);
1663 break;
1664 }
1665 case ARM::tLDRpci_pic:
1666 case ARM::t2LDRpci_pic: {
1667 MachineFunction &MF = *MBB.getParent();
1668 unsigned CPI = Orig.getOperand(1).getIndex();
1669 unsigned PCLabelId = duplicateCPV(MF, CPI);
1670 BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
1672 .addImm(PCLabelId)
1673 .cloneMemRefs(Orig);
1674 break;
1675 }
1676 }
1677}
1678
1681 MachineBasicBlock::iterator InsertBefore,
1682 const MachineInstr &Orig) const {
1683 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1685 for (;;) {
1686 switch (I->getOpcode()) {
1687 case ARM::tLDRpci_pic:
1688 case ARM::t2LDRpci_pic: {
1689 MachineFunction &MF = *MBB.getParent();
1690 unsigned CPI = I->getOperand(1).getIndex();
1691 unsigned PCLabelId = duplicateCPV(MF, CPI);
1692 I->getOperand(1).setIndex(CPI);
1693 I->getOperand(2).setImm(PCLabelId);
1694 break;
1695 }
1696 }
1697 if (!I->isBundledWithSucc())
1698 break;
1699 ++I;
1700 }
1701 return Cloned;
1702}
1703
1705 const MachineInstr &MI1,
1706 const MachineRegisterInfo *MRI) const {
1707 unsigned Opcode = MI0.getOpcode();
1708 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1709 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1710 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1711 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1712 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1713 Opcode == ARM::t2MOV_ga_pcrel) {
1714 if (MI1.getOpcode() != Opcode)
1715 return false;
1716 if (MI0.getNumOperands() != MI1.getNumOperands())
1717 return false;
1718
1719 const MachineOperand &MO0 = MI0.getOperand(1);
1720 const MachineOperand &MO1 = MI1.getOperand(1);
1721 if (MO0.getOffset() != MO1.getOffset())
1722 return false;
1723
1724 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1725 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1726 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1727 Opcode == ARM::t2MOV_ga_pcrel)
1728 // Ignore the PC labels.
1729 return MO0.getGlobal() == MO1.getGlobal();
1730
1731 const MachineFunction *MF = MI0.getParent()->getParent();
1732 const MachineConstantPool *MCP = MF->getConstantPool();
1733 int CPI0 = MO0.getIndex();
1734 int CPI1 = MO1.getIndex();
1735 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1736 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1737 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1738 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1739 if (isARMCP0 && isARMCP1) {
1740 ARMConstantPoolValue *ACPV0 =
1741 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1742 ARMConstantPoolValue *ACPV1 =
1743 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1744 return ACPV0->hasSameValue(ACPV1);
1745 } else if (!isARMCP0 && !isARMCP1) {
1746 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1747 }
1748 return false;
1749 } else if (Opcode == ARM::PICLDR) {
1750 if (MI1.getOpcode() != Opcode)
1751 return false;
1752 if (MI0.getNumOperands() != MI1.getNumOperands())
1753 return false;
1754
1755 Register Addr0 = MI0.getOperand(1).getReg();
1756 Register Addr1 = MI1.getOperand(1).getReg();
1757 if (Addr0 != Addr1) {
1758 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1759 return false;
1760
1761 // This assumes SSA form.
1762 MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1763 MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1764 // Check if the loaded value, e.g. a constantpool of a global address, are
1765 // the same.
1766 if (!produceSameValue(*Def0, *Def1, MRI))
1767 return false;
1768 }
1769
1770 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1771 // %12 = PICLDR %11, 0, 14, %noreg
1772 const MachineOperand &MO0 = MI0.getOperand(i);
1773 const MachineOperand &MO1 = MI1.getOperand(i);
1774 if (!MO0.isIdenticalTo(MO1))
1775 return false;
1776 }
1777 return true;
1778 }
1779
1781}
1782
1783/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1784/// determine if two loads are loading from the same base address. It should
1785/// only return true if the base pointers are the same and the only difference
1786/// between the two addresses is the offset. It also returns the offsets by
1787/// reference.
1788///
1789/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1790/// is permanently disabled.
1792 int64_t &Offset1,
1793 int64_t &Offset2) const {
1794 // Don't worry about Thumb: just ARM and Thumb2.
1795 if (Subtarget.isThumb1Only()) return false;
1796
1797 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1798 return false;
1799
1800 auto IsLoadOpcode = [&](unsigned Opcode) {
1801 switch (Opcode) {
1802 default:
1803 return false;
1804 case ARM::LDRi12:
1805 case ARM::LDRBi12:
1806 case ARM::LDRD:
1807 case ARM::LDRH:
1808 case ARM::LDRSB:
1809 case ARM::LDRSH:
1810 case ARM::VLDRD:
1811 case ARM::VLDRS:
1812 case ARM::t2LDRi8:
1813 case ARM::t2LDRBi8:
1814 case ARM::t2LDRDi8:
1815 case ARM::t2LDRSHi8:
1816 case ARM::t2LDRi12:
1817 case ARM::t2LDRBi12:
1818 case ARM::t2LDRSHi12:
1819 return true;
1820 }
1821 };
1822
1823 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1824 !IsLoadOpcode(Load2->getMachineOpcode()))
1825 return false;
1826
1827 // Check if base addresses and chain operands match.
1828 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1829 Load1->getOperand(4) != Load2->getOperand(4))
1830 return false;
1831
1832 // Index should be Reg0.
1833 if (Load1->getOperand(3) != Load2->getOperand(3))
1834 return false;
1835
1836 // Determine the offsets.
1837 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1838 isa<ConstantSDNode>(Load2->getOperand(1))) {
1839 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1840 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1841 return true;
1842 }
1843
1844 return false;
1845}
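// Illustrative example (values invented for exposition): two LDRi12 nodes that
// share the same base-pointer, index and chain operands but carry constant
// offsets of 8 and 12 make the hook above return true with Offset1 = 8 and
// Offset2 = 12; if either offset operand is not a ConstantSDNode the hook
// conservatively returns false.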
1846
1847/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1848/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1849/// be scheduled together. On some targets if two loads are loading from
1850/// addresses in the same cache line, it's better if they are scheduled
1851/// together. This function takes two integers that represent the load offsets
1852/// from the common base address. It returns true if it decides it's desirable
1853/// to schedule the two loads together. "NumLoads" is the number of loads that
1854/// have already been scheduled after Load1.
1855///
1856/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1857/// is permanently disabled.
1858bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1859 int64_t Offset1, int64_t Offset2,
1860 unsigned NumLoads) const {
1861 // Don't worry about Thumb: just ARM and Thumb2.
1862 if (Subtarget.isThumb1Only()) return false;
1863
1864 assert(Offset2 > Offset1);
1865
1866 if ((Offset2 - Offset1) / 8 > 64)
1867 return false;
1868
1869 // Check if the machine opcodes are different. If they are, we consider the
1870 // loads not to be from the same base address, EXCEPT for Thumb2 byte loads
1871 // where one is t2LDRBi8 and the other t2LDRBi12. Those are considered the
1872 // same because they are merely different encoding forms of the same basic
1873 // instruction.
1874 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1875 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1876 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1877 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1878 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1879 return false; // FIXME: overly conservative?
1880
1881 // Four loads in a row should be sufficient.
1882 if (NumLoads >= 3)
1883 return false;
1884
1885 return true;
1886}
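// Worked example (illustrative numbers): with Offset1 = 8 and Offset2 = 24 the
// distance test passes ((24 - 8) / 8 = 2, well under 64), so two loads of the
// same opcode, or the t2LDRBi8 / t2LDRBi12 pair, are still scheduled together
// provided fewer than three loads have already been placed after Load1.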
1887
1888bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1889 const MachineBasicBlock *MBB,
1890 const MachineFunction &MF) const {
1891 // Debug info is never a scheduling boundary. It's necessary to be explicit
1892 // due to the special treatment of IT instructions below, otherwise a
1893 // dbg_value followed by an IT will result in the IT instruction being
1894 // considered a scheduling hazard, which is wrong. It should be the actual
1895 // instruction preceding the dbg_value instruction(s), just like it is
1896 // when debug info is not present.
1897 if (MI.isDebugInstr())
1898 return false;
1899
1900 // Terminators and labels can't be scheduled around.
1901 if (MI.isTerminator() || MI.isPosition())
1902 return true;
1903
1904 // INLINEASM_BR can jump to another block
1905 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
1906 return true;
1907
1908 if (isSEHInstruction(MI))
1909 return true;
1910
1911 // Treat the start of the IT block as a scheduling boundary, but schedule
1912 // t2IT along with all instructions following it.
1913 // FIXME: This is a big hammer. But the alternative is to add all potential
1914 // true and anti dependencies to IT block instructions as implicit operands
1915 // to the t2IT instruction. The added compile time and complexity does not
1916 // seem worth it.
1917 MachineBasicBlock::const_iterator I = MI;
1918 // Make sure to skip any debug instructions
1919 while (++I != MBB->end() && I->isDebugInstr())
1920 ;
1921 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1922 return true;
1923
1924 // Don't attempt to schedule around any instruction that defines
1925 // a stack-oriented pointer, as it's unlikely to be profitable. This
1926 // saves compile time, because it doesn't require every single
1927 // stack slot reference to depend on the instruction that does the
1928 // modification.
1929 // Calls don't actually change the stack pointer, even if they have imp-defs.
1930 // No ARM calling conventions change the stack pointer. (X86 calling
1931 // conventions sometimes do).
1932 if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
1933 return true;
1934
1935 return false;
1936}
1937
1938bool ARMBaseInstrInfo::
1939isProfitableToIfCvt(MachineBasicBlock &MBB,
1940 unsigned NumCycles, unsigned ExtraPredCycles,
1941 BranchProbability Probability) const {
1942 if (!NumCycles)
1943 return false;
1944
1945 // If we are optimizing for size, see if the branch in the predecessor can be
1946 // lowered to cbn?z by the constant island lowering pass, and return false if
1947 // so. This results in a shorter instruction sequence.
1948 if (MBB.getParent()->getFunction().hasOptSize()) {
1949 MachineBasicBlock *Pred = *MBB.pred_begin();
1950 if (!Pred->empty()) {
1951 MachineInstr *LastMI = &*Pred->rbegin();
1952 if (LastMI->getOpcode() == ARM::t2Bcc) {
1953 const TargetRegisterInfo *TRI = &getRegisterInfo();
1954 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
1955 if (CmpMI)
1956 return false;
1957 }
1958 }
1959 }
1960 return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
1961 MBB, 0, 0, Probability);
1962}
1963
1964bool ARMBaseInstrInfo::
1965isProfitableToIfCvt(MachineBasicBlock &TBB,
1966 unsigned TCycles, unsigned TExtra,
1967 MachineBasicBlock &FBB,
1968 unsigned FCycles, unsigned FExtra,
1969 BranchProbability Probability) const {
1970 if (!TCycles)
1971 return false;
1972
1973 // In Thumb2 code we often end up trading one branch for an IT block, and
1974 // if we are cloning instructions this can increase code size. Prevent
1975 // blocks with multiple predecessors from being if-converted to avoid this
1976 // cloning.
1977 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
1978 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
1979 return false;
1980 }
1981
1982 // Attempt to estimate the relative costs of predication versus branching.
1983 // Here we scale up each component of UnpredCost to avoid precision issues when
1984 // scaling TCycles/FCycles by Probability.
1985 const unsigned ScalingUpFactor = 1024;
1986
1987 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1988 unsigned UnpredCost;
1989 if (!Subtarget.hasBranchPredictor()) {
1990 // When we don't have a branch predictor it's always cheaper to not take a
1991 // branch than take it, so we have to take that into account.
1992 unsigned NotTakenBranchCost = 1;
1993 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1994 unsigned TUnpredCycles, FUnpredCycles;
1995 if (!FCycles) {
1996 // Triangle: TBB is the fallthrough
1997 TUnpredCycles = TCycles + NotTakenBranchCost;
1998 FUnpredCycles = TakenBranchCost;
1999 } else {
2000 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2001 TUnpredCycles = TCycles + TakenBranchCost;
2002 FUnpredCycles = FCycles + NotTakenBranchCost;
2003 // The branch at the end of FBB will disappear when it's predicated, so
2004 // discount it from PredCost.
2005 PredCost -= 1 * ScalingUpFactor;
2006 }
2007 // The total cost is the cost of each path scaled by their probabilities.
2008 unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
2009 unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
2010 UnpredCost = TUnpredCost + FUnpredCost;
2011 // When predicating, assume that the first IT can be folded away but later
2012 // ones cost one cycle each.
2013 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2014 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2015 }
2016 } else {
2017 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
2018 unsigned FUnpredCost =
2019 Probability.getCompl().scale(FCycles * ScalingUpFactor);
2020 UnpredCost = TUnpredCost + FUnpredCost;
2021 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2022 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2023 }
2024
2025 return PredCost <= UnpredCost;
2026}
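// Worked example (numbers invented, including a misprediction penalty of 4):
// for a triangle with TCycles = 2, TExtra = 1, FCycles = FExtra = 0 and a 50%
// branch probability on a core without a branch predictor, PredCost is
// (2 + 0 + 1 + 0) * 1024 = 3072, while TUnpredCycles = 2 + 1 and
// FUnpredCycles = 4 give UnpredCost = 0.5 * 3 * 1024 + 0.5 * 4 * 1024 = 3584,
// so predication is judged profitable.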
2027
2028unsigned
2030 unsigned NumInsts) const {
2031 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2032 // ARM has a condition code field in every predicable instruction, so using
2033 // it doesn't change code size.
2034 if (!Subtarget.isThumb2())
2035 return 0;
2036
2037 // With restrictIT(), an IT block is limited to covering a single instruction.
2038 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
2039 return divideCeil(NumInsts, MaxInsts) * 2;
2040}
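// Worked example: predicating NumInsts = 6 instructions on Thumb2 costs
// divideCeil(6, 4) = 2 IT instructions, i.e. 4 extra bytes; with restrictIT()
// each IT covers a single instruction, so the same six instructions cost
// divideCeil(6, 1) * 2 = 12 bytes.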
2041
2042unsigned
2044 // If this branch is likely to be folded into the comparison to form a
2045 // CB(N)Z, then removing it won't reduce code size at all, because that will
2046 // just replace the CB(N)Z with a CMP.
2047 if (MI.getOpcode() == ARM::t2Bcc &&
2048 findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
2049 return 0;
2050
2051 unsigned Size = getInstSizeInBytes(MI);
2052
2053 // For Thumb2, all branches are 32-bit instructions during the if conversion
2054 // pass, but may be replaced with 16-bit instructions during size reduction.
2055 // Since the branches considered by if conversion tend to be forward branches
2056 // over small basic blocks, they are very likely to be in range for the
2057 // narrow instructions, so we assume the final code size will be half what it
2058 // currently is.
2059 if (Subtarget.isThumb2())
2060 Size /= 2;
2061
2062 return Size;
2063}
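// For example, a t2Bcc that findCMPToFoldIntoCBZ can turn into a CB(N)Z is
// reported as 0 bytes, since removing it would only replace the CB(N)Z with a
// CMP, while any other 4-byte Thumb2 branch is assumed to end up as 2 bytes
// after later size reduction.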
2064
2065bool
2067 MachineBasicBlock &FMBB) const {
2068 // Reduce false anti-dependencies to let the target's out-of-order execution
2069 // engine do its thing.
2070 return Subtarget.isProfitableToUnpredicate();
2071}
2072
2073/// getInstrPredicate - If instruction is predicated, returns its predicate
2074/// condition, otherwise returns AL. It also returns the condition code
2075/// register by reference.
2076ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2077 Register &PredReg) {
2078 int PIdx = MI.findFirstPredOperandIdx();
2079 if (PIdx == -1) {
2080 PredReg = 0;
2081 return ARMCC::AL;
2082 }
2083
2084 PredReg = MI.getOperand(PIdx+1).getReg();
2085 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
2086}
2087
2088unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2089 if (Opc == ARM::B)
2090 return ARM::Bcc;
2091 if (Opc == ARM::tB)
2092 return ARM::tBcc;
2093 if (Opc == ARM::t2B)
2094 return ARM::t2Bcc;
2095
2096 llvm_unreachable("Unknown unconditional branch opcode!");
2097}
2098
2099MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2100 bool NewMI,
2101 unsigned OpIdx1,
2102 unsigned OpIdx2) const {
2103 switch (MI.getOpcode()) {
2104 case ARM::MOVCCr:
2105 case ARM::t2MOVCCr: {
2106 // MOVCC can be commuted by inverting the condition.
2107 Register PredReg;
2108 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2109 // MOVCC AL can't be inverted. Shouldn't happen.
2110 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2111 return nullptr;
2112 MachineInstr *CommutedMI =
2113 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2114 if (!CommutedMI)
2115 return nullptr;
2116 // After swapping the MOVCC operands, also invert the condition.
2117 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
2118 .setImm(ARMCC::getOppositeCondition(CC));
2119 return CommutedMI;
2120 }
2121 }
2122 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2123}
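// Illustrative example (virtual register numbers invented): commuting
//   %3:gpr = MOVCCr %1:gpr, %2:gpr, 10 /* CC::ge */, $cpsr
// swaps the two source operands and inverts the predicate, giving
//   %3:gpr = MOVCCr %2:gpr, %1:gpr, 11 /* CC::lt */, $cpsr
// An AL predicate, or a predicate register other than CPSR, makes the hook
// give up and return nullptr.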
2124
2125/// Identify instructions that can be folded into a MOVCC instruction, and
2126/// return the defining instruction.
2127MachineInstr *
2128ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2129 const TargetInstrInfo *TII) const {
2130 if (!Reg.isVirtual())
2131 return nullptr;
2132 if (!MRI.hasOneNonDBGUse(Reg))
2133 return nullptr;
2134 MachineInstr *MI = MRI.getVRegDef(Reg);
2135 if (!MI)
2136 return nullptr;
2137 // Check if MI can be predicated and folded into the MOVCC.
2138 if (!isPredicable(*MI))
2139 return nullptr;
2140 // Check if MI has any non-dead defs or physreg uses. This also detects
2141 // predicated instructions which will be reading CPSR.
2142 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
2143 // Reject frame index operands, PEI can't handle the predicated pseudos.
2144 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2145 return nullptr;
2146 if (!MO.isReg())
2147 continue;
2148 // MI can't have any tied operands, that would conflict with predication.
2149 if (MO.isTied())
2150 return nullptr;
2151 if (MO.getReg().isPhysical())
2152 return nullptr;
2153 if (MO.isDef() && !MO.isDead())
2154 return nullptr;
2155 }
2156 bool DontMoveAcrossStores = true;
2157 if (!MI->isSafeToMove(DontMoveAcrossStores))
2158 return nullptr;
2159 return MI;
2160}
2161
2162bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2163 SmallVectorImpl<MachineOperand> &Cond,
2164 unsigned &TrueOp, unsigned &FalseOp,
2165 bool &Optimizable) const {
2166 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2167 "Unknown select instruction");
2168 // MOVCC operands:
2169 // 0: Def.
2170 // 1: True use.
2171 // 2: False use.
2172 // 3: Condition code.
2173 // 4: CPSR use.
2174 TrueOp = 1;
2175 FalseOp = 2;
2176 Cond.push_back(MI.getOperand(3));
2177 Cond.push_back(MI.getOperand(4));
2178 // We can always fold a def.
2179 Optimizable = true;
2180 return false;
2181}
2182
2183MachineInstr *
2184ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2185 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2186 bool PreferFalse) const {
2187 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2188 "Unknown select instruction");
2189 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2190 MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
2191 bool Invert = !DefMI;
2192 if (!DefMI)
2193 DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
2194 if (!DefMI)
2195 return nullptr;
2196
2197 // Find new register class to use.
2198 MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
2199 MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
2200 Register DestReg = MI.getOperand(0).getReg();
2201 const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
2202 const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
2203 if (!MRI.constrainRegClass(DestReg, FalseClass))
2204 return nullptr;
2205 if (!MRI.constrainRegClass(DestReg, TrueClass))
2206 return nullptr;
2207
2208 // Create a new predicated version of DefMI.
2209 // Rfalse is the first use.
2210 MachineInstrBuilder NewMI =
2211 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
2212
2213 // Copy all the DefMI operands, excluding its (null) predicate.
2214 const MCInstrDesc &DefDesc = DefMI->getDesc();
2215 for (unsigned i = 1, e = DefDesc.getNumOperands();
2216 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2217 NewMI.add(DefMI->getOperand(i));
2218
2219 unsigned CondCode = MI.getOperand(3).getImm();
2220 if (Invert)
2221 NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
2222 else
2223 NewMI.addImm(CondCode);
2224 NewMI.add(MI.getOperand(4));
2225
2226 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2227 if (NewMI->hasOptionalDef())
2228 NewMI.add(condCodeOp());
2229
2230 // The output register value when the predicate is false is an implicit
2231 // register operand tied to the first def.
2232 // The tie makes the register allocator ensure the FalseReg is allocated the
2233 // same register as operand 0.
2234 FalseReg.setImplicit();
2235 NewMI.add(FalseReg);
2236 NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
2237
2238 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2239 SeenMIs.insert(NewMI);
2240 SeenMIs.erase(DefMI);
2241
2242 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2243 // DefMI would be invalid when transferred inside the loop. Checking for a
2244 // loop is expensive, but at least remove kill flags if they are in different
2245 // BBs.
2246 if (DefMI->getParent() != MI.getParent())
2247 NewMI->clearKillInfo();
2248
2249 // The caller will erase MI, but not DefMI.
2250 DefMI->eraseFromParent();
2251 return NewMI;
2252}
2253
2254/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2255/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2256/// def operand.
2257///
2258/// This will go away once we can teach tblgen how to set the optional CPSR def
2259/// operand itself.
2260struct AddSubFlagsOpcodePair {
2261 uint16_t PseudoOpc;
2262 uint16_t MachineOpc;
2263};
2264
2265static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2266 {ARM::ADDSri, ARM::ADDri},
2267 {ARM::ADDSrr, ARM::ADDrr},
2268 {ARM::ADDSrsi, ARM::ADDrsi},
2269 {ARM::ADDSrsr, ARM::ADDrsr},
2270
2271 {ARM::SUBSri, ARM::SUBri},
2272 {ARM::SUBSrr, ARM::SUBrr},
2273 {ARM::SUBSrsi, ARM::SUBrsi},
2274 {ARM::SUBSrsr, ARM::SUBrsr},
2275
2276 {ARM::RSBSri, ARM::RSBri},
2277 {ARM::RSBSrsi, ARM::RSBrsi},
2278 {ARM::RSBSrsr, ARM::RSBrsr},
2279
2280 {ARM::tADDSi3, ARM::tADDi3},
2281 {ARM::tADDSi8, ARM::tADDi8},
2282 {ARM::tADDSrr, ARM::tADDrr},
2283 {ARM::tADCS, ARM::tADC},
2284
2285 {ARM::tSUBSi3, ARM::tSUBi3},
2286 {ARM::tSUBSi8, ARM::tSUBi8},
2287 {ARM::tSUBSrr, ARM::tSUBrr},
2288 {ARM::tSBCS, ARM::tSBC},
2289 {ARM::tRSBS, ARM::tRSB},
2290 {ARM::tLSLSri, ARM::tLSLri},
2291
2292 {ARM::t2ADDSri, ARM::t2ADDri},
2293 {ARM::t2ADDSrr, ARM::t2ADDrr},
2294 {ARM::t2ADDSrs, ARM::t2ADDrs},
2295
2296 {ARM::t2SUBSri, ARM::t2SUBri},
2297 {ARM::t2SUBSrr, ARM::t2SUBrr},
2298 {ARM::t2SUBSrs, ARM::t2SUBrs},
2299
2300 {ARM::t2RSBSri, ARM::t2RSBri},
2301 {ARM::t2RSBSrs, ARM::t2RSBrs},
2302};
2303
2304unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2305 for (const auto &Entry : AddSubFlagsOpcodeMap)
2306 if (OldOpc == Entry.PseudoOpc)
2307 return Entry.MachineOpc;
2308 return 0;
2309}
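// For example, convertAddSubFlagsOpcode(ARM::ADDSri) returns ARM::ADDri and
// convertAddSubFlagsOpcode(ARM::t2SUBSrr) returns ARM::t2SUBrr, while any
// opcode that is not in the table above yields 0.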
2310
2311void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2312 MachineBasicBlock::iterator &MBBI,
2313 const DebugLoc &dl, Register DestReg,
2314 Register BaseReg, int NumBytes,
2315 ARMCC::CondCodes Pred, Register PredReg,
2316 const ARMBaseInstrInfo &TII,
2317 unsigned MIFlags) {
2318 if (NumBytes == 0 && DestReg != BaseReg) {
2319 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
2320 .addReg(BaseReg, RegState::Kill)
2321 .add(predOps(Pred, PredReg))
2322 .add(condCodeOp())
2323 .setMIFlags(MIFlags);
2324 return;
2325 }
2326
2327 bool isSub = NumBytes < 0;
2328 if (isSub) NumBytes = -NumBytes;
2329
2330 while (NumBytes) {
2331 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
2332 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
2333 assert(ThisVal && "Didn't extract field correctly");
2334
2335 // We will handle these bits from offset, clear them.
2336 NumBytes &= ~ThisVal;
2337
2338 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2339
2340 // Build the new ADD / SUB.
2341 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2342 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
2343 .addReg(BaseReg, RegState::Kill)
2344 .addImm(ThisVal)
2345 .add(predOps(Pred, PredReg))
2346 .add(condCodeOp())
2347 .setMIFlags(MIFlags);
2348 BaseReg = DestReg;
2349 }
2350}
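// Worked example (illustrative): NumBytes = 4097 (0x1001) is not encodable as
// a single rotated immediate, so the loop above emits two instructions, e.g.
// an add of #1 followed by an add of #4096 (the chunk order is whatever
// getSOImmValRotate extracts first); NumBytes = 0 with DestReg != BaseReg
// degenerates to a single predicated MOVr.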
2351
2352bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2353 MachineFunction &MF, MachineInstr *MI,
2354 unsigned NumBytes) {
2355 // This optimisation potentially adds lots of load and store
2356 // micro-operations, so it is really only a benefit when optimising for size.
2357 if (!Subtarget.hasMinSize())
2358 return false;
2359
2360 // If only one register is pushed/popped, LLVM can use an LDR/STR
2361 // instead. We can't modify those so make sure we're dealing with an
2362 // instruction we understand.
2363 bool IsPop = isPopOpcode(MI->getOpcode());
2364 bool IsPush = isPushOpcode(MI->getOpcode());
2365 if (!IsPush && !IsPop)
2366 return false;
2367
2368 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2369 MI->getOpcode() == ARM::VLDMDIA_UPD;
2370 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2371 MI->getOpcode() == ARM::tPOP ||
2372 MI->getOpcode() == ARM::tPOP_RET;
2373
2374 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2375 MI->getOperand(1).getReg() == ARM::SP)) &&
2376 "trying to fold sp update into non-sp-updating push/pop");
2377
2378 // The VFP push & pop act on D-registers, so we can only correctly fold in an
2379 // adjustment that is a multiple of 8 bytes. Similarly, each rN is 4 bytes.
2380 // Don't try if this is violated.
2381 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2382 return false;
2383
2384 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2385 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2386 int RegListIdx = IsT1PushPop ? 2 : 4;
2387
2388 // Calculate the space we'll need in terms of registers.
2389 unsigned RegsNeeded;
2390 const TargetRegisterClass *RegClass;
2391 if (IsVFPPushPop) {
2392 RegsNeeded = NumBytes / 8;
2393 RegClass = &ARM::DPRRegClass;
2394 } else {
2395 RegsNeeded = NumBytes / 4;
2396 RegClass = &ARM::GPRRegClass;
2397 }
2398
2399 // We're going to have to strip all list operands off before
2400 // re-adding them since the order matters, so save the existing ones
2401 // for later.
2402 SmallVector<MachineOperand, 4> RegList;
2403
2404 // We're also going to need the first register transferred by this
2405 // instruction, which won't necessarily be the first register in the list.
2406 unsigned FirstRegEnc = -1;
2407 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2408
2409 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2410 MachineOperand &MO = MI->getOperand(i);
2411 RegList.push_back(MO);
2412
2413 if (MO.isReg() && !MO.isImplicit() &&
2414 TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
2415 FirstRegEnc = TRI->getEncodingValue(MO.getReg());
2416 }
2417
2418 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
2419
2420 // Now try to find enough space in the reglist to allocate NumBytes.
2421 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2422 --CurRegEnc) {
2423 MCRegister CurReg = RegClass->getRegister(CurRegEnc);
2424 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
2425 continue;
2426 if (!IsPop) {
2427 // Pushing any register is completely harmless; mark the register involved
2428 // as undef since we don't care about its value and must not restore it
2429 // during stack unwinding.
2430 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
2431 false, false, true));
2432 --RegsNeeded;
2433 continue;
2434 }
2435
2436 // However, we can only pop an extra register if it's not live. For
2437 // registers live within the function we might clobber a return value
2438 // register; the other way a register can be live here is if it's
2439 // callee-saved.
2440 if (isCalleeSavedRegister(CurReg, CSRegs) ||
2441 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
2442 MachineBasicBlock::LQR_Dead) {
2443 // VFP pops don't allow holes in the register list, so any skip is fatal
2444 // for our transformation. GPR pops do, so we should just keep looking.
2445 if (IsVFPPushPop)
2446 return false;
2447 else
2448 continue;
2449 }
2450
2451 // Mark the unimportant registers as <def,dead> in the POP.
2452 RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
2453 true));
2454 --RegsNeeded;
2455 }
2456
2457 if (RegsNeeded > 0)
2458 return false;
2459
2460 // Finally we know we can profitably perform the optimisation so go
2461 // ahead: strip all existing registers off and add them back again
2462 // in the right order.
2463 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2464 MI->removeOperand(i);
2465
2466 // Add the complete list back in.
2467 MachineInstrBuilder MIB(MF, &*MI);
2468 for (const MachineOperand &MO : llvm::reverse(RegList))
2469 MIB.add(MO);
2470
2471 return true;
2472}
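// Sketch of the intent (register choice illustrative): under minsize, a
// "push {r4, r5, lr}" followed by "sub sp, sp, #8" can be folded into
// "push {r2, r3, r4, r5, lr}", pushing the two extra registers as undef purely
// to reserve 8 bytes of stack; a pop can likewise absorb an "add sp, sp, #8"
// by popping into registers marked dead, provided they are neither live nor
// callee-saved.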
2473
2474bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2475 Register FrameReg, int &Offset,
2476 const ARMBaseInstrInfo &TII) {
2477 unsigned Opcode = MI.getOpcode();
2478 const MCInstrDesc &Desc = MI.getDesc();
2479 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2480 bool isSub = false;
2481
2482 // Memory operands in inline assembly always use AddrMode2.
2483 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2484 AddrMode = ARMII::AddrMode2;
2485
2486 if (Opcode == ARM::ADDri) {
2487 Offset += MI.getOperand(FrameRegIdx+1).getImm();
2488 if (Offset == 0) {
2489 // Turn it into a move.
2490 MI.setDesc(TII.get(ARM::MOVr));
2491 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2492 MI.removeOperand(FrameRegIdx+1);
2493 Offset = 0;
2494 return true;
2495 } else if (Offset < 0) {
2496 Offset = -Offset;
2497 isSub = true;
2498 MI.setDesc(TII.get(ARM::SUBri));
2499 }
2500
2501 // Common case: small offset, fits into instruction.
2502 if (ARM_AM::getSOImmVal(Offset) != -1) {
2503 // Replace the FrameIndex with sp / fp
2504 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2505 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2506 Offset = 0;
2507 return true;
2508 }
2509
2510 // Otherwise, pull as much of the immediate into this ADDri/SUBri
2511 // as possible.
2512 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2513 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
2514
2515 // We will handle these bits from offset, clear them.
2516 Offset &= ~ThisImmVal;
2517
2518 // Get the properly encoded SOImmVal field.
2519 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2520 "Bit extraction didn't work?");
2521 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2522 } else {
2523 unsigned ImmIdx = 0;
2524 int InstrOffs = 0;
2525 unsigned NumBits = 0;
2526 unsigned Scale = 1;
2527 switch (AddrMode) {
2528 case ARMII::AddrMode_i12:
2529 ImmIdx = FrameRegIdx + 1;
2530 InstrOffs = MI.getOperand(ImmIdx).getImm();
2531 NumBits = 12;
2532 break;
2533 case ARMII::AddrMode2:
2534 ImmIdx = FrameRegIdx+2;
2535 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2536 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2537 InstrOffs *= -1;
2538 NumBits = 12;
2539 break;
2540 case ARMII::AddrMode3:
2541 ImmIdx = FrameRegIdx+2;
2542 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2543 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2544 InstrOffs *= -1;
2545 NumBits = 8;
2546 break;
2547 case ARMII::AddrMode4:
2548 case ARMII::AddrMode6:
2549 // Can't fold any offset even if it's zero.
2550 return false;
2551 case ARMII::AddrMode5:
2552 ImmIdx = FrameRegIdx+1;
2553 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2554 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2555 InstrOffs *= -1;
2556 NumBits = 8;
2557 Scale = 4;
2558 break;
2559 case ARMII::AddrMode5FP16:
2560 ImmIdx = FrameRegIdx+1;
2561 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2562 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2563 InstrOffs *= -1;
2564 NumBits = 8;
2565 Scale = 2;
2566 break;
2567 case ARMII::AddrModeT2_i7:
2568 case ARMII::AddrModeT2_i7s2:
2569 case ARMII::AddrModeT2_i7s4:
2570 ImmIdx = FrameRegIdx+1;
2571 InstrOffs = MI.getOperand(ImmIdx).getImm();
2572 NumBits = 7;
2573 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2574 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2575 break;
2576 default:
2577 llvm_unreachable("Unsupported addressing mode!");
2578 }
2579
2580 Offset += InstrOffs * Scale;
2581 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2582 if (Offset < 0) {
2583 Offset = -Offset;
2584 isSub = true;
2585 }
2586
2587 // Attempt to fold the address computation if the opcode has offset bits.
2588 if (NumBits > 0) {
2589 // Common case: small offset, fits into instruction.
2590 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2591 int ImmedOffset = Offset / Scale;
2592 unsigned Mask = (1 << NumBits) - 1;
2593 if ((unsigned)Offset <= Mask * Scale) {
2594 // Replace the FrameIndex with sp
2595 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2596 // FIXME: When addrmode2 goes away, this will simplify (like the
2597 // T2 version), as the LDR.i12 versions don't need the encoding
2598 // tricks for the offset value.
2599 if (isSub) {
2600 if (AddrMode == ARMII::AddrMode_i12)
2601 ImmedOffset = -ImmedOffset;
2602 else
2603 ImmedOffset |= 1 << NumBits;
2604 }
2605 ImmOp.ChangeToImmediate(ImmedOffset);
2606 Offset = 0;
2607 return true;
2608 }
2609
2610 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2611 ImmedOffset = ImmedOffset & Mask;
2612 if (isSub) {
2613 if (AddrMode == ARMII::AddrMode_i12)
2614 ImmedOffset = -ImmedOffset;
2615 else
2616 ImmedOffset |= 1 << NumBits;
2617 }
2618 ImmOp.ChangeToImmediate(ImmedOffset);
2619 Offset &= ~(Mask*Scale);
2620 }
2621 }
2622
2623 Offset = (isSub) ? -Offset : Offset;
2624 return Offset == 0;
2625}
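// Worked example (illustrative operands): an ADDri whose frame-index operand
// resolves to offset 0 is rewritten into a plain MOVr from the frame register;
// with offset 12 it becomes an ADDri of #12 off sp/fp and the function returns
// true with Offset cleared, while an offset that is not a valid so_imm is only
// partially folded and the remainder is handed back in Offset.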
2626
2627/// analyzeCompare - For a comparison instruction, return the source registers
2628/// in SrcReg and SrcReg2 if having two register operands, and the value it
2629/// compares against in CmpValue. Return true if the comparison instruction
2630/// can be analyzed.
2631bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2632 Register &SrcReg2, int64_t &CmpMask,
2633 int64_t &CmpValue) const {
2634 switch (MI.getOpcode()) {
2635 default: break;
2636 case ARM::CMPri:
2637 case ARM::t2CMPri:
2638 case ARM::tCMPi8:
2639 SrcReg = MI.getOperand(0).getReg();
2640 SrcReg2 = 0;
2641 CmpMask = ~0;
2642 CmpValue = MI.getOperand(1).getImm();
2643 return true;
2644 case ARM::CMPrr:
2645 case ARM::t2CMPrr:
2646 case ARM::tCMPr:
2647 SrcReg = MI.getOperand(0).getReg();
2648 SrcReg2 = MI.getOperand(1).getReg();
2649 CmpMask = ~0;
2650 CmpValue = 0;
2651 return true;
2652 case ARM::TSTri:
2653 case ARM::t2TSTri:
2654 SrcReg = MI.getOperand(0).getReg();
2655 SrcReg2 = 0;
2656 CmpMask = MI.getOperand(1).getImm();
2657 CmpValue = 0;
2658 return true;
2659 }
2660
2661 return false;
2662}
2663
2664/// isSuitableForMask - Identify a suitable 'and' instruction that
2665/// operates on the given source register and applies the same mask
2666/// as a 'tst' instruction. Provide a limited look-through for copies.
2667/// When successful, MI will hold the found instruction.
2668static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2669 int CmpMask, bool CommonUse) {
2670 switch (MI->getOpcode()) {
2671 case ARM::ANDri:
2672 case ARM::t2ANDri:
2673 if (CmpMask != MI->getOperand(2).getImm())
2674 return false;
2675 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2676 return true;
2677 break;
2678 }
2679
2680 return false;
2681}
2682
2683/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2684/// the condition code if we modify the instructions such that flags are
2685/// set by ADD(a,b,X).
2686static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2687 switch (CC) {
2688 default: return ARMCC::AL;
2689 case ARMCC::HS: return ARMCC::LO;
2690 case ARMCC::LO: return ARMCC::HS;
2691 case ARMCC::VS: return ARMCC::VS;
2692 case ARMCC::VC: return ARMCC::VC;
2693 }
2694}
2695
2696/// isRedundantFlagInstr - check whether the first instruction, whose only
2697/// purpose is to update flags, can be made redundant.
2698/// CMPrr can be made redundant by SUBrr if the operands are the same.
2699/// CMPri can be made redundant by SUBri if the operands are the same.
2700/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2701/// This function can be extended later on.
2702inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2703 Register SrcReg, Register SrcReg2,
2704 int64_t ImmValue,
2705 const MachineInstr *OI,
2706 bool &IsThumb1) {
2707 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2708 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2709 ((OI->getOperand(1).getReg() == SrcReg &&
2710 OI->getOperand(2).getReg() == SrcReg2) ||
2711 (OI->getOperand(1).getReg() == SrcReg2 &&
2712 OI->getOperand(2).getReg() == SrcReg))) {
2713 IsThumb1 = false;
2714 return true;
2715 }
2716
2717 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2718 ((OI->getOperand(2).getReg() == SrcReg &&
2719 OI->getOperand(3).getReg() == SrcReg2) ||
2720 (OI->getOperand(2).getReg() == SrcReg2 &&
2721 OI->getOperand(3).getReg() == SrcReg))) {
2722 IsThumb1 = true;
2723 return true;
2724 }
2725
2726 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2727 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2728 OI->getOperand(1).getReg() == SrcReg &&
2729 OI->getOperand(2).getImm() == ImmValue) {
2730 IsThumb1 = false;
2731 return true;
2732 }
2733
2734 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2735 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2736 OI->getOperand(2).getReg() == SrcReg &&
2737 OI->getOperand(3).getImm() == ImmValue) {
2738 IsThumb1 = true;
2739 return true;
2740 }
2741
2742 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2743 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2744 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2745 OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
2746 OI->getOperand(0).getReg() == SrcReg &&
2747 OI->getOperand(1).getReg() == SrcReg2) {
2748 IsThumb1 = false;
2749 return true;
2750 }
2751
2752 if (CmpI->getOpcode() == ARM::tCMPr &&
2753 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2754 OI->getOpcode() == ARM::tADDrr) &&
2755 OI->getOperand(0).getReg() == SrcReg &&
2756 OI->getOperand(2).getReg() == SrcReg2) {
2757 IsThumb1 = true;
2758 return true;
2759 }
2760
2761 return false;
2762}
2763
2764static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2765 switch (MI->getOpcode()) {
2766 default: return false;
2767 case ARM::tLSLri:
2768 case ARM::tLSRri:
2769 case ARM::tLSLrr:
2770 case ARM::tLSRrr:
2771 case ARM::tSUBrr:
2772 case ARM::tADDrr:
2773 case ARM::tADDi3:
2774 case ARM::tADDi8:
2775 case ARM::tSUBi3:
2776 case ARM::tSUBi8:
2777 case ARM::tMUL:
2778 case ARM::tADC:
2779 case ARM::tSBC:
2780 case ARM::tRSB:
2781 case ARM::tAND:
2782 case ARM::tORR:
2783 case ARM::tEOR:
2784 case ARM::tBIC:
2785 case ARM::tMVN:
2786 case ARM::tASRri:
2787 case ARM::tASRrr:
2788 case ARM::tROR:
2789 IsThumb1 = true;
2790 [[fallthrough]];
2791 case ARM::RSBrr:
2792 case ARM::RSBri:
2793 case ARM::RSCrr:
2794 case ARM::RSCri:
2795 case ARM::ADDrr:
2796 case ARM::ADDri:
2797 case ARM::ADCrr:
2798 case ARM::ADCri:
2799 case ARM::SUBrr:
2800 case ARM::SUBri:
2801 case ARM::SBCrr:
2802 case ARM::SBCri:
2803 case ARM::t2RSBri:
2804 case ARM::t2ADDrr:
2805 case ARM::t2ADDri:
2806 case ARM::t2ADCrr:
2807 case ARM::t2ADCri:
2808 case ARM::t2SUBrr:
2809 case ARM::t2SUBri:
2810 case ARM::t2SBCrr:
2811 case ARM::t2SBCri:
2812 case ARM::ANDrr:
2813 case ARM::ANDri:
2814 case ARM::ANDrsr:
2815 case ARM::ANDrsi:
2816 case ARM::t2ANDrr:
2817 case ARM::t2ANDri:
2818 case ARM::t2ANDrs:
2819 case ARM::ORRrr:
2820 case ARM::ORRri:
2821 case ARM::ORRrsr:
2822 case ARM::ORRrsi:
2823 case ARM::t2ORRrr:
2824 case ARM::t2ORRri:
2825 case ARM::t2ORRrs:
2826 case ARM::EORrr:
2827 case ARM::EORri:
2828 case ARM::EORrsr:
2829 case ARM::EORrsi:
2830 case ARM::t2EORrr:
2831 case ARM::t2EORri:
2832 case ARM::t2EORrs:
2833 case ARM::BICri:
2834 case ARM::BICrr:
2835 case ARM::BICrsi:
2836 case ARM::BICrsr:
2837 case ARM::t2BICri:
2838 case ARM::t2BICrr:
2839 case ARM::t2BICrs:
2840 case ARM::t2LSRri:
2841 case ARM::t2LSRrr:
2842 case ARM::t2LSLri:
2843 case ARM::t2LSLrr:
2844 case ARM::MOVsr:
2845 case ARM::MOVsi:
2846 return true;
2847 }
2848}
2849
2850/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2851/// comparison into one that sets the zero bit in the flags register, and
2852/// remove the redundant compare instruction if an earlier instruction can set
2853/// the flags in the same way as the compare.
2854/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2855/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2856/// condition code of instructions which use the flags.
2857bool ARMBaseInstrInfo::optimizeCompareInstr(
2858 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
2859 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
2860 // Get the unique definition of SrcReg.
2861 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2862 if (!MI) return false;
2863
2864 // Masked compares sometimes use the same register as the corresponding 'and'.
2865 if (CmpMask != ~0) {
2866 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
2867 MI = nullptr;
2868 for (MachineRegisterInfo::use_instr_iterator
2869 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
2870 UI != UE; ++UI) {
2871 if (UI->getParent() != CmpInstr.getParent())
2872 continue;
2873 MachineInstr *PotentialAND = &*UI;
2874 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2875 isPredicated(*PotentialAND))
2876 continue;
2877 MI = PotentialAND;
2878 break;
2879 }
2880 if (!MI) return false;
2881 }
2882 }
2883
2884 // Get ready to iterate backward from CmpInstr.
2885 MachineBasicBlock::iterator I = CmpInstr, E = MI,
2886 B = CmpInstr.getParent()->begin();
2887
2888 // Early exit if CmpInstr is at the beginning of the BB.
2889 if (I == B) return false;
2890
2891 // There are two possible candidates which can be changed to set CPSR:
2892 // One is MI, the other is a SUB or ADD instruction.
2893 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2894 // ADDr[ri](r1, r2, X).
2895 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2896 MachineInstr *SubAdd = nullptr;
2897 if (SrcReg2 != 0)
2898 // MI is not a candidate for CMPrr.
2899 MI = nullptr;
2900 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2901 // Conservatively refuse to convert an instruction which isn't in the same
2902 // BB as the comparison.
2903 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2904 // Thus we cannot return here.
2905 if (CmpInstr.getOpcode() == ARM::CMPri ||
2906 CmpInstr.getOpcode() == ARM::t2CMPri ||
2907 CmpInstr.getOpcode() == ARM::tCMPi8)
2908 MI = nullptr;
2909 else
2910 return false;
2911 }
2912
2913 bool IsThumb1 = false;
2914 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2915 return false;
2916
2917 // We also want to do this peephole for cases like this: if (a*b == 0),
2918 // and optimise away the CMP instruction from the generated code sequence:
2919 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2920 // resulting from the select instruction, but these MOVS instructions for
2921 // Thumb1 (V6M) are flag-setting and thus prevent this optimisation.
2922 // However, if we only have MOVS instructions in between the CMP and the
2923 // other instruction (the MULS in this example), then the CPSR is dead so we
2924 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2925 // reordering and then continue the analysis hoping we can eliminate the
2926 // CMP. This peephole works on the vregs, so is still in SSA form. As a
2927 // consequence, the movs won't redefine/kill the MUL operands which would
2928 // make this reordering illegal.
2929 const TargetRegisterInfo *TRI = &getRegisterInfo();
2930 if (MI && IsThumb1) {
2931 --I;
2932 if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
2933 bool CanReorder = true;
2934 for (; I != E; --I) {
2935 if (I->getOpcode() != ARM::tMOVi8) {
2936 CanReorder = false;
2937 break;
2938 }
2939 }
2940 if (CanReorder) {
2941 MI = MI->removeFromParent();
2942 E = CmpInstr;
2943 CmpInstr.getParent()->insert(E, MI);
2944 }
2945 }
2946 I = CmpInstr;
2947 E = MI;
2948 }
2949
2950 // Check that CPSR isn't set between the comparison instruction and the one we
2951 // want to change. At the same time, search for SubAdd.
2952 bool SubAddIsThumb1 = false;
2953 do {
2954 const MachineInstr &Instr = *--I;
2955
2956 // Check whether CmpInstr can be made redundant by the current instruction.
2957 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
2958 SubAddIsThumb1)) {
2959 SubAdd = &*I;
2960 break;
2961 }
2962
2963 // Allow E (which was initially MI) to be SubAdd but do not search before E.
2964 if (I == E)
2965 break;
2966
2967 if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2968 Instr.readsRegister(ARM::CPSR, TRI))
2969 // This instruction modifies or uses CPSR after the one we want to
2970 // change. We can't do this transformation.
2971 return false;
2972
2973 if (I == B) {
2974 // In some cases, we scan the use-list of an instruction for an AND;
2975 // that AND is in the same BB, but may not be scheduled before the
2976 // corresponding TST. In that case, bail out.
2977 //
2978 // FIXME: We could try to reschedule the AND.
2979 return false;
2980 }
2981 } while (true);
2982
2983 // Return false if no candidates exist.
2984 if (!MI && !SubAdd)
2985 return false;
2986
2987 // If we found a SubAdd, use it as it will be closer to the CMP
2988 if (SubAdd) {
2989 MI = SubAdd;
2990 IsThumb1 = SubAddIsThumb1;
2991 }
2992
2993 // We can't use a predicated instruction - it doesn't always write the flags.
2994 if (isPredicated(*MI))
2995 return false;
2996
2997 // Scan forward for the use of CPSR
2998 // When checking against MI: if it's a condition code that requires checking
2999 // of the V bit or C bit, then this is not safe to do.
3000 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3001 // If we are done with the basic block, we need to check whether CPSR is
3002 // live-out.
3003 SmallVector<std::pair<MachineOperand *, ARMCC::CondCodes>, 4>
3004 OperandsToUpdate;
3005 bool isSafe = false;
3006 I = CmpInstr;
3007 E = CmpInstr.getParent()->end();
3008 while (!isSafe && ++I != E) {
3009 const MachineInstr &Instr = *I;
3010 for (unsigned IO = 0, EO = Instr.getNumOperands();
3011 !isSafe && IO != EO; ++IO) {
3012 const MachineOperand &MO = Instr.getOperand(IO);
3013 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
3014 isSafe = true;
3015 break;
3016 }
3017 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3018 continue;
3019 if (MO.isDef()) {
3020 isSafe = true;
3021 break;
3022 }
3023 // Condition code is after the operand before CPSR except for VSELs.
3024 ARMCC::CondCodes CC;
3025 bool IsInstrVSel = true;
3026 switch (Instr.getOpcode()) {
3027 default:
3028 IsInstrVSel = false;
3029 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
3030 break;
3031 case ARM::VSELEQD:
3032 case ARM::VSELEQS:
3033 case ARM::VSELEQH:
3034 CC = ARMCC::EQ;
3035 break;
3036 case ARM::VSELGTD:
3037 case ARM::VSELGTS:
3038 case ARM::VSELGTH:
3039 CC = ARMCC::GT;
3040 break;
3041 case ARM::VSELGED:
3042 case ARM::VSELGES:
3043 case ARM::VSELGEH:
3044 CC = ARMCC::GE;
3045 break;
3046 case ARM::VSELVSD:
3047 case ARM::VSELVSS:
3048 case ARM::VSELVSH:
3049 CC = ARMCC::VS;
3050 break;
3051 }
3052
3053 if (SubAdd) {
3054 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3055 // on CMP needs to be updated to be based on SUB.
3056 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3057 // needs to be modified.
3058 // Push the condition code operands to OperandsToUpdate.
3059 // If it is safe to remove CmpInstr, the condition code of these
3060 // operands will be modified.
3061 unsigned Opc = SubAdd->getOpcode();
3062 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3063 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3064 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3065 Opc == ARM::tSUBi8;
3066 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3067 if (!IsSub ||
3068 (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
3069 SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
3070 // VSel doesn't support condition code update.
3071 if (IsInstrVSel)
3072 return false;
3073 // Ensure we can swap the condition.
3074 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3075 if (NewCC == ARMCC::AL)
3076 return false;
3077 OperandsToUpdate.push_back(
3078 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
3079 }
3080 } else {
3081 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3082 switch (CC) {
3083 case ARMCC::EQ: // Z
3084 case ARMCC::NE: // Z
3085 case ARMCC::MI: // N
3086 case ARMCC::PL: // N
3087 case ARMCC::AL: // none
3088 // CPSR can be used multiple times, we should continue.
3089 break;
3090 case ARMCC::HS: // C
3091 case ARMCC::LO: // C
3092 case ARMCC::VS: // V
3093 case ARMCC::VC: // V
3094 case ARMCC::HI: // C Z
3095 case ARMCC::LS: // C Z
3096 case ARMCC::GE: // N V
3097 case ARMCC::LT: // N V
3098 case ARMCC::GT: // Z N V
3099 case ARMCC::LE: // Z N V
3100 // The instruction uses the V bit or C bit which is not safe.
3101 return false;
3102 }
3103 }
3104 }
3105 }
3106
3107 // If CPSR is not killed nor re-defined, we should check whether it is
3108 // live-out. If it is live-out, do not optimize.
3109 if (!isSafe) {
3110 MachineBasicBlock *MBB = CmpInstr.getParent();
3111 for (MachineBasicBlock *Succ : MBB->successors())
3112 if (Succ->isLiveIn(ARM::CPSR))
3113 return false;
3114 }
3115
3116 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
3117 // set CPSR so this is represented as an explicit output)
3118 if (!IsThumb1) {
3119 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3120 MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
3121 MI->getOperand(CPSRRegNum).setIsDef(true);
3122 }
3123 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3124 CmpInstr.eraseFromParent();
3125
3126 // Modify the condition code of operands in OperandsToUpdate.
3127 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3128 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3129 for (auto &[MO, Cond] : OperandsToUpdate)
3130 MO->setImm(Cond);
3131
3132 MI->clearRegisterDeads(ARM::CPSR);
3133
3134 return true;
3135}
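// Illustrative example (virtual registers invented): given
//   %4:gpr = SUBrr %1:gpr, %2:gpr, 14 /* CC::al */, $noreg, $noreg
//   CMPrr %1:gpr, %2:gpr, 14 /* CC::al */, $noreg, implicit-def $cpsr
// the compare is redundant: the SUB's optional CPSR def is switched on so it
// behaves like a flag-setting SUBS, and the CMPrr is erased. Had the compare
// been CMPrr %2, %1 instead, every user of the flags would also get its
// condition code swapped, e.g. GE becomes LE.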
3136
3137bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3138 // Do not sink MI if it might be used to optimize a redundant compare.
3139 // We heuristically only look at the instruction immediately following MI to
3140 // avoid potentially searching the entire basic block.
3141 if (isPredicated(MI))
3142 return true;
3143 MachineBasicBlock::const_iterator Next = &MI;
3144 ++Next;
3145 Register SrcReg, SrcReg2;
3146 int64_t CmpMask, CmpValue;
3147 bool IsThumb1;
3148 if (Next != MI.getParent()->end() &&
3149 analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3150 isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
3151 return false;
3152 return true;
3153}
3154
3155bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3156 Register Reg,
3157 MachineRegisterInfo *MRI) const {
3158 // Fold large immediates into add, sub, or, xor.
3159 unsigned DefOpc = DefMI.getOpcode();
3160 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3161 DefOpc != ARM::tMOVi32imm)
3162 return false;
3163 if (!DefMI.getOperand(1).isImm())
3164 // Could be t2MOVi32imm @xx
3165 return false;
3166
3167 if (!MRI->hasOneNonDBGUse(Reg))
3168 return false;
3169
3170 const MCInstrDesc &DefMCID = DefMI.getDesc();
3171 if (DefMCID.hasOptionalDef()) {
3172 unsigned NumOps = DefMCID.getNumOperands();
3173 const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
3174 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3175 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3176 // to delete DefMI.
3177 return false;
3178 }
3179
3180 const MCInstrDesc &UseMCID = UseMI.getDesc();
3181 if (UseMCID.hasOptionalDef()) {
3182 unsigned NumOps = UseMCID.getNumOperands();
3183 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
3184 // If the instruction sets the flag, do not attempt this optimization
3185 // since it may change the semantics of the code.
3186 return false;
3187 }
3188
3189 unsigned UseOpc = UseMI.getOpcode();
3190 unsigned NewUseOpc = 0;
3191 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
3192 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3193 bool Commute = false;
3194 switch (UseOpc) {
3195 default: return false;
3196 case ARM::SUBrr:
3197 case ARM::ADDrr:
3198 case ARM::ORRrr:
3199 case ARM::EORrr:
3200 case ARM::t2SUBrr:
3201 case ARM::t2ADDrr:
3202 case ARM::t2ORRrr:
3203 case ARM::t2EORrr: {
3204 Commute = UseMI.getOperand(2).getReg() != Reg;
3205 switch (UseOpc) {
3206 default: break;
3207 case ARM::ADDrr:
3208 case ARM::SUBrr:
3209 if (UseOpc == ARM::SUBrr && Commute)
3210 return false;
3211
3212 // ADD/SUB are special because they're essentially the same operation, so
3213 // we can handle a larger range of immediates.
3214 if (ARM_AM::isSOImmTwoPartVal(ImmVal))
3215 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3216 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
3217 ImmVal = -ImmVal;
3218 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3219 } else
3220 return false;
3221 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3222 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3223 break;
3224 case ARM::ORRrr:
3225 case ARM::EORrr:
3226 if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
3227 return false;
3228 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
3229 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
3230 switch (UseOpc) {
3231 default: break;
3232 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3233 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3234 }
3235 break;
3236 case ARM::t2ADDrr:
3237 case ARM::t2SUBrr: {
3238 if (UseOpc == ARM::t2SUBrr && Commute)
3239 return false;
3240
3241 // ADD/SUB are special because they're essentially the same operation, so
3242 // we can handle a larger range of immediates.
3243 const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
3244 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3245 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3246 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3247 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3248 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
3249 ImmVal = -ImmVal;
3250 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3251 } else
3252 return false;
3253 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3254 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3255 break;
3256 }
3257 case ARM::t2ORRrr:
3258 case ARM::t2EORrr:
3259 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
3260 return false;
3261 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
3262 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
3263 switch (UseOpc) {
3264 default: break;
3265 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3266 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3267 }
3268 break;
3269 }
3270 }
3271 }
3272
3273 unsigned OpIdx = Commute ? 2 : 1;
3274 Register Reg1 = UseMI.getOperand(OpIdx).getReg();
3275 bool isKill = UseMI.getOperand(OpIdx).isKill();
3276 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3277 Register NewReg = MRI->createVirtualRegister(TRC);
3278 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
3279 NewReg)
3280 .addReg(Reg1, getKillRegState(isKill))
3281 .addImm(SOImmValV1)
3282 .add(predOps(ARMCC::AL))
3283 .add(condCodeOp());
3284 UseMI.setDesc(get(NewUseOpc));
3285 UseMI.getOperand(1).setReg(NewReg);
3286 UseMI.getOperand(1).setIsKill();
3287 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
3288 DefMI.eraseFromParent();
3289 // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
3290 // just as t2ADDri was split into [t2ADDri, t2ADDspImm]. Then the code below
3291 // will not be needed, as the input/output register classes will be rGPR or
3292 // gprSP.
3293 // For now, we fix the UseMI operand explicitly here:
3294 switch(NewUseOpc){
3295 case ARM::t2ADDspImm:
3296 case ARM::t2SUBspImm:
3297 case ARM::t2ADDri:
3298 case ARM::t2SUBri:
3299 MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
3300 }
3301 return true;
3302}
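// Illustrative example (registers invented): given
//   %1:gpr = MOVi32imm 0xFF00FF00
//   %3:gpr = ADDrr %2:gpr, %1:gpr, 14 /* CC::al */, $noreg, $noreg
// with %1 having only that single use, 0xFF00FF00 is a two-part so_imm
// (0xFF000000 + 0x0000FF00), so the pair is rewritten into two ADDri
// instructions and the MOVi32imm is deleted; which half lands in which add is
// whatever getSOImmTwoPartFirst / getSOImmTwoPartSecond return.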
3303
3304static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3305 const MachineInstr &MI) {
3306 switch (MI.getOpcode()) {
3307 default: {
3308 const MCInstrDesc &Desc = MI.getDesc();
3309 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
3310 assert(UOps >= 0 && "bad # UOps");
3311 return UOps;
3312 }
3313
3314 case ARM::LDRrs:
3315 case ARM::LDRBrs:
3316 case ARM::STRrs:
3317 case ARM::STRBrs: {
3318 unsigned ShOpVal = MI.getOperand(3).getImm();
3319 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3320 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3321 if (!isSub &&
3322 (ShImm == 0 ||
3323 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3324 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3325 return 1;
3326 return 2;
3327 }
3328
3329 case ARM::LDRH:
3330 case ARM::STRH: {
3331 if (!MI.getOperand(2).getReg())
3332 return 1;
3333
3334 unsigned ShOpVal = MI.getOperand(3).getImm();
3335 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3336 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3337 if (!isSub &&
3338 (ShImm == 0 ||
3339 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3340 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3341 return 1;
3342 return 2;
3343 }
3344
3345 case ARM::LDRSB:
3346 case ARM::LDRSH:
3347 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
3348
3349 case ARM::LDRSB_POST:
3350 case ARM::LDRSH_POST: {
3351 Register Rt = MI.getOperand(0).getReg();
3352 Register Rm = MI.getOperand(3).getReg();
3353 return (Rt == Rm) ? 4 : 3;
3354 }
3355
3356 case ARM::LDR_PRE_REG:
3357 case ARM::LDRB_PRE_REG: {
3358 Register Rt = MI.getOperand(0).getReg();
3359 Register Rm = MI.getOperand(3).getReg();
3360 if (Rt == Rm)
3361 return 3;
3362 unsigned ShOpVal = MI.getOperand(4).getImm();
3363 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3364 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3365 if (!isSub &&
3366 (ShImm == 0 ||
3367 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3368 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3369 return 2;
3370 return 3;
3371 }
3372
3373 case ARM::STR_PRE_REG:
3374 case ARM::STRB_PRE_REG: {
3375 unsigned ShOpVal = MI.getOperand(4).getImm();
3376 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3377 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3378 if (!isSub &&
3379 (ShImm == 0 ||
3380 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3381 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3382 return 2;
3383 return 3;
3384 }
3385
3386 case ARM::LDRH_PRE:
3387 case ARM::STRH_PRE: {
3388 Register Rt = MI.getOperand(0).getReg();
3389 Register Rm = MI.getOperand(3).getReg();
3390 if (!Rm)
3391 return 2;
3392 if (Rt == Rm)
3393 return 3;
3394 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
3395 }
3396
3397 case ARM::LDR_POST_REG:
3398 case ARM::LDRB_POST_REG:
3399 case ARM::LDRH_POST: {
3400 Register Rt = MI.getOperand(0).getReg();
3401 Register Rm = MI.getOperand(3).getReg();
3402 return (Rt == Rm) ? 3 : 2;
3403 }
3404
3405 case ARM::LDR_PRE_IMM:
3406 case ARM::LDRB_PRE_IMM:
3407 case ARM::LDR_POST_IMM:
3408 case ARM::LDRB_POST_IMM:
3409 case ARM::STRB_POST_IMM:
3410 case ARM::STRB_POST_REG:
3411 case ARM::STRB_PRE_IMM:
3412 case ARM::STRH_POST:
3413 case ARM::STR_POST_IMM:
3414 case ARM::STR_POST_REG:
3415 case ARM::STR_PRE_IMM:
3416 return 2;
3417
3418 case ARM::LDRSB_PRE:
3419 case ARM::LDRSH_PRE: {
3420 Register Rm = MI.getOperand(3).getReg();
3421 if (Rm == 0)
3422 return 3;
3423 Register Rt = MI.getOperand(0).getReg();
3424 if (Rt == Rm)
3425 return 4;
3426 unsigned ShOpVal = MI.getOperand(4).getImm();
3427 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3428 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3429 if (!isSub &&
3430 (ShImm == 0 ||
3431 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3432 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3433 return 3;
3434 return 4;
3435 }
3436
3437 case ARM::LDRD: {
3438 Register Rt = MI.getOperand(0).getReg();
3439 Register Rn = MI.getOperand(2).getReg();
3440 Register Rm = MI.getOperand(3).getReg();
3441 if (Rm)
3442 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3443 : 3;
3444 return (Rt == Rn) ? 3 : 2;
3445 }
3446
3447 case ARM::STRD: {
3448 Register Rm = MI.getOperand(3).getReg();
3449 if (Rm)
3450 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
3451 : 3;
3452 return 2;
3453 }
3454
3455 case ARM::LDRD_POST:
3456 case ARM::t2LDRD_POST:
3457 return 3;
3458
3459 case ARM::STRD_POST:
3460 case ARM::t2STRD_POST:
3461 return 4;
3462
3463 case ARM::LDRD_PRE: {
3464 Register Rt = MI.getOperand(0).getReg();
3465 Register Rn = MI.getOperand(3).getReg();
3466 Register Rm = MI.getOperand(4).getReg();
3467 if (Rm)
3468 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3469 : 4;
3470 return (Rt == Rn) ? 4 : 3;
3471 }
3472
3473 case ARM::t2LDRD_PRE: {
3474 Register Rt = MI.getOperand(0).getReg();
3475 Register Rn = MI.getOperand(3).getReg();
3476 return (Rt == Rn) ? 4 : 3;
3477 }
3478
3479 case ARM::STRD_PRE: {
3480 Register Rm = MI.getOperand(4).getReg();
3481 if (Rm)
3482 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
3483 : 4;
3484 return 3;
3485 }
3486
3487 case ARM::t2STRD_PRE:
3488 return 3;
3489
3490 case ARM::t2LDR_POST:
3491 case ARM::t2LDRB_POST:
3492 case ARM::t2LDRB_PRE:
3493 case ARM::t2LDRSBi12:
3494 case ARM::t2LDRSBi8:
3495 case ARM::t2LDRSBpci:
3496 case ARM::t2LDRSBs:
3497 case ARM::t2LDRH_POST:
3498 case ARM::t2LDRH_PRE:
3499 case ARM::t2LDRSBT:
3500 case ARM::t2LDRSB_POST:
3501 case ARM::t2LDRSB_PRE:
3502 case ARM::t2LDRSH_POST:
3503 case ARM::t2LDRSH_PRE:
3504 case ARM::t2LDRSHi12:
3505 case ARM::t2LDRSHi8:
3506 case ARM::t2LDRSHpci:
3507 case ARM::t2LDRSHs:
3508 return 2;
3509
3510 case ARM::t2LDRDi8: {
3511 Register Rt = MI.getOperand(0).getReg();
3512 Register Rn = MI.getOperand(2).getReg();
3513 return (Rt == Rn) ? 3 : 2;
3514 }
3515
3516 case ARM::t2STRB_POST:
3517 case ARM::t2STRB_PRE:
3518 case ARM::t2STRBs:
3519 case ARM::t2STRDi8:
3520 case ARM::t2STRH_POST:
3521 case ARM::t2STRH_PRE:
3522 case ARM::t2STRHs:
3523 case ARM::t2STR_POST:
3524 case ARM::t2STR_PRE:
3525 case ARM::t2STRs:
3526 return 2;
3527 }
3528}
3529
3530// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3531// can't be easily determined return 0 (missing MachineMemOperand).
3532//
3533// FIXME: The current MachineInstr design does not support relying on machine
3534// mem operands to determine the width of a memory access. Instead, we expect
3535// the target to provide this information based on the instruction opcode and
3536// operands. However, using MachineMemOperand is the best solution now for
3537// two reasons:
3538//
3539// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3540// operands. This is much more dangerous than using the MachineMemOperand
3541// sizes because CodeGen passes can insert/remove optional machine operands. In
3542// fact, it's totally incorrect for preRA passes and appears to be wrong for
3543// postRA passes as well.
3544//
3545// 2) getNumLDMAddresses is only used by the scheduling machine model, and any
3546// machine model that calls this should handle the unknown (zero size) case.
3547//
3548// Long term, we should require a target hook that verifies MachineMemOperand
3549// sizes during MC lowering. That target hook should be local to MC lowering
3550// because we can't ensure that it is aware of other MI forms. Doing this will
3551// ensure that MachineMemOperands are correctly propagated through all passes.
3553 unsigned Size = 0;
3554 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3555 E = MI.memoperands_end();
3556 I != E; ++I) {
3557 Size += (*I)->getSize().getValue();
3558 }
3559 // FIXME: The scheduler currently can't handle values larger than 16. But
3560 // the values can actually go up to 32 for floating-point load/store
3561 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3562 // operations isn't right; we could end up with "extra" memory operands for
3563 // various reasons, like tail merge merging two memory operations.
3564 return std::min(Size / 4, 16U);
3565}
3566
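// Worst-case micro-op count for a load/store multiple when every transferred
// register issues separately: one uop per register plus one for the address
// computation, with extras for base-register writeback and writes to the PC.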
3567static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3568 unsigned NumRegs) {
3569 unsigned UOps = 1 + NumRegs; // 1 for address computation.
3570 switch (Opc) {
3571 default:
3572 break;
3573 case ARM::VLDMDIA_UPD:
3574 case ARM::VLDMDDB_UPD:
3575 case ARM::VLDMSIA_UPD:
3576 case ARM::VLDMSDB_UPD:
3577 case ARM::VSTMDIA_UPD:
3578 case ARM::VSTMDDB_UPD:
3579 case ARM::VSTMSIA_UPD:
3580 case ARM::VSTMSDB_UPD:
3581 case ARM::LDMIA_UPD:
3582 case ARM::LDMDA_UPD:
3583 case ARM::LDMDB_UPD:
3584 case ARM::LDMIB_UPD:
3585 case ARM::STMIA_UPD:
3586 case ARM::STMDA_UPD:
3587 case ARM::STMDB_UPD:
3588 case ARM::STMIB_UPD:
3589 case ARM::tLDMIA_UPD:
3590 case ARM::tSTMIA_UPD:
3591 case ARM::t2LDMIA_UPD:
3592 case ARM::t2LDMDB_UPD:
3593 case ARM::t2STMIA_UPD:
3594 case ARM::t2STMDB_UPD:
3595 ++UOps; // One for base register writeback.
3596 break;
3597 case ARM::LDMIA_RET:
3598 case ARM::tPOP_RET:
3599 case ARM::t2LDMIA_RET:
3600 UOps += 2; // One for base reg wb, one for write to pc.
3601 break;
3602 }
3603 return UOps;
3604}
3605
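// Number of micro-ops needed by MI. A negative itinerary count marks
// variable-uop instructions (load/store multiple), which are expanded below
// from the register-list length and the subtarget's issue behaviour; Swift
// loads and stores use their own helper even when the itinerary has a count.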
3606unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3607 const MachineInstr &MI) const {
3608 if (!ItinData || ItinData->isEmpty())
3609 return 1;
3610
3611 const MCInstrDesc &Desc = MI.getDesc();
3612 unsigned Class = Desc.getSchedClass();
3613 int ItinUOps = ItinData->getNumMicroOps(Class);
3614 if (ItinUOps >= 0) {
3615 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3616 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3617
3618 return ItinUOps;
3619 }
3620
3621 unsigned Opc = MI.getOpcode();
3622 switch (Opc) {
3623 default:
3624 llvm_unreachable("Unexpected multi-uops instruction!");
3625 case ARM::VLDMQIA:
3626 case ARM::VSTMQIA:
3627 return 2;
3628
3629 // The number of uOps for load / store multiple is determined by the number
3630 // of registers.
3631 //
3632 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3633 // same cycle. The scheduling for the first load / store must be done
3634 // separately by assuming the address is not 64-bit aligned.
3635 //
3636 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3637 // is not 64-bit aligned, then the AGU takes an extra cycle. For VFP / NEON
3638 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
3639 case ARM::VLDMDIA:
3640 case ARM::VLDMDIA_UPD:
3641 case ARM::VLDMDDB_UPD:
3642 case ARM::VLDMSIA:
3643 case ARM::VLDMSIA_UPD:
3644 case ARM::VLDMSDB_UPD:
3645 case ARM::VSTMDIA:
3646 case ARM::VSTMDIA_UPD:
3647 case ARM::VSTMDDB_UPD:
3648 case ARM::VSTMSIA:
3649 case ARM::VSTMSIA_UPD:
3650 case ARM::VSTMSDB_UPD: {
3651 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3652 return (NumRegs / 2) + (NumRegs % 2) + 1;
3653 }
3654
3655 case ARM::LDMIA_RET:
3656 case ARM::LDMIA:
3657 case ARM::LDMDA:
3658 case ARM::LDMDB:
3659 case ARM::LDMIB:
3660 case ARM::LDMIA_UPD:
3661 case ARM::LDMDA_UPD:
3662 case ARM::LDMDB_UPD:
3663 case ARM::LDMIB_UPD:
3664 case ARM::STMIA:
3665 case ARM::STMDA:
3666 case ARM::STMDB:
3667 case ARM::STMIB:
3668 case ARM::STMIA_UPD:
3669 case ARM::STMDA_UPD:
3670 case ARM::STMDB_UPD:
3671 case ARM::STMIB_UPD:
3672 case ARM::tLDMIA:
3673 case ARM::tLDMIA_UPD:
3674 case ARM::tSTMIA_UPD:
3675 case ARM::tPOP_RET:
3676 case ARM::tPOP:
3677 case ARM::tPUSH:
3678 case ARM::t2LDMIA_RET:
3679 case ARM::t2LDMIA:
3680 case ARM::t2LDMDB:
3681 case ARM::t2LDMIA_UPD:
3682 case ARM::t2LDMDB_UPD:
3683 case ARM::t2STMIA:
3684 case ARM::t2STMDB:
3685 case ARM::t2STMIA_UPD:
3686 case ARM::t2STMDB_UPD: {
3687 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3688 switch (Subtarget.getLdStMultipleTiming()) {
3689 case ARMSubtarget::SingleIssuePlusExtras:
3690 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3691 case ARMSubtarget::SingleIssue:
3692 // Assume the worst.
3693 return NumRegs;
3694 case ARMSubtarget::DoubleIssue: {
3695 if (NumRegs < 4)
3696 return 2;
3697 // 4 registers would be issued: 2, 2.
3698 // 5 registers would be issued: 2, 2, 1.
3699 unsigned UOps = (NumRegs / 2);
3700 if (NumRegs % 2)
3701 ++UOps;
3702 return UOps;
3703 }
3704 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3705 unsigned UOps = (NumRegs / 2);
3706 // If there is an odd number of registers or if it's not 64-bit aligned,
3707 // then it takes an extra AGU (Address Generation Unit) cycle.
3708 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3709 (*MI.memoperands_begin())->getAlign() < Align(8))
3710 ++UOps;
3711 return UOps;
3712 }
3713 }
3714 }
3715 }
3716 llvm_unreachable("Didn't find the number of microops");
3717}
3718
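// Cycle at which the DefIdx-th register loaded by a VLDM becomes available.
// RegNo is the register's position in the list; an odd number of 'S' registers
// or a base that is not 64-bit aligned costs an extra cycle on A9-like and
// Swift cores.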
3719std::optional<unsigned>
3720ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3721 const MCInstrDesc &DefMCID, unsigned DefClass,
3722 unsigned DefIdx, unsigned DefAlign) const {
3723 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3724 if (RegNo <= 0)
3725 // Def is the address writeback.
3726 return ItinData->getOperandCycle(DefClass, DefIdx);
3727
3728 unsigned DefCycle;
3729 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3730 // (regno / 2) + (regno % 2) + 1
3731 DefCycle = RegNo / 2 + 1;
3732 if (RegNo % 2)
3733 ++DefCycle;
3734 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3735 DefCycle = RegNo;
3736 bool isSLoad = false;
3737
3738 switch (DefMCID.getOpcode()) {
3739 default: break;
3740 case ARM::VLDMSIA:
3741 case ARM::VLDMSIA_UPD:
3742 case ARM::VLDMSDB_UPD:
3743 isSLoad = true;
3744 break;
3745 }
3746
3747 // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3748 // then it takes an extra cycle.
3749 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3750 ++DefCycle;
3751 } else {
3752 // Assume the worst.
3753 DefCycle = RegNo + 2;
3754 }
3755
3756 return DefCycle;
3757}
3758
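// Cycle at which the DefIdx-th register loaded by an LDM becomes available,
// derived from its position in the register list plus the load pipeline depth.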
3759std::optional<unsigned>
3760ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3761 const MCInstrDesc &DefMCID, unsigned DefClass,
3762 unsigned DefIdx, unsigned DefAlign) const {
3763 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3764 if (RegNo <= 0)
3765 // Def is the address writeback.
3766 return ItinData->getOperandCycle(DefClass, DefIdx);
3767
3768 unsigned DefCycle;
3769 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3770 // 4 registers would be issued: 1, 2, 1.
3771 // 5 registers would be issued: 1, 2, 2.
3772 DefCycle = RegNo / 2;
3773 if (DefCycle < 1)
3774 DefCycle = 1;
3775 // Result latency is issue cycle + 2: E2.
3776 DefCycle += 2;
3777 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3778 DefCycle = (RegNo / 2);
3779 // If there is an odd number of registers or if it's not 64-bit aligned,
3780 // then it takes an extra AGU (Address Generation Unit) cycle.
3781 if ((RegNo % 2) || DefAlign < 8)
3782 ++DefCycle;
3783 // Result latency is AGU cycles + 2.
3784 DefCycle += 2;
3785 } else {
3786 // Assume the worst.
3787 DefCycle = RegNo + 2;
3788 }
3789
3790 return DefCycle;
3791}
3792
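// Cycle by which the UseIdx-th register of a VSTM must be available, again
// keyed off its position in the register list.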
3793std::optional<unsigned>
3794ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3795 const MCInstrDesc &UseMCID, unsigned UseClass,
3796 unsigned UseIdx, unsigned UseAlign) const {
3797 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3798 if (RegNo <= 0)
3799 return ItinData->getOperandCycle(UseClass, UseIdx);
3800
3801 unsigned UseCycle;
3802 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3803 // (regno / 2) + (regno % 2) + 1
3804 UseCycle = RegNo / 2 + 1;
3805 if (RegNo % 2)
3806 ++UseCycle;
3807 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3808 UseCycle = RegNo;
3809 bool isSStore = false;
3810
3811 switch (UseMCID.getOpcode()) {
3812 default: break;
3813 case ARM::VSTMSIA:
3814 case ARM::VSTMSIA_UPD:
3815 case ARM::VSTMSDB_UPD:
3816 isSStore = true;
3817 break;
3818 }
3819
3820 // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3821 // then it takes an extra cycle.
3822 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3823 ++UseCycle;
3824 } else {
3825 // Assume the worst.
3826 UseCycle = RegNo + 2;
3827 }
3828
3829 return UseCycle;
3830}
3831
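// Cycle by which the UseIdx-th register of an STM must be available.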
3832std::optional<unsigned>
3833ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3834 const MCInstrDesc &UseMCID, unsigned UseClass,
3835 unsigned UseIdx, unsigned UseAlign) const {
3836 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3837 if (RegNo <= 0)
3838 return ItinData->getOperandCycle(UseClass, UseIdx);
3839
3840 unsigned UseCycle;
3841 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3842 UseCycle = RegNo / 2;
3843 if (UseCycle < 2)
3844 UseCycle = 2;
3845 // Read in E3.
3846 UseCycle += 2;
3847 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3848 UseCycle = (RegNo / 2);
3849 // If there is an odd number of registers or if it's not 64-bit aligned,
3850 // then it takes an extra AGU (Address Generation Unit) cycle.
3851 if ((RegNo % 2) || UseAlign < 8)
3852 ++UseCycle;
3853 } else {
3854 // Assume the worst.
3855 UseCycle = 1;
3856 }
3857 return UseCycle;
3858}
3859
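// Operand latency computed from the MCInstrDescs alone. For variable_ops
// load/store multiples the itinerary cannot index the list operands, so the
// def and use cycles are recomputed from register-list position and pipeline
// forwarding is then applied.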
3860std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
3861 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
3862 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
3863 unsigned UseIdx, unsigned UseAlign) const {
3864 unsigned DefClass = DefMCID.getSchedClass();
3865 unsigned UseClass = UseMCID.getSchedClass();
3866
3867 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3868 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3869
3870 // This may be a def / use of a variable_ops instruction; the operand
3871 // latency might be determinable dynamically. Let the target try to
3872 // figure it out.
3873 std::optional<unsigned> DefCycle;
3874 bool LdmBypass = false;
3875 switch (DefMCID.getOpcode()) {
3876 default:
3877 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3878 break;
3879
3880 case ARM::VLDMDIA:
3881 case ARM::VLDMDIA_UPD:
3882 case ARM::VLDMDDB_UPD:
3883 case ARM::VLDMSIA:
3884 case ARM::VLDMSIA_UPD:
3885 case ARM::VLDMSDB_UPD:
3886 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3887 break;
3888
3889 case ARM::LDMIA_RET:
3890 case ARM::LDMIA:
3891 case ARM::LDMDA:
3892 case ARM::LDMDB:
3893 case ARM::LDMIB:
3894 case ARM::LDMIA_UPD:
3895 case ARM::LDMDA_UPD:
3896 case ARM::LDMDB_UPD:
3897 case ARM::LDMIB_UPD:
3898 case ARM::tLDMIA:
3899 case ARM::tLDMIA_UPD:
3900 case ARM::tPUSH:
3901 case ARM::t2LDMIA_RET:
3902 case ARM::t2LDMIA:
3903 case ARM::t2LDMDB:
3904 case ARM::t2LDMIA_UPD:
3905 case ARM::t2LDMDB_UPD:
3906 LdmBypass = true;
3907 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3908 break;
3909 }
3910
3911 if (!DefCycle)
3912 // We can't seem to determine the result latency of the def, assume it's 2.
3913 DefCycle = 2;
3914
3915 std::optional<unsigned> UseCycle;
3916 switch (UseMCID.getOpcode()) {
3917 default:
3918 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3919 break;
3920
3921 case ARM::VSTMDIA:
3922 case ARM::VSTMDIA_UPD:
3923 case ARM::VSTMDDB_UPD:
3924 case ARM::VSTMSIA:
3925 case ARM::VSTMSIA_UPD:
3926 case ARM::VSTMSDB_UPD:
3927 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3928 break;
3929
3930 case ARM::STMIA:
3931 case ARM::STMDA:
3932 case ARM::STMDB:
3933 case ARM::STMIB:
3934 case ARM::STMIA_UPD:
3935 case ARM::STMDA_UPD:
3936 case ARM::STMDB_UPD:
3937 case ARM::STMIB_UPD:
3938 case ARM::tSTMIA_UPD:
3939 case ARM::tPOP_RET:
3940 case ARM::tPOP:
3941 case ARM::t2STMIA:
3942 case ARM::t2STMDB:
3943 case ARM::t2STMIA_UPD:
3944 case ARM::t2STMDB_UPD:
3945 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3946 break;
3947 }
3948
3949 if (!UseCycle)
3950 // Assume it's read in the first stage.
3951 UseCycle = 1;
3952
3953 if (UseCycle > *DefCycle + 1)
3954 return std::nullopt;
3955
3956 UseCycle = *DefCycle - *UseCycle + 1;
3957 if (UseCycle > 0u) {
3958 if (LdmBypass) {
3959 // It's a variable_ops instruction so we can't use DefIdx here. Just use
3960 // the first def operand.
3961 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3962 UseClass, UseIdx))
3963 UseCycle = *UseCycle - 1;
3964 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3965 UseClass, UseIdx)) {
3966 UseCycle = *UseCycle - 1;
3967 }
3968 }
3969
3970 return UseCycle;
3971}
3972
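// Search backwards through the instructions bundled with MI for the one that
// defines Reg; DefIdx receives its operand index and Dist the number of
// instructions stepped over.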
3973static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3974 const MachineInstr *MI, unsigned Reg,
3975 unsigned &DefIdx, unsigned &Dist) {
3976 Dist = 0;
3977
3979 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
3980 assert(II->isInsideBundle() && "Empty bundle?");
3981
3982 int Idx = -1;
3983 while (II->isInsideBundle()) {
3984 Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
3985 if (Idx != -1)
3986 break;
3987 --II;
3988 ++Dist;
3989 }
3990
3991 assert(Idx != -1 && "Cannot find bundled definition!");
3992 DefIdx = Idx;
3993 return &*II;
3994}
3995
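// Search forwards through the instructions bundled with MI for one that uses
// Reg; returns nullptr if none is found. IT instructions do not count towards
// Dist.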
3996static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3997 const MachineInstr &MI, unsigned Reg,
3998 unsigned &UseIdx, unsigned &Dist) {
3999 Dist = 0;
4000
4002 assert(II->isInsideBundle() && "Empty bundle?");
4003 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4004
4005 // FIXME: This doesn't properly handle multiple uses.
4006 int Idx = -1;
4007 while (II != E && II->isInsideBundle()) {
4008 Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
4009 if (Idx != -1)
4010 break;
4011 if (II->getOpcode() != ARM::t2IT)
4012 ++Dist;
4013 ++II;
4014 }
4015
4016 if (Idx == -1) {
4017 Dist = 0;
4018 return nullptr;
4019 }
4020
4021 UseIdx = Idx;
4022 return &*II;
4023}
4024
4025/// Return the number of cycles to add to (or subtract from) the static
4026/// itinerary based on the def opcode and alignment. The caller will ensure that
4027/// adjusted latency is at least one cycle.
4028static int adjustDefLatency(const ARMSubtarget &Subtarget,
4029 const MachineInstr &DefMI,
4030 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4031 int Adjust = 0;
4032 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4033 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4034 // variants are one cycle cheaper.
4035 switch (DefMCID.getOpcode()) {
4036 default: break;
4037 case ARM::LDRrs:
4038 case ARM::LDRBrs: {
4039 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4040 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4041 if (ShImm == 0 ||
4042 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4043 --Adjust;
4044 break;
4045 }
4046 case ARM::t2LDRs:
4047 case ARM::t2LDRBs:
4048 case ARM::t2LDRHs:
4049 case ARM::t2LDRSHs: {
4050 // Thumb2 mode: lsl only.
4051 unsigned ShAmt = DefMI.getOperand(3).getImm();
4052 if (ShAmt == 0 || ShAmt == 2)
4053 --Adjust;
4054 break;
4055 }
4056 }
4057 } else if (Subtarget.isSwift()) {
4058 // FIXME: Properly handle all of the latency adjustments for address
4059 // writeback.
4060 switch (DefMCID.getOpcode()) {
4061 default: break;
4062 case ARM::LDRrs:
4063 case ARM::LDRBrs: {
4064 unsigned ShOpVal = DefMI.getOperand(3).getImm();
4065 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
4066 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4067 if (!isSub &&
4068 (ShImm == 0 ||
4069 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4070 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4071 Adjust -= 2;
4072 else if (!isSub &&
4073 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4074 --Adjust;
4075 break;
4076 }
4077 case ARM::t2LDRs:
4078 case ARM::t2LDRBs:
4079 case ARM::t2LDRHs:
4080 case ARM::t2LDRSHs: {
4081 // Thumb2 mode: lsl only.
4082 unsigned ShAmt = DefMI.getOperand(3).getImm();
4083 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4084 Adjust -= 2;
4085 break;
4086 }
4087 }
4088 }
4089
4090 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4091 switch (DefMCID.getOpcode()) {
4092 default: break;
4093 case ARM::VLD1q8:
4094 case ARM::VLD1q16:
4095 case ARM::VLD1q32:
4096 case ARM::VLD1q64:
4097 case ARM::VLD1q8wb_fixed:
4098 case ARM::VLD1q16wb_fixed:
4099 case ARM::VLD1q32wb_fixed:
4100 case ARM::VLD1q64wb_fixed:
4101 case ARM::VLD1q8wb_register:
4102 case ARM::VLD1q16wb_register:
4103 case ARM::VLD1q32wb_register:
4104 case ARM::VLD1q64wb_register:
4105 case ARM::VLD2d8:
4106 case ARM::VLD2d16:
4107 case ARM::VLD2d32:
4108 case ARM::VLD2q8:
4109 case ARM::VLD2q16:
4110 case ARM::VLD2q32:
4111 case ARM::VLD2d8wb_fixed:
4112 case ARM::VLD2d16wb_fixed:
4113 case ARM::VLD2d32wb_fixed:
4114 case ARM::VLD2q8wb_fixed:
4115 case ARM::VLD2q16wb_fixed:
4116 case ARM::VLD2q32wb_fixed:
4117 case ARM::VLD2d8wb_register:
4118 case ARM::VLD2d16wb_register:
4119 case ARM::VLD2d32wb_register:
4120 case ARM::VLD2q8wb_register:
4121 case ARM::VLD2q16wb_register:
4122 case ARM::VLD2q32wb_register:
4123 case ARM::VLD3d8:
4124 case ARM::VLD3d16:
4125 case ARM::VLD3d32:
4126 case ARM::VLD1d64T:
4127 case ARM::VLD3d8_UPD:
4128 case ARM::VLD3d16_UPD:
4129 case ARM::VLD3d32_UPD:
4130 case ARM::VLD1d64Twb_fixed:
4131 case ARM::VLD1d64Twb_register:
4132 case ARM::VLD3q8_UPD:
4133 case ARM::VLD3q16_UPD:
4134 case ARM::VLD3q32_UPD:
4135 case ARM::VLD4d8:
4136 case ARM::VLD4d16:
4137 case ARM::VLD4d32:
4138 case ARM::VLD1d64Q:
4139 case ARM::VLD4d8_UPD:
4140 case ARM::VLD4d16_UPD:
4141 case ARM::VLD4d32_UPD:
4142 case ARM::VLD1d64Qwb_fixed:
4143 case ARM::VLD1d64Qwb_register:
4144 case ARM::VLD4q8_UPD:
4145 case ARM::VLD4q16_UPD:
4146 case ARM::VLD4q32_UPD:
4147 case ARM::VLD1DUPq8:
4148 case ARM::VLD1DUPq16:
4149 case ARM::VLD1DUPq32:
4150 case ARM::VLD1DUPq8wb_fixed:
4151 case ARM::VLD1DUPq16wb_fixed:
4152 case ARM::VLD1DUPq32wb_fixed:
4153 case ARM::VLD1DUPq8wb_register:
4154 case ARM::VLD1DUPq16wb_register:
4155 case ARM::VLD1DUPq32wb_register:
4156 case ARM::VLD2DUPd8:
4157 case ARM::VLD2DUPd16:
4158 case ARM::VLD2DUPd32:
4159 case ARM::VLD2DUPd8wb_fixed:
4160 case ARM::VLD2DUPd16wb_fixed:
4161 case ARM::VLD2DUPd32wb_fixed:
4162 case ARM::VLD2DUPd8wb_register:
4163 case ARM::VLD2DUPd16wb_register:
4164 case ARM::VLD2DUPd32wb_register:
4165 case ARM::VLD4DUPd8:
4166 case ARM::VLD4DUPd16:
4167 case ARM::VLD4DUPd32:
4168 case ARM::VLD4DUPd8_UPD:
4169 case ARM::VLD4DUPd16_UPD:
4170 case ARM::VLD4DUPd32_UPD:
4171 case ARM::VLD1LNd8:
4172 case ARM::VLD1LNd16:
4173 case ARM::VLD1LNd32:
4174 case ARM::VLD1LNd8_UPD:
4175 case ARM::VLD1LNd16_UPD:
4176 case ARM::VLD1LNd32_UPD:
4177 case ARM::VLD2LNd8:
4178 case ARM::VLD2LNd16:
4179 case ARM::VLD2LNd32:
4180 case ARM::VLD2LNq16:
4181 case ARM::VLD2LNq32:
4182 case ARM::VLD2LNd8_UPD:
4183 case ARM::VLD2LNd16_UPD:
4184 case ARM::VLD2LNd32_UPD:
4185 case ARM::VLD2LNq16_UPD:
4186 case ARM::VLD2LNq32_UPD:
4187 case ARM::VLD4LNd8:
4188 case ARM::VLD4LNd16:
4189 case ARM::VLD4LNd32:
4190 case ARM::VLD4LNq16:
4191 case ARM::VLD4LNq32:
4192 case ARM::VLD4LNd8_UPD:
4193 case ARM::VLD4LNd16_UPD:
4194 case ARM::VLD4LNd32_UPD:
4195 case ARM::VLD4LNq16_UPD:
4196 case ARM::VLD4LNq32_UPD:
4197 // If the address is not 64-bit aligned, the latencies of these
4198 // instructions increase by one.
4199 ++Adjust;
4200 break;
4201 }
4202 }
4203 return Adjust;
4204}
4205
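// Operand latency between DefMI and UseMI. Bundles are resolved to the actual
// def/use instructions, copy-like defs get latency 1, CPSR is special-cased,
// and the itinerary result is adjusted for alignment and IT-block position.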
4206std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4207 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4208 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4209 // No operand latency. The caller may fall back to getInstrLatency.
4210 if (!ItinData || ItinData->isEmpty())
4211 return std::nullopt;
4212
4213 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4214 Register Reg = DefMO.getReg();
4215
4216 const MachineInstr *ResolvedDefMI = &DefMI;
4217 unsigned DefAdj = 0;
4218 if (DefMI.isBundle())
4219 ResolvedDefMI =
4220 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4221 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4222 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4223 return 1;
4224 }
4225
4226 const MachineInstr *ResolvedUseMI = &UseMI;
4227 unsigned UseAdj = 0;
4228 if (UseMI.isBundle()) {
4229 ResolvedUseMI =
4230 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4231 if (!ResolvedUseMI)
4232 return std::nullopt;
4233 }
4234
4235 return getOperandLatencyImpl(
4236 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
4237 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4238}
4239
4240std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4241 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4242 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4243 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4244 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4245 if (Reg == ARM::CPSR) {
4246 if (DefMI.getOpcode() == ARM::FMSTAT) {
4247 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4248 return Subtarget.isLikeA9() ? 1 : 20;
4249 }
4250
4251 // CPSR set and branch can be paired in the same cycle.
4252 if (UseMI.isBranch())
4253 return 0;
4254
4255 // Otherwise it takes the instruction latency (generally one).
4256 unsigned Latency = getInstrLatency(ItinData, DefMI);
4257
4258 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4259 // its uses. Instructions which are otherwise scheduled between them may
4260 // incur a code size penalty (not able to use the CPSR setting 16-bit
4261 // instructions).
4262 if (Latency > 0 && Subtarget.isThumb2()) {
4263 const MachineFunction *MF = DefMI.getParent()->getParent();
4264 if (MF->getFunction().hasOptSize())
4265 --Latency;
4266 }
4267 return Latency;
4268 }
4269
4270 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4271 return std::nullopt;
4272
4273 unsigned DefAlign = DefMI.hasOneMemOperand()
4274 ? (*DefMI.memoperands_begin())->getAlign().value()
4275 : 0;
4276 unsigned UseAlign = UseMI.hasOneMemOperand()
4277 ? (*UseMI.memoperands_begin())->getAlign().value()
4278 : 0;
4279
4280 // Get the itinerary's latency if possible, and handle variable_ops.
4281 std::optional<unsigned> Latency = getOperandLatency(
4282 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4283 // Unable to find operand latency. The caller may resort to getInstrLatency.
4284 if (!Latency)
4285 return std::nullopt;
4286
4287 // Adjust for IT block position.
4288 int Adj = DefAdj + UseAdj;
4289
4290 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4291 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4292 if (Adj >= 0 || (int)*Latency > -Adj) {
4293 return *Latency + Adj;
4294 }
4295 // Return the itinerary latency, which may be zero but is never negative.
4296 return Latency;
4297}
4298
4299std::optional<unsigned>
4300ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4301 SDNode *DefNode, unsigned DefIdx,
4302 SDNode *UseNode, unsigned UseIdx) const {
4303 if (!DefNode->isMachineOpcode())
4304 return 1;
4305
4306 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4307
4308 if (isZeroCost(DefMCID.Opcode))
4309 return 0;
4310
4311 if (!ItinData || ItinData->isEmpty())
4312 return DefMCID.mayLoad() ? 3 : 1;
4313
4314 if (!UseNode->isMachineOpcode()) {
4315 std::optional<unsigned> Latency =
4316 ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4317 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4318 int Threshold = 1 + Adj;
4319 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4320 }
4321
4322 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
4323 auto *DefMN = cast<MachineSDNode>(DefNode);
4324 unsigned DefAlign = !DefMN->memoperands_empty()
4325 ? (*DefMN->memoperands_begin())->getAlign().value()
4326 : 0;
4327 auto *UseMN = cast<MachineSDNode>(UseNode);
4328 unsigned UseAlign = !UseMN->memoperands_empty()
4329 ? (*UseMN->memoperands_begin())->getAlign().value()
4330 : 0;
4331 std::optional<unsigned> Latency = getOperandLatency(
4332 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4333 if (!Latency)
4334 return std::nullopt;
4335
4336 if (Latency > 1U &&
4337 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4338 Subtarget.isCortexA7())) {
4339 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4340 // variants are one cycle cheaper.
4341 switch (DefMCID.getOpcode()) {
4342 default: break;
4343 case ARM::LDRrs:
4344 case ARM::LDRBrs: {
4345 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4346 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4347 if (ShImm == 0 ||
4348 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4349 Latency = *Latency - 1;
4350 break;
4351 }
4352 case ARM::t2LDRs:
4353 case ARM::t2LDRBs:
4354 case ARM::t2LDRHs:
4355 case ARM::t2LDRSHs: {
4356 // Thumb2 mode: lsl only.
4357 unsigned ShAmt = DefNode->getConstantOperandVal(2);
4358 if (ShAmt == 0 || ShAmt == 2)
4359 Latency = *Latency - 1;
4360 break;
4361 }
4362 }
4363 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4364 // FIXME: Properly handle all of the latency adjustments for address
4365 // writeback.
4366 switch (DefMCID.getOpcode()) {
4367 default: break;
4368 case ARM::LDRrs:
4369 case ARM::LDRBrs: {
4370 unsigned ShOpVal = DefNode->getConstantOperandVal(2);
4371 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4372 if (ShImm == 0 ||
4373 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4374 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4375 Latency = *Latency - 2;
4376 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4377 Latency = *Latency - 1;
4378 break;
4379 }
4380 case ARM::t2LDRs:
4381 case ARM::t2LDRBs:
4382 case ARM::t2LDRHs:
4383 case ARM::t2LDRSHs:
4384 // Thumb2 mode: lsl 0-3 only.
4385 Latency = *Latency - 2;
4386 break;
4387 }
4388 }
4389
4390 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4391 switch (DefMCID.getOpcode()) {
4392 default: break;
4393 case ARM::VLD1q8:
4394 case ARM::VLD1q16:
4395 case ARM::VLD1q32:
4396 case ARM::VLD1q64:
4397 case ARM::VLD1q8wb_register:
4398 case ARM::VLD1q16wb_register:
4399 case ARM::VLD1q32wb_register:
4400 case ARM::VLD1q64wb_register:
4401 case ARM::VLD1q8wb_fixed:
4402 case ARM::VLD1q16wb_fixed:
4403 case ARM::VLD1q32wb_fixed:
4404 case ARM::VLD1q64wb_fixed:
4405 case ARM::VLD2d8:
4406 case ARM::VLD2d16:
4407 case ARM::VLD2d32:
4408 case ARM::VLD2q8Pseudo:
4409 case ARM::VLD2q16Pseudo:
4410 case ARM::VLD2q32Pseudo:
4411 case ARM::VLD2d8wb_fixed:
4412 case ARM::VLD2d16wb_fixed:
4413 case ARM::VLD2d32wb_fixed:
4414 case ARM::VLD2q8PseudoWB_fixed:
4415 case ARM::VLD2q16PseudoWB_fixed:
4416 case ARM::VLD2q32PseudoWB_fixed:
4417 case ARM::VLD2d8wb_register:
4418 case ARM::VLD2d16wb_register:
4419 case ARM::VLD2d32wb_register:
4420 case ARM::VLD2q8PseudoWB_register:
4421 case ARM::VLD2q16PseudoWB_register:
4422 case ARM::VLD2q32PseudoWB_register:
4423 case ARM::VLD3d8Pseudo:
4424 case ARM::VLD3d16Pseudo:
4425 case ARM::VLD3d32Pseudo:
4426 case ARM::VLD1d8TPseudo:
4427 case ARM::VLD1d16TPseudo:
4428 case ARM::VLD1d32TPseudo:
4429 case ARM::VLD1d64TPseudo:
4430 case ARM::VLD1d64TPseudoWB_fixed:
4431 case ARM::VLD1d64TPseudoWB_register:
4432 case ARM::VLD3d8Pseudo_UPD:
4433 case ARM::VLD3d16Pseudo_UPD:
4434 case ARM::VLD3d32Pseudo_UPD:
4435 case ARM::VLD3q8Pseudo_UPD:
4436 case ARM::VLD3q16Pseudo_UPD:
4437 case ARM::VLD3q32Pseudo_UPD:
4438 case ARM::VLD3q8oddPseudo:
4439 case ARM::VLD3q16oddPseudo:
4440 case ARM::VLD3q32oddPseudo:
4441 case ARM::VLD3q8oddPseudo_UPD:
4442 case ARM::VLD3q16oddPseudo_UPD:
4443 case ARM::VLD3q32oddPseudo_UPD:
4444 case ARM::VLD4d8Pseudo:
4445 case ARM::VLD4d16Pseudo:
4446 case ARM::VLD4d32Pseudo:
4447 case ARM::VLD1d8QPseudo:
4448 case ARM::VLD1d16QPseudo:
4449 case ARM::VLD1d32QPseudo:
4450 case ARM::VLD1d64QPseudo:
4451 case ARM::VLD1d64QPseudoWB_fixed:
4452 case ARM::VLD1d64QPseudoWB_register:
4453 case ARM::VLD1q8HighQPseudo:
4454 case ARM::VLD1q8LowQPseudo_UPD:
4455 case ARM::VLD1q8HighTPseudo:
4456 case ARM::VLD1q8LowTPseudo_UPD:
4457 case ARM::VLD1q16HighQPseudo:
4458 case ARM::VLD1q16LowQPseudo_UPD:
4459 case ARM::VLD1q16HighTPseudo:
4460 case ARM::VLD1q16LowTPseudo_UPD:
4461 case ARM::VLD1q32HighQPseudo:
4462 case ARM::VLD1q32LowQPseudo_UPD:
4463 case ARM::VLD1q32HighTPseudo:
4464 case ARM::VLD1q32LowTPseudo_UPD:
4465 case ARM::VLD1q64HighQPseudo:
4466 case ARM::VLD1q64LowQPseudo_UPD:
4467 case ARM::VLD1q64HighTPseudo:
4468 case ARM::VLD1q64LowTPseudo_UPD:
4469 case ARM::VLD4d8Pseudo_UPD:
4470 case ARM::VLD4d16Pseudo_UPD:
4471 case ARM::VLD4d32Pseudo_UPD:
4472 case ARM::VLD4q8Pseudo_UPD:
4473 case ARM::VLD4q16Pseudo_UPD:
4474 case ARM::VLD4q32Pseudo_UPD:
4475 case ARM::VLD4q8oddPseudo:
4476 case ARM::VLD4q16oddPseudo:
4477 case ARM::VLD4q32oddPseudo:
4478 case ARM::VLD4q8oddPseudo_UPD:
4479 case ARM::VLD4q16oddPseudo_UPD:
4480 case ARM::VLD4q32oddPseudo_UPD:
4481 case ARM::VLD1DUPq8:
4482 case ARM::VLD1DUPq16:
4483 case ARM::VLD1DUPq32:
4484 case ARM::VLD1DUPq8wb_fixed:
4485 case ARM::VLD1DUPq16wb_fixed:
4486 case ARM::VLD1DUPq32wb_fixed:
4487 case ARM::VLD1DUPq8wb_register:
4488 case ARM::VLD1DUPq16wb_register:
4489 case ARM::VLD1DUPq32wb_register:
4490 case ARM::VLD2DUPd8:
4491 case ARM::VLD2DUPd16:
4492 case ARM::VLD2DUPd32:
4493 case ARM::VLD2DUPd8wb_fixed:
4494 case ARM::VLD2DUPd16wb_fixed:
4495 case ARM::VLD2DUPd32wb_fixed:
4496 case ARM::VLD2DUPd8wb_register:
4497 case ARM::VLD2DUPd16wb_register:
4498 case ARM::VLD2DUPd32wb_register:
4499 case ARM::VLD2DUPq8EvenPseudo:
4500 case ARM::VLD2DUPq8OddPseudo:
4501 case ARM::VLD2DUPq16EvenPseudo:
4502 case ARM::VLD2DUPq16OddPseudo:
4503 case ARM::VLD2DUPq32EvenPseudo:
4504 case ARM::VLD2DUPq32OddPseudo:
4505 case ARM::VLD3DUPq8EvenPseudo:
4506 case ARM::VLD3DUPq8OddPseudo:
4507 case ARM::VLD3DUPq16EvenPseudo:
4508 case ARM::VLD3DUPq16OddPseudo:
4509 case ARM::VLD3DUPq32EvenPseudo:
4510 case ARM::VLD3DUPq32OddPseudo:
4511 case ARM::VLD4DUPd8Pseudo:
4512 case ARM::VLD4DUPd16Pseudo:
4513 case ARM::VLD4DUPd32Pseudo:
4514 case ARM::VLD4DUPd8Pseudo_UPD:
4515 case ARM::VLD4DUPd16Pseudo_UPD:
4516 case ARM::VLD4DUPd32Pseudo_UPD:
4517 case ARM::VLD4DUPq8EvenPseudo:
4518 case ARM::VLD4DUPq8OddPseudo:
4519 case ARM::VLD4DUPq16EvenPseudo:
4520 case ARM::VLD4DUPq16OddPseudo:
4521 case ARM::VLD4DUPq32EvenPseudo:
4522 case ARM::VLD4DUPq32OddPseudo:
4523 case ARM::VLD1LNq8Pseudo:
4524 case ARM::VLD1LNq16Pseudo:
4525 case ARM::VLD1LNq32Pseudo:
4526 case ARM::VLD1LNq8Pseudo_UPD:
4527 case ARM::VLD1LNq16Pseudo_UPD:
4528 case ARM::VLD1LNq32Pseudo_UPD:
4529 case ARM::VLD2LNd8Pseudo:
4530 case ARM::VLD2LNd16Pseudo:
4531 case ARM::VLD2LNd32Pseudo:
4532 case ARM::VLD2LNq16Pseudo:
4533 case ARM::VLD2LNq32Pseudo:
4534 case ARM::VLD2LNd8Pseudo_UPD:
4535 case ARM::VLD2LNd16Pseudo_UPD:
4536 case ARM::VLD2LNd32Pseudo_UPD:
4537 case ARM::VLD2LNq16Pseudo_UPD:
4538 case ARM::VLD2LNq32Pseudo_UPD:
4539 case ARM::VLD4LNd8Pseudo:
4540 case ARM::VLD4LNd16Pseudo:
4541 case ARM::VLD4LNd32Pseudo:
4542 case ARM::VLD4LNq16Pseudo:
4543 case ARM::VLD4LNq32Pseudo:
4544 case ARM::VLD4LNd8Pseudo_UPD:
4545 case ARM::VLD4LNd16Pseudo_UPD:
4546 case ARM::VLD4LNd32Pseudo_UPD:
4547 case ARM::VLD4LNq16Pseudo_UPD:
4548 case ARM::VLD4LNq32Pseudo_UPD:
4549 // If the address is not 64-bit aligned, the latencies of these
4550 // instructions increase by one.
4551 Latency = *Latency + 1;
4552 break;
4553 }
4554
4555 return Latency;
4556}
4557
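// Extra cost of predicating MI: one cycle for calls and CPSR-defining
// instructions (unless the subtarget treats CPSR defs as cheap), zero
// otherwise.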
4558unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4559 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4560 MI.isImplicitDef())
4561 return 0;
4562
4563 if (MI.isBundle())
4564 return 0;
4565
4566 const MCInstrDesc &MCID = MI.getDesc();
4567
4568 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4569 !Subtarget.cheapPredicableCPSRDef())) {
4570 // When predicated, CPSR is an additional source operand for CPSR updating
4571 // instructions; this apparently increases their latencies.
4572 return 1;
4573 }
4574 return 0;
4575}
4576
4577unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4578 const MachineInstr &MI,
4579 unsigned *PredCost) const {
4580 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4581 MI.isImplicitDef())
4582 return 1;
4583
4584 // An instruction scheduler typically runs on unbundled instructions, however
4585 // other passes may query the latency of a bundled instruction.
4586 if (MI.isBundle()) {
4587 unsigned Latency = 0;
4588 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4589 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4590 while (++I != E && I->isInsideBundle()) {
4591 if (I->getOpcode() != ARM::t2IT)
4592 Latency += getInstrLatency(ItinData, *I, PredCost);
4593 }
4594 return Latency;
4595 }
4596
4597 const MCInstrDesc &MCID = MI.getDesc();
4598 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
4599 !Subtarget.cheapPredicableCPSRDef()))) {
4600 // When predicated, CPSR is an additional source operand for CPSR updating
4601 // instructions; this apparently increases their latencies.
4602 *PredCost = 1;
4603 }
4604 // Be sure to call getStageLatency for an empty itinerary in case it has a
4605 // valid MinLatency property.
4606 if (!ItinData)
4607 return MI.mayLoad() ? 3 : 1;
4608
4609 unsigned Class = MCID.getSchedClass();
4610
4611 // For instructions with variable uops, use uops as latency.
4612 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4613 return getNumMicroOps(ItinData, MI);
4614
4615 // For the common case, fall back on the itinerary's latency.
4616 unsigned Latency = ItinData->getStageLatency(Class);
4617
4618 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4619 unsigned DefAlign =
4620 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4621 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4622 if (Adj >= 0 || (int)Latency > -Adj) {
4623 return Latency + Adj;
4624 }
4625 return Latency;
4626}
4627
4628unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4629 SDNode *Node) const {
4630 if (!Node->isMachineOpcode())
4631 return 1;
4632
4633 if (!ItinData || ItinData->isEmpty())
4634 return 1;
4635
4636 unsigned Opcode = Node->getMachineOpcode();
4637 switch (Opcode) {
4638 default:
4639 return ItinData->getStageLatency(get(Opcode).getSchedClass());
4640 case ARM::VLDMQIA:
4641 case ARM::VSTMQIA:
4642 return 2;
4643 }
4644}
4645
4646bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4647 const MachineRegisterInfo *MRI,
4648 const MachineInstr &DefMI,
4649 unsigned DefIdx,
4650 const MachineInstr &UseMI,
4651 unsigned UseIdx) const {
4652 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4653 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4654 if (Subtarget.nonpipelinedVFP() &&
4655 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4656 return true;
4657
4658 // Hoist VFP / NEON instructions with 4 or higher latency.
4659 unsigned Latency =
4660 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4661 if (Latency <= 3)
4662 return false;
4663 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4664 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4665}
4666
4667bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4668 const MachineInstr &DefMI,
4669 unsigned DefIdx) const {
4670 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4671 if (!ItinData || ItinData->isEmpty())
4672 return false;
4673
4674 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4675 if (DDomain == ARMII::DomainGeneral) {
4676 unsigned DefClass = DefMI.getDesc().getSchedClass();
4677 std::optional<unsigned> DefCycle =
4678 ItinData->getOperandCycle(DefClass, DefIdx);
4679 return DefCycle && DefCycle <= 2U;
4680 }
4681 return false;
4682}
4683
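// Machine verifier hook: reject pseudo flag-setting opcodes that should only
// exist in the SelectionDAG, illegal Thumb1 lo-to-lo moves and push/pop
// register lists, malformed MVE_VMOV_q_rr lane indices, and out-of-range
// addressing-mode immediates.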
4684bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4685 StringRef &ErrInfo) const {
4686 if (convertAddSubFlagsOpcode(MI.getOpcode())) {
4687 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4688 return false;
4689 }
4690 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4691 // Make sure we don't generate a lo-lo mov that isn't supported.
4692 if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
4693 !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
4694 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4695 return false;
4696 }
4697 }
4698 if (MI.getOpcode() == ARM::tPUSH ||
4699 MI.getOpcode() == ARM::tPOP ||
4700 MI.getOpcode() == ARM::tPOP_RET) {
4701 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4702 if (MO.isImplicit() || !MO.isReg())
4703 continue;
4704 Register Reg = MO.getReg();
4705 if (Reg < ARM::R0 || Reg > ARM::R7) {
4706 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4707 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4708 ErrInfo = "Unsupported register in Thumb1 push/pop";
4709 return false;
4710 }
4711 }
4712 }
4713 }
4714 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4715 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4716 if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4717 MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4718 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4719 return false;
4720 }
4721 }
4722
4723 // Check the addressing mode by taking the first Imm operand and checking it is
4724 // legal for that addressing mode.
4725 ARMII::AddrMode AddrMode =
4726 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4727 switch (AddrMode) {
4728 default:
4729 break;
4730 case ARMII::AddrModeT2_i7:
4731 case ARMII::AddrModeT2_i7s2:
4732 case ARMII::AddrModeT2_i7s4:
4733 case ARMII::AddrModeT2_i8:
4734 case ARMII::AddrModeT2_i8pos:
4735 case ARMII::AddrModeT2_i8neg:
4736 case ARMII::AddrModeT2_i8s4:
4737 case ARMII::AddrModeT2_i12: {
4738 uint32_t Imm = 0;
4739 for (auto Op : MI.operands()) {
4740 if (Op.isImm()) {
4741 Imm = Op.getImm();
4742 break;
4743 }
4744 }
4745 if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
4746 ErrInfo = "Incorrect AddrMode Imm for instruction";
4747 return false;
4748 }
4749 break;
4750 }
4751 }
4752 return true;
4753}
4754
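// Expand a stack-guard load pseudo: materialize the guard address into Reg
// (hardware TLS register plus offset, or a global, possibly indirected
// through the GOT or a stub), then load the guard value through it.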
4755void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4756 unsigned LoadImmOpc,
4757 unsigned LoadOpc) const {
4758 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4759 "ROPI/RWPI not currently supported with stack guard");
4760
4761 MachineBasicBlock &MBB = *MI->getParent();
4762 DebugLoc DL = MI->getDebugLoc();
4763 Register Reg = MI->getOperand(0).getReg();
4764 MachineInstrBuilder MIB;
4765 unsigned int Offset = 0;
4766
4767 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4768 assert(!Subtarget.isReadTPSoft() &&
4769 "TLS stack protector requires hardware TLS register");
4770
4771 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4772 .addImm(15)
4773 .addImm(0)
4774 .addImm(13)
4775 .addImm(0)
4776 .addImm(3)
4777 .add(predOps(ARMCC::AL));
4778
4779 Module &M = *MBB.getParent()->getFunction().getParent();
4780 Offset = M.getStackProtectorGuardOffset();
4781 if (Offset & ~0xfffU) {
4782 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4783 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4784 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4785 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4786 BuildMI(MBB, MI, DL, get(AddOpc), Reg)
4787 .addReg(Reg, RegState::Kill)
4788 .addImm(Offset & ~0xfffU)
4789 .add(predOps(ARMCC::AL))
4790 .addReg(0);
4791 Offset &= 0xfffU;
4792 }
4793 } else {
4794 const GlobalValue *GV =
4795 cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4796 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4797
4798 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4799 if (Subtarget.isTargetMachO()) {
4800 TargetFlags |= ARMII::MO_NONLAZY;
4801 } else if (Subtarget.isTargetCOFF()) {
4802 if (GV->hasDLLImportStorageClass())
4803 TargetFlags |= ARMII::MO_DLLIMPORT;
4804 else if (IsIndirect)
4805 TargetFlags |= ARMII::MO_COFFSTUB;
4806 } else if (IsIndirect) {
4807 TargetFlags |= ARMII::MO_GOT;
4808 }
4809
4810 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
4811 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4812 auto APSREncoding =
4813 ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
4814 BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
4815 .addImm(APSREncoding)
4816 .add(predOps(ARMCC::AL));
4817 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4818 .addGlobalAddress(GV, 0, TargetFlags);
4819 BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
4820 .addImm(APSREncoding)
4821 .addReg(CPSRSaveReg, RegState::Kill)
4822 .add(predOps(ARMCC::AL));
4823 } else {
4824 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
4825 .addGlobalAddress(GV, 0, TargetFlags);
4826 }
4827
4828 if (IsIndirect) {
4829 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4830 MIB.addReg(Reg, RegState::Kill).addImm(0);
4831 auto Flags = MachineMemOperand::MOLoad |
4832 MachineMemOperand::MODereferenceable |
4833 MachineMemOperand::MOInvariant;
4834 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4835 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
4836 MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4837 }
4838 }
4839
4840 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
4841 MIB.addReg(Reg, RegState::Kill)
4842 .addImm(Offset)
4843 .cloneMemRefs(*MI)
4844 .add(predOps(ARMCC::AL));
4845}
4846
4847bool
4848ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4849 unsigned &AddSubOpc,
4850 bool &NegAcc, bool &HasLane) const {
4851 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
4852 if (I == MLxEntryMap.end())
4853 return false;
4854
4855 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4856 MulOpc = Entry.MulOpc;
4857 AddSubOpc = Entry.AddSubOpc;
4858 NegAcc = Entry.NegAcc;
4859 HasLane = Entry.HasLane;
4860 return true;
4861}
4862
4863//===----------------------------------------------------------------------===//
4864// Execution domains.
4865//===----------------------------------------------------------------------===//
4866//
4867// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4868// and some can go down both. The vmov instructions go down the VFP pipeline,
4869// but they can be changed to vorr equivalents that are executed by the NEON
4870// pipeline.
4871//
4872// We use the following execution domain numbering:
4873//
4877 ExeNEON = 2
4879
4880//
4881// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4882//
4883std::pair<uint16_t, uint16_t>
4884ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4885 // If we don't have access to NEON instructions then we won't be able
4886 // to swizzle anything to the NEON domain. Check to make sure.
4887 if (Subtarget.hasNEON()) {
4888 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4889 // if they are not predicated.
4890 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4891 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4892
4893 // CortexA9 is particularly picky about mixing the two and wants these
4894 // converted.
4895 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4896 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4897 MI.getOpcode() == ARM::VMOVS))
4898 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
4899 }
4900 // No other instructions can be swizzled, so just determine their domain.
4901 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4902
4903 if (Domain & ARMII::DomainNEON)
4904 return std::make_pair(ExeNEON, 0);
4905
4906 // Certain instructions can go either way on Cortex-A8.
4907 // Treat them as NEON instructions.
4908 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4909 return std::make_pair(ExeNEON, 0);
4910
4911 if (Domain & ARMII::DomainVFP)
4912 return std::make_pair(ExeVFP, 0);
4913
4914 return std::make_pair(ExeGeneric, 0);
4915}
4916
4917static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4918 unsigned SReg, unsigned &Lane) {
4919 MCRegister DReg =
4920 TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
4921 Lane = 0;
4922
4923 if (DReg)
4924 return DReg;
4925
4926 Lane = 1;
4927 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
4928
4929 assert(DReg && "S-register with no D super-register?");
4930 return DReg;
4931}
4932
4933/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4934/// set ImplicitSReg to a register number that must be marked as implicit-use or
4935/// zero if no register needs to be defined as implicit-use.
4936///
4937/// If the function cannot determine if an SPR should be marked implicit use or
4938/// not, it returns false.
4939///
4940/// This function handles cases where an instruction is being modified from taking
4941/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4942/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4943/// lane of the DPR).
4944///
4945/// If the other SPR is defined, an implicit-use of it should be added. Otherwise
4946/// (including the case where the DPR itself is defined), it should not.
4947///
4948static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4949 MachineInstr &MI, MCRegister DReg,
4950 unsigned Lane,
4951 MCRegister &ImplicitSReg) {
4952 // If the DPR is defined or used already, the other SPR lane will be chained
4953 // correctly, so there is nothing to be done.
4954 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
4955 ImplicitSReg = MCRegister();
4956 return true;
4957 }
4958
4959 // Otherwise we need to go searching to see if the SPR is set explicitly.
4960 ImplicitSReg = TRI->getSubReg(DReg,
4961 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4962 MachineBasicBlock::LivenessQueryResult LQR =
4963 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4964
4965 if (LQR == MachineBasicBlock::LQR_Live)
4966 return true;
4967 else if (LQR == MachineBasicBlock::LQR_Unknown)
4968 return false;
4969
4970 // If the register is known not to be live, there is no need to add an
4971 // implicit-use.
4972 ImplicitSReg = MCRegister();
4973 return true;
4974}
4975
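// Swizzle a VFP move into its NEON equivalent when the NEON domain was chosen:
// VMOVD becomes VORRd, VMOVRS becomes VGETLNi32, VMOVSR becomes VSETLNi32, and
// an S-to-S VMOVS is rewritten as a VDUPLN32d or a pair of VEXTd32 instructions.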
4976void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4977 unsigned Domain) const {
4978 unsigned DstReg, SrcReg;
4979 MCRegister DReg;
4980 unsigned Lane;
4981 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4982 const TargetRegisterInfo *TRI = &getRegisterInfo();
4983 switch (MI.getOpcode()) {
4984 default:
4985 llvm_unreachable("cannot handle opcode!");
4986 break;
4987 case ARM::VMOVD:
4988 if (Domain != ExeNEON)
4989 break;
4990
4991 // Zap the predicate operands.
4992 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4993
4994 // Make sure we've got NEON instructions.
4995 assert(Subtarget.hasNEON() && "VORRd requires NEON");
4996
4997 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4998 DstReg = MI.getOperand(0).getReg();
4999 SrcReg = MI.getOperand(1).getReg();
5000
5001 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5002 MI.removeOperand(i - 1);
5003
5004 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5005 MI.setDesc(get(ARM::VORRd));
5006 MIB.addReg(DstReg, RegState::Define)
5007 .addReg(SrcReg)
5008 .addReg(SrcReg)
5010 break;
5011 case ARM::VMOVRS:
5012 if (Domain != ExeNEON)
5013 break;
5014 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5015
5016 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5017 DstReg = MI.getOperand(0).getReg();
5018 SrcReg = MI.getOperand(1).getReg();
5019
5020 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5021 MI.removeOperand(i - 1);
5022
5023 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5024
5025 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5026 // Note that DSrc has been widened and the other lane may be undef, which
5027 // contaminates the entire register.
5028 MI.setDesc(get(ARM::VGETLNi32));
5029 MIB.addReg(DstReg, RegState::Define)
5030 .addReg(DReg, RegState::Undef)
5031 .addImm(Lane)
5033
5034 // The old source should be an implicit use, otherwise we might think it
5035 // was dead before here.
5036 MIB.addReg(SrcReg, RegState::Implicit);
5037 break;
5038 case ARM::VMOVSR: {
5039 if (Domain != ExeNEON)
5040 break;
5041 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5042
5043 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5044 DstReg = MI.getOperand(0).getReg();
5045 SrcReg = MI.getOperand(1).getReg();
5046
5047 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5048
5049 MCRegister ImplicitSReg;
5050 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5051 break;
5052
5053 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5054 MI.removeOperand(i - 1);
5055
5056 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5057 // Again DDst may be undefined at the beginning of this instruction.
5058 MI.setDesc(get(ARM::VSETLNi32));
5059 MIB.addReg(DReg, RegState::Define)
5060 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
5061 .addReg(SrcReg)
5062 .addImm(Lane)
5064
5065 // The narrower destination must be marked as set to keep previous chains
5066 // in place.
5068 if (ImplicitSReg)
5069 MIB.addReg(ImplicitSReg, RegState::Implicit);
5070 break;
5071 }
5072 case ARM::VMOVS: {
5073 if (Domain != ExeNEON)
5074 break;
5075
5076 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5077 DstReg = MI.getOperand(0).getReg();
5078 SrcReg = MI.getOperand(1).getReg();
5079
5080 unsigned DstLane = 0, SrcLane = 0;
5081 MCRegister DDst, DSrc;
5082 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
5083 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5084
5085 MCRegister ImplicitSReg;
5086 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5087 break;
5088
5089 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5090 MI.removeOperand(i - 1);
5091
5092 if (DSrc == DDst) {
5093 // Destination can be:
5094 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5095 MI.setDesc(get(ARM::VDUPLN32d));
5096 MIB.addReg(DDst, RegState::Define)
5097 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
5098 .addImm(SrcLane)
5100
5101 // Neither the source nor the destination is naturally represented any
5102 // more, so add them in manually.
5104 MIB.addReg(SrcReg, RegState::Implicit);
5105 if (ImplicitSReg)
5106 MIB.addReg(ImplicitSReg, RegState::Implicit);
5107 break;
5108 }
5109
5110 // In general there's no single instruction that can perform an S <-> S
5111 // move in NEON space, but a pair of VEXT instructions *can* do the
5112 // job. It turns out that the VEXTs needed will only use DSrc once, with
5113 // the position based purely on the combination of lane-0 and lane-1
5114 // involved. For example
5115 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5116 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5117 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5118 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5119 //
5120 // Pattern of the MachineInstrs is:
5121 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5122 MachineInstrBuilder NewMIB;
5123 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
5124 DDst);
5125
5126 // On the first instruction, both DSrc and DDst may be undef if present.
5127 // Specifically when the original instruction didn't have them as an
5128 // <imp-use>.
5129 MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5130 bool CurUndef = !MI.readsRegister(CurReg, TRI);
5131 NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5132
5133 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5134 CurUndef = !MI.readsRegister(CurReg, TRI);
5135 NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
5136 .addImm(1)
5138
5139 if (SrcLane == DstLane)
5140 NewMIB.addReg(SrcReg, RegState::Implicit);
5141
5142 MI.setDesc(get(ARM::VEXTd32));
5143 MIB.addReg(DDst, RegState::Define);
5144
5145 // On the second instruction, DDst has definitely been defined above, so
5146 // it is not undef. DSrc, if present, can be undef as above.
5147 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5148 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5149 MIB.addReg(CurReg, getUndefRegState(CurUndef));
5150
5151 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5152 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
5153 MIB.addReg(CurReg, getUndefRegState(CurUndef))
5154 .addImm(1)
5156
5157 if (SrcLane != DstLane)
5158 MIB.addReg(SrcReg, RegState::Implicit);
5159
5160 // As before, the original destination is no longer represented, add it
5161 // implicitly.
5163 if (ImplicitSReg != 0)
5164 MIB.addReg(ImplicitSReg, RegState::Implicit);
5165 break;
5166 }
5167 }
5168}
5169
5170//===----------------------------------------------------------------------===//
5171// Partial register updates
5172//===----------------------------------------------------------------------===//
5173//
5174// Swift renames NEON registers with 64-bit granularity. That means any
5175// instruction writing an S-reg implicitly reads the containing D-reg. The
5176// problem is mostly avoided by translating f32 operations to v2f32 operations
5177// on D-registers, but f32 loads are still a problem.
5178//
5179// These instructions can load an f32 into a NEON register:
5180//
5181// VLDRS - Only writes S, partial D update.
5182// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5183// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5184//
5185// FCONSTD can be used as a dependency-breaking instruction.
5186unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5187 const MachineInstr &MI, unsigned OpNum,
5188 const TargetRegisterInfo *TRI) const {
5189 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5190 if (!PartialUpdateClearance)
5191 return 0;
5192
5193 assert(TRI && "Need TRI instance");
5194
5195 const MachineOperand &MO = MI.getOperand(OpNum);
5196 if (MO.readsReg())
5197 return 0;
5198 Register Reg = MO.getReg();
5199 int UseOp = -1;
5200
5201 switch (MI.getOpcode()) {
5202 // Normal instructions writing only an S-register.
5203 case ARM::VLDRS:
5204 case ARM::FCONSTS:
5205 case ARM::VMOVSR:
5206 case ARM::VMOVv8i8:
5207 case ARM::VMOVv4i16:
5208 case ARM::VMOVv2i32:
5209 case ARM::VMOVv2f32:
5210 case ARM::VMOVv1i64:
5211 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
5212 break;
5213
5214 // Explicitly reads the dependency.
5215 case ARM::VLD1LNd32:
5216 UseOp = 3;
5217 break;
5218 default:
5219 return 0;
5220 }
5221
5222 // If this instruction actually reads a value from Reg, there is no unwanted
5223 // dependency.
5224 if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5225 return 0;
5226
5227 // We must be able to clobber the whole D-reg.
5228 if (Reg.isVirtual()) {
5229 // Virtual register must be a def undef foo:ssub_0 operand.
5230 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5231 return 0;
5232 } else if (ARM::SPRRegClass.contains(Reg)) {
5233 // Physical register: MI must define the full D-reg.
5234 MCRegister DReg =
5235 TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass);
5236 if (!DReg || !MI.definesRegister(DReg, TRI))
5237 return 0;
5238 }
5239
5240 // MI has an unwanted D-register dependency.
5241 // Avoid defs in the previous N instructions.
5242 return PartialUpdateClearance;
5243}
5244
5245// Break a partial register dependency after getPartialRegUpdateClearance
5246// returned non-zero.
5247void ARMBaseInstrInfo::breakPartialRegDependency(
5248 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5249 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5250 assert(TRI && "Need TRI instance");
5251
5252 const MachineOperand &MO = MI.getOperand(OpNum);
5253 Register Reg = MO.getReg();
5254 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5255 unsigned DReg = Reg;
5256
5257 // If MI defines an S-reg, find the corresponding D super-register.
5258 if (ARM::SPRRegClass.contains(Reg)) {
5259 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5260 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5261 }
5262
5263 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5264 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5265
5266 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5267 // the full D-register by loading the same value to both lanes. The
5268 // instruction is micro-coded with 2 uops, so don't do this until we can
5269 // properly schedule micro-coded instructions. The dispatcher stalls would cause
5270 // regressions that are too large.
5271
5272 // Insert the dependency-breaking FCONSTD before MI.
5273 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5274 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
5275 .addImm(96)
5277 MI.addRegisterKilled(DReg, TRI, true);
5278}
5279
5280bool ARMBaseInstrInfo::hasNOP() const {
5281 return Subtarget.hasFeature(ARM::HasV6KOps);
5282}
5283
5284bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
5285 if (MI->getNumOperands() < 4)
5286 return true;
5287 unsigned ShOpVal = MI->getOperand(3).getImm();
5288 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5289 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5290 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
5291 ((ShImm == 1 || ShImm == 2) &&
5292 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5293 return true;
5294
5295 return false;
5296}
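// For illustration: under the checks above, "add r0, r1, r2, lsl #2" counts
// as a fast shift on Swift, while "add r0, r1, r2, lsl #3" (or any lsr other
// than #1) does not, so callers model the latter with the slower
// shifted-operand timing.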
5297
5298bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5299 const MachineInstr &MI, unsigned DefIdx,
5300 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5301 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5302 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5303
5304 switch (MI.getOpcode()) {
5305 case ARM::VMOVDRR:
5306 // dX = VMOVDRR rY, rZ
5307 // is the same as:
5308 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5309 // Populate the InputRegs accordingly.
5310 // rY
5311 const MachineOperand *MOReg = &MI.getOperand(1);
5312 if (!MOReg->isUndef())
5313 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5314 MOReg->getSubReg(), ARM::ssub_0));
5315 // rZ
5316 MOReg = &MI.getOperand(2);
5317 if (!MOReg->isUndef())
5318 InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
5319 MOReg->getSubReg(), ARM::ssub_1));
5320 return true;
5321 }
5322 llvm_unreachable("Target dependent opcode missing");
5323}
5324
5325bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5326 const MachineInstr &MI, unsigned DefIdx,
5327 RegSubRegPairAndIdx &InputReg) const {
5328 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5329 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5330
5331 switch (MI.getOpcode()) {
5332 case ARM::VMOVRRD:
5333 // rX, rY = VMOVRRD dZ
5334 // is the same as:
5335 // rX = EXTRACT_SUBREG dZ, ssub_0
5336 // rY = EXTRACT_SUBREG dZ, ssub_1
5337 const MachineOperand &MOReg = MI.getOperand(2);
5338 if (MOReg.isUndef())
5339 return false;
5340 InputReg.Reg = MOReg.getReg();
5341 InputReg.SubReg = MOReg.getSubReg();
5342 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5343 return true;
5344 }
5345 llvm_unreachable("Target dependent opcode missing");
5346}
5347
5348bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5349 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5350 RegSubRegPairAndIdx &InsertedReg) const {
5351 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5352 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5353
5354 switch (MI.getOpcode()) {
5355 case ARM::VSETLNi32:
5356 case ARM::MVE_VMOV_to_lane_32:
5357 // dX = VSETLNi32 dY, rZ, imm
5358 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5359 const MachineOperand &MOBaseReg = MI.getOperand(1);
5360 const MachineOperand &MOInsertedReg = MI.getOperand(2);
5361 if (MOInsertedReg.isUndef())
5362 return false;
5363 const MachineOperand &MOIndex = MI.getOperand(3);
5364 BaseReg.Reg = MOBaseReg.getReg();
5365 BaseReg.SubReg = MOBaseReg.getSubReg();
5366
5367 InsertedReg.Reg = MOInsertedReg.getReg();
5368 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5369 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5370 return true;
5371 }
5372 llvm_unreachable("Target dependent opcode missing");
5373}
5374
5375std::pair<unsigned, unsigned>
5376ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5377 const unsigned Mask = ARMII::MO_OPTION_MASK;
5378 return std::make_pair(TF & Mask, TF & ~Mask);
5379}
5380
5381ArrayRef<std::pair<unsigned, const char *>>
5382ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5383 using namespace ARMII;
5384
5385 static const std::pair<unsigned, const char *> TargetFlags[] = {
5386 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5387 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5388 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5389 };
5390 return ArrayRef(TargetFlags);
5391}
5392
5393ArrayRef<std::pair<unsigned, const char *>>
5394ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5395 using namespace ARMII;
5396
5397 static const std::pair<unsigned, const char *> TargetFlags[] = {
5398 {MO_COFFSTUB, "arm-coffstub"},
5399 {MO_GOT, "arm-got"},
5400 {MO_SBREL, "arm-sbrel"},
5401 {MO_DLLIMPORT, "arm-dllimport"},
5402 {MO_SECREL, "arm-secrel"},
5403 {MO_NONLAZY, "arm-nonlazy"}};
5404 return ArrayRef(TargetFlags);
5405}
5406
5407std::optional<RegImmPair>
5408ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
5409 int Sign = 1;
5410 unsigned Opcode = MI.getOpcode();
5411 int64_t Offset = 0;
5412
5413 // TODO: Handle cases where Reg is a super- or sub-register of the
5414 // destination register.
5415 const MachineOperand &Op0 = MI.getOperand(0);
5416 if (!Op0.isReg() || Reg != Op0.getReg())
5417 return std::nullopt;
5418
5419 // We describe SUBri or ADDri instructions.
5420 if (Opcode == ARM::SUBri)
5421 Sign = -1;
5422 else if (Opcode != ARM::ADDri)
5423 return std::nullopt;
5424
5425 // TODO: Third operand can be global address (usually some string). Since
5426 // strings can be relocated we cannot calculate their offsets for
5427 // now.
5428 if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
5429 return std::nullopt;
5430
5431 Offset = MI.getOperand(2).getImm() * Sign;
5432 return RegImmPair{MI.getOperand(1).getReg(), Offset};
5433}
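// For illustration (hypothetical operands): given "%0 = ADDri %1, 16, ..."
// and Reg == %0, this returns {%1, +16}; for "%0 = SUBri %1, 16, ..." it
// returns {%1, -16}. Any other opcode, a non-register first operand, or a
// non-immediate third operand yields std::nullopt.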
5434
5435bool llvm::registerDefinedBetween(unsigned Reg,
5436 MachineBasicBlock::iterator From,
5437 MachineBasicBlock::iterator To,
5438 const TargetRegisterInfo *TRI) {
5439 for (auto I = From; I != To; ++I)
5440 if (I->modifiesRegister(Reg, TRI))
5441 return true;
5442 return false;
5443}
5444
5445MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
5446 const TargetRegisterInfo *TRI) {
5447 // Search backwards to the instruction that defines CPSR. This may or may
5448 // not be a CMP; we check that after this loop. If we find another
5449 // instruction that reads CPSR, we return nullptr.
5450 MachineBasicBlock::iterator CmpMI = Br;
5451 while (CmpMI != Br->getParent()->begin()) {
5452 --CmpMI;
5453 if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
5454 break;
5455 if (CmpMI->readsRegister(ARM::CPSR, TRI))
5456 break;
5457 }
5458
5459 // Check that this inst is a CMP r[0-7], #0 and that the register
5460 // is not redefined between the cmp and the br.
5461 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5462 return nullptr;
5463 Register Reg = CmpMI->getOperand(0).getReg();
5464 Register PredReg;
5465 ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
5466 if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
5467 return nullptr;
5468 if (!isARMLowRegister(Reg))
5469 return nullptr;
5470 if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
5471 return nullptr;
5472
5473 return &*CmpMI;
5474}
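// For illustration (Thumb code, hypothetical registers): the caller can fold
//
//   cmp r3, #0
//   beq .Ltarget
//
// into a single "cbz r3, .Ltarget", provided r3 is a low register, the CMP is
// unpredicated, and r3 is not redefined between the CMP and the branch, which
// are exactly the conditions checked above.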
5475
5476unsigned llvm::ConstantMaterializationCost(unsigned Val,
5477 const ARMSubtarget *Subtarget,
5478 bool ForCodesize) {
5479 if (Subtarget->isThumb()) {
5480 if (Val <= 255) // MOV
5481 return ForCodesize ? 2 : 1;
5482 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5483 ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
5484 ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
5485 return ForCodesize ? 4 : 1;
5486 if (Val <= 510) // MOV + ADDi8
5487 return ForCodesize ? 4 : 2;
5488 if (~Val <= 255) // MOV + MVN
5489 return ForCodesize ? 4 : 2;
5490 if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
5491 return ForCodesize ? 4 : 2;
5492 } else {
5493 if (ARM_AM::getSOImmVal(Val) != -1) // MOV
5494 return ForCodesize ? 4 : 1;
5495 if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
5496 return ForCodesize ? 4 : 1;
5497 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5498 return ForCodesize ? 4 : 1;
5499 if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
5500 return ForCodesize ? 8 : 2;
5501 if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
5502 return ForCodesize ? 8 : 2;
5503 }
5504 if (Subtarget->useMovt()) // MOVW + MOVT
5505 return ForCodesize ? 8 : 2;
5506 return ForCodesize ? 8 : 3; // Literal pool load
5507}
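// Worked examples for the cost model above (assuming a Thumb2 subtarget with
// MOVW/MOVT available):
//   Val = 42         -> 16-bit MOV:            2 bytes, 1 instruction
//   Val = 0x00AB0000 -> T2 modified immediate: 4 bytes, 1 instruction
//   Val = 0x12345678 -> MOVW + MOVT pair:      8 bytes, 2 instructions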
5508
5509bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5510 const ARMSubtarget *Subtarget,
5511 bool ForCodesize) {
5512 // Check with ForCodesize
5513 unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
5514 unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
5515 if (Cost1 < Cost2)
5516 return true;
5517 if (Cost1 > Cost2)
5518 return false;
5519
5520 // If they are equal, try with !ForCodesize
5521 return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
5522 ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
5523}
5524
5525/// Constants defining how certain sequences should be outlined.
5526/// This encompasses how an outlined function should be called, and what kind of
5527/// frame should be emitted for that outlined function.
5528///
5529/// \p MachineOutlinerTailCall implies that the function is being created from
5530/// a sequence of instructions ending in a return.
5531///
5532/// That is,
5533///
5534/// I1 OUTLINED_FUNCTION:
5535/// I2 --> B OUTLINED_FUNCTION I1
5536/// BX LR I2
5537/// BX LR
5538///
5539/// +-------------------------+--------+-----+
5540/// | | Thumb2 | ARM |
5541/// +-------------------------+--------+-----+
5542/// | Call overhead in Bytes | 4 | 4 |
5543/// | Frame overhead in Bytes | 0 | 0 |
5544/// | Stack fixup required | No | No |
5545/// +-------------------------+--------+-----+
5546///
5547/// \p MachineOutlinerThunk implies that the function is being created from
5548/// a sequence of instructions ending in a call. The outlined function is
5549/// called with a BL instruction, and the outlined function tail-calls the
5550/// original call destination.
5551///
5552/// That is,
5553///
5554/// I1 OUTLINED_FUNCTION:
5555/// I2 --> BL OUTLINED_FUNCTION I1
5556/// BL f I2
5557/// B f
5558///
5559/// +-------------------------+--------+-----+
5560/// | | Thumb2 | ARM |
5561/// +-------------------------+--------+-----+
5562/// | Call overhead in Bytes | 4 | 4 |
5563/// | Frame overhead in Bytes | 0 | 0 |
5564/// | Stack fixup required | No | No |
5565/// +-------------------------+--------+-----+
5566///
5567/// \p MachineOutlinerNoLRSave implies that the function should be called using
5568/// a BL instruction, but doesn't require LR to be saved and restored. This
5569/// happens when LR is known to be dead.
5570///
5571/// That is,
5572///
5573/// I1 OUTLINED_FUNCTION:
5574/// I2 --> BL OUTLINED_FUNCTION I1
5575/// I3 I2
5576/// I3
5577/// BX LR
5578///
5579/// +-------------------------+--------+-----+
5580/// | | Thumb2 | ARM |
5581/// +-------------------------+--------+-----+
5582/// | Call overhead in Bytes | 4 | 4 |
5583/// | Frame overhead in Bytes | 2 | 4 |
5584/// | Stack fixup required | No | No |
5585/// +-------------------------+--------+-----+
5586///
5587/// \p MachineOutlinerRegSave implies that the function should be called with a
5588/// save and restore of LR to an available register. This allows us to avoid
5589/// stack fixups. Note that this outlining variant is compatible with the
5590/// NoLRSave case.
5591///
5592/// That is,
5593///
5594/// I1 Save LR OUTLINED_FUNCTION:
5595/// I2 --> BL OUTLINED_FUNCTION I1
5596/// I3 Restore LR I2
5597/// I3
5598/// BX LR
5599///
5600/// +-------------------------+--------+-----+
5601/// | | Thumb2 | ARM |
5602/// +-------------------------+--------+-----+
5603/// | Call overhead in Bytes | 8 | 12 |
5604/// | Frame overhead in Bytes | 2 | 4 |
5605/// | Stack fixup required | No | No |
5606/// +-------------------------+--------+-----+
5607///
5608/// \p MachineOutlinerDefault implies that the function should be called with
5609/// a save and restore of LR to the stack.
5610///
5611/// That is,
5612///
5613/// I1 Save LR OUTLINED_FUNCTION:
5614/// I2 --> BL OUTLINED_FUNCTION I1
5615/// I3 Restore LR I2
5616/// I3
5617/// BX LR
5618///
5619/// +-------------------------+--------+-----+
5620/// | | Thumb2 | ARM |
5621/// +-------------------------+--------+-----+
5622/// | Call overhead in Bytes | 8 | 12 |
5623/// | Frame overhead in Bytes | 2 | 4 |
5624/// | Stack fixup required | Yes | Yes |
5625/// +-------------------------+--------+-----+
5626
5627enum MachineOutlinerClass {
5628 MachineOutlinerTailCall,
5629 MachineOutlinerThunk,
5630 MachineOutlinerNoLRSave,
5631 MachineOutlinerRegSave,
5632 MachineOutlinerDefault
5633};
5634
5635enum MachineOutlinerMBBFlags {
5636 LRUnavailableSomewhere = 0x2,
5637 HasCalls = 0x4,
5638 UnsafeRegsDead = 0x8
5639};
5640
5641struct OutlinerCosts {
5642 int CallTailCall;
5643 int FrameTailCall;
5644 int CallThunk;
5645 int FrameThunk;
5646 int CallNoLRSave;
5647 int FrameNoLRSave;
5648 int CallRegSave;
5649 int FrameRegSave;
5650 int CallDefault;
5651 int FrameDefault;
5652 int SaveRestoreLROnStack;
5653
5654 OutlinerCosts(const ARMSubtarget &target)
5655 : CallTailCall(target.isThumb() ? 4 : 4),
5656 FrameTailCall(target.isThumb() ? 0 : 0),
5657 CallThunk(target.isThumb() ? 4 : 4),
5658 FrameThunk(target.isThumb() ? 0 : 0),
5659 CallNoLRSave(target.isThumb() ? 4 : 4),
5660 FrameNoLRSave(target.isThumb() ? 2 : 4),
5661 CallRegSave(target.isThumb() ? 8 : 12),
5662 FrameRegSave(target.isThumb() ? 2 : 4),
5663 CallDefault(target.isThumb() ? 8 : 12),
5664 FrameDefault(target.isThumb() ? 2 : 4),
5665 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5666};
5667
5668Register
5669ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5670 MachineFunction *MF = C.getMF();
5671 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5672 const ARMBaseRegisterInfo *ARI =
5673 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5674
5675 BitVector regsReserved = ARI->getReservedRegs(*MF);
5676 // Check if there is an available register across the sequence that we can
5677 // use.
5678 for (Register Reg : ARM::rGPRRegClass) {
5679 if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
5680 Reg != ARM::LR && // LR is not reserved, but don't use it.
5681 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5682 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5683 C.isAvailableInsideSeq(Reg, TRI))
5684 return Reg;
5685 }
5686 return Register();
5687}
5688
5689// Compute liveness of LR at the point after the interval [I, E), which
5690// denotes a *backward* iteration through instructions. Used only for return
5691// basic blocks, which do not end with a tail call.
5692static bool isLRAvailable(const TargetRegisterInfo &TRI,
5693 MachineBasicBlock::reverse_iterator I,
5694 MachineBasicBlock::reverse_iterator E) {
5695 // At the end of the function LR is dead.
5696 bool Live = false;
5697 for (; I != E; ++I) {
5698 const MachineInstr &MI = *I;
5699
5700 // Check defs of LR.
5701 if (MI.modifiesRegister(ARM::LR, &TRI))
5702 Live = false;
5703
5704 // Check uses of LR.
5705 unsigned Opcode = MI.getOpcode();
5706 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5707 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5708 Opcode == ARM::tBXNS_RET) {
5709 // These instructions use LR, but it's not an (explicit or implicit)
5710 // operand.
5711 Live = true;
5712 continue;
5713 }
5714 if (MI.readsRegister(ARM::LR, &TRI))
5715 Live = true;
5716 }
5717 return !Live;
5718}
5719
5720std::optional<std::unique_ptr<outliner::OutlinedFunction>>
5721ARMBaseInstrInfo::getOutliningCandidateInfo(
5722 const MachineModuleInfo &MMI,
5723 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
5724 unsigned MinRepeats) const {
5725 unsigned SequenceSize = 0;
5726 for (auto &MI : RepeatedSequenceLocs[0])
5727 SequenceSize += getInstSizeInBytes(MI);
5728
5729 // Properties about candidate MBBs that hold for all of them.
5730 unsigned FlagsSetInAll = 0xF;
5731
5732 // Compute liveness information for each candidate, and set FlagsSetInAll.
5733 const TargetRegisterInfo &TRI = getRegisterInfo();
5734 for (outliner::Candidate &C : RepeatedSequenceLocs)
5735 FlagsSetInAll &= C.Flags;
5736
5737 // According to the ARM Procedure Call Standard, the following are
5738 // undefined on entry/exit from a function call:
5739 //
5740 // * Register R12(IP),
5741 // * Condition codes (and thus the CPSR register)
5742 //
5743 // Since we control the instructions which are part of the outlined regions
5744 // we don't need to be fully compliant with the AAPCS, but we have to
5745 // guarantee that if a veneer is inserted at link time the code is still
5746 // correct. Because of this, we can't outline any sequence of instructions
5747 // where one of these registers is live into/across it. Thus, we need to
5748 // delete those candidates.
5749 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5750 // If the unsafe registers in this block are all dead, then we don't need
5751 // to compute liveness here.
5752 if (C.Flags & UnsafeRegsDead)
5753 return false;
5754 return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
5755 };
5756
5757 // Are there any candidates where those registers are live?
5758 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5759 // Erase every candidate that violates the restrictions above. (It could be
5760 // true that we have viable candidates, so it's not worth bailing out in
5761 // the case that, say, 1 out of 20 candidates violate the restrictions.)
5762 llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
5763
5764 // If the sequence doesn't have enough candidates left, then we're done.
5765 if (RepeatedSequenceLocs.size() < MinRepeats)
5766 return std::nullopt;
5767 }
5768
5769 // We expect the majority of the outlining candidates to be in consensus with
5770 // regard to return address sign and authentication, and branch target
5771 // enforcement, in other words, partitioning according to all the four
5772 // possible combinations of PAC-RET and BTI is going to yield one big subset
5773 // and three small (likely empty) subsets. That allows us to cull incompatible
5774 // candidates separately for PAC-RET and BTI.
5775
5776 // Partition the candidates in two sets: one with BTI enabled and one with BTI
5777 // disabled. Remove the candidates from the smaller set. If they are the same
5778 // number prefer the non-BTI ones for outlining, since they have less
5779 // overhead.
5780 auto NoBTI =
5781 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5782 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5783 return AFI.branchTargetEnforcement();
5784 });
5785 if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
5786 std::distance(NoBTI, RepeatedSequenceLocs.end()))
5787 RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
5788 else
5789 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
5790
5791 if (RepeatedSequenceLocs.size() < MinRepeats)
5792 return std::nullopt;
5793
5794 // Likewise, partition the candidates according to PAC-RET enablement.
5795 auto NoPAC =
5796 llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
5797 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5798 // If the function happens to not spill the LR, do not disqualify it
5799 // from the outlining.
5800 return AFI.shouldSignReturnAddress(true);
5801 });
5802 if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
5803 std::distance(NoPAC, RepeatedSequenceLocs.end()))
5804 RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
5805 else
5806 RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
5807
5808 if (RepeatedSequenceLocs.size() < MinRepeats)
5809 return std::nullopt;
5810
5811 // At this point, we have only "safe" candidates to outline. Figure out
5812 // frame + call instruction information.
5813
5814 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5815
5816 // Helper lambda which sets call information for every candidate.
5817 auto SetCandidateCallInfo =
5818 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5819 for (outliner::Candidate &C : RepeatedSequenceLocs)
5820 C.setCallInfo(CallID, NumBytesForCall);
5821 };
5822
5823 OutlinerCosts Costs(Subtarget);
5824
5825 const auto &SomeMFI =
5826 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5827 // Adjust costs to account for the BTI instructions.
5828 if (SomeMFI.branchTargetEnforcement()) {
5829 Costs.FrameDefault += 4;
5830 Costs.FrameNoLRSave += 4;
5831 Costs.FrameRegSave += 4;
5832 Costs.FrameTailCall += 4;
5833 Costs.FrameThunk += 4;
5834 }
5835
5836 // Adjust costs to account for sign and authentication instructions.
5837 if (SomeMFI.shouldSignReturnAddress(true)) {
5838 Costs.CallDefault += 8; // +PAC instr, +AUT instr
5839 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
5840 }
5841
5842 unsigned FrameID = MachineOutlinerDefault;
5843 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
5844
5845 // If the last instruction in any candidate is a terminator, then we should
5846 // tail call all of the candidates.
5847 if (RepeatedSequenceLocs[0].back().isTerminator()) {
5848 FrameID = MachineOutlinerTailCall;
5849 NumBytesToCreateFrame = Costs.FrameTailCall;
5850 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
5851 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
5852 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
5853 LastInstrOpcode == ARM::tBLXr ||
5854 LastInstrOpcode == ARM::tBLXr_noip ||
5855 LastInstrOpcode == ARM::tBLXi) {
5856 FrameID = MachineOutlinerThunk;
5857 NumBytesToCreateFrame = Costs.FrameThunk;
5858 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
5859 } else {
5860 // We need to decide how to emit calls + frames. We can always emit the same
5861 // frame if we don't need to save to the stack. If we have to save to the
5862 // stack, then we need a different frame.
5863 unsigned NumBytesNoStackCalls = 0;
5864 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5865
5866 for (outliner::Candidate &C : RepeatedSequenceLocs) {
5867 // LR liveness is overestimated in return blocks, unless they end with a
5868 // tail call.
5869 const auto Last = C.getMBB()->rbegin();
5870 const bool LRIsAvailable =
5871 C.getMBB()->isReturnBlock() && !Last->isCall()
5872 ? isLRAvailable(TRI, Last,
5873 (MachineBasicBlock::reverse_iterator)C.begin())
5874 : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
5875 if (LRIsAvailable) {
5876 FrameID = MachineOutlinerNoLRSave;
5877 NumBytesNoStackCalls += Costs.CallNoLRSave;
5878 C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
5879 CandidatesWithoutStackFixups.push_back(C);
5880 }
5881
5882 // Is an unused register available? If so, we won't modify the stack, so
5883 // we can outline with the same frame type as those that don't save LR.
5884 else if (findRegisterToSaveLRTo(C)) {
5885 FrameID = MachineOutlinerRegSave;
5886 NumBytesNoStackCalls += Costs.CallRegSave;
5887 C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
5888 CandidatesWithoutStackFixups.push_back(C);
5889 }
5890
5891 // Is SP used in the sequence at all? If not, we don't have to modify
5892 // the stack, so we are guaranteed to get the same frame.
5893 else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
5894 NumBytesNoStackCalls += Costs.CallDefault;
5895 C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5896 CandidatesWithoutStackFixups.push_back(C);
5897 }
5898
5899 // If we outline this, we need to modify the stack. Pretend we don't
5900 // outline this by saving all of its bytes.
5901 else
5902 NumBytesNoStackCalls += SequenceSize;
5903 }
5904
5905 // If there are no places where we have to save LR, then note that we don't
5906 // have to update the stack. Otherwise, give every candidate the default
5907 // call type
5908 if (NumBytesNoStackCalls <=
5909 RepeatedSequenceLocs.size() * Costs.CallDefault) {
5910 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5911 FrameID = MachineOutlinerNoLRSave;
5912 if (RepeatedSequenceLocs.size() < MinRepeats)
5913 return std::nullopt;
5914 } else
5915 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5916 }
5917
5918 // Does every candidate's MBB contain a call? If so, then we might have a
5919 // call in the range.
5920 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5921 // check if the range contains a call. These require a save + restore of
5922 // the link register.
5923 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5924 if (any_of(drop_end(FirstCand),
5925 [](const MachineInstr &MI) { return MI.isCall(); }))
5926 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5927
5928 // Handle the last instruction separately. If it is tail call, then the
5929 // last instruction is a call, we don't want to save + restore in this
5930 // case. However, it could be possible that the last instruction is a
5931 // call without it being valid to tail call this sequence. We should
5932 // consider this as well.
5933 else if (FrameID != MachineOutlinerThunk &&
5934 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
5935 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5936 }
5937
5938 return std::make_unique<outliner::OutlinedFunction>(
5939 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
5940}
5941
5942bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
5943 int64_t Fixup,
5944 bool Updt) const {
5945 int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
5946 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
5947 if (SPIdx < 0)
5948 // No SP operand
5949 return true;
5950 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
5951 // If SP is not the base register we can't do much
5952 return false;
5953
5954 // Stack might be involved but addressing mode doesn't handle any offset.
5955 // Note: AddrModeT1_[1|2|4] don't operate on SP
5956 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
5957 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
5958 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
5959 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
5960 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
5961 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
5962 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
5963 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
5964 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
5965 AddrMode == ARMII::AddrModeNone ||
5966 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
5967 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
5968 return false;
5969
5970 unsigned NumOps = MI->getDesc().getNumOperands();
5971 unsigned ImmIdx = NumOps - 3;
5972
5973 const MachineOperand &Offset = MI->getOperand(ImmIdx);
5974 assert(Offset.isImm() && "Is not an immediate");
5975 int64_t OffVal = Offset.getImm();
5976
5977 if (OffVal < 0)
5978 // Don't override data if they are below SP.
5979 return false;
5980
5981 unsigned NumBits = 0;
5982 unsigned Scale = 1;
5983
5984 switch (AddrMode) {
5985 case ARMII::AddrMode3:
5986 if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
5987 return false;
5988 OffVal = ARM_AM::getAM3Offset(OffVal);
5989 NumBits = 8;
5990 break;
5991 case ARMII::AddrMode5:
5992 if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
5993 return false;
5994 OffVal = ARM_AM::getAM5Offset(OffVal);
5995 NumBits = 8;
5996 Scale = 4;
5997 break;
5998 case ARMII::AddrMode5FP16:
5999 if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
6000 return false;
6001 OffVal = ARM_AM::getAM5FP16Offset(OffVal);
6002 NumBits = 8;
6003 Scale = 2;
6004 break;
6005 case ARMII::AddrModeT2_i8pos:
6006 NumBits = 8;
6007 break;
6008 case ARMII::AddrModeT2_i8s4:
6009 // FIXME: Values are already scaled in this addressing mode.
6010 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6011 NumBits = 10;
6012 break;
6014 NumBits = 8;
6015 Scale = 4;
6016 break;
6017 case ARMII::AddrModeT2_i12:
6018 case ARMII::AddrMode_i12:
6019 NumBits = 12;
6020 break;
6021 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6022 NumBits = 8;
6023 Scale = 4;
6024 break;
6025 default:
6026 llvm_unreachable("Unsupported addressing mode!");
6027 }
6028 // Make sure the offset is encodable for instructions that scale the
6029 // immediate.
6030 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6031 "Can't encode this offset!");
6032 OffVal += Fixup / Scale;
6033
6034 unsigned Mask = (1 << NumBits) - 1;
6035
6036 if (OffVal <= Mask) {
6037 if (Updt)
6038 MI->getOperand(ImmIdx).setImm(OffVal);
6039 return true;
6040 }
6041
6042 return false;
6043}
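// For illustration (hypothetical operands): a VFP load using AddrMode5 whose
// immediate encodes +3 words (12 bytes), combined with a stack fixup of 8
// bytes, gives OffVal = 3 + 8/4 = 5 above. That still fits in the 8-bit
// scaled field, so the function returns true (and rewrites the operand when
// Updt is set).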
6044
6045void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
6046 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6047 outliner::Candidate &C = Candidates.front();
6048 // branch-target-enforcement is guaranteed to be consistent between all
6049 // candidates, so we only need to look at one.
6050 const Function &CFn = C.getMF()->getFunction();
6051 if (CFn.hasFnAttribute("branch-target-enforcement"))
6052 F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
6053
6054 if (CFn.hasFnAttribute("sign-return-address"))
6055 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
6056
6057 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6058}
6059
6060bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
6061 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6062 const Function &F = MF.getFunction();
6063
6064 // Can F be deduplicated by the linker? If it can, don't outline from it.
6065 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6066 return false;
6067
6068 // Don't outline from functions with section markings; the program could
6069 // expect that all the code is in the named section.
6070 // FIXME: Allow outlining from multiple functions with the same section
6071 // marking.
6072 if (F.hasSection())
6073 return false;
6074
6075 // FIXME: Thumb1 outlining is not handled
6076 if (Subtarget.isThumb1Only())
6077 return false;
6078
6079 // It's safe to outline from MF.
6080 return true;
6081}
6082
6083bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6084 unsigned &Flags) const {
6085 // Check if LR is available through all of the MBB. If it's not, then set
6086 // a flag.
6087 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6088 "Suitable Machine Function for outlining must track liveness");
6089
6090 LiveRegUnits LRU(getRegisterInfo());
6091
6092 for (MachineInstr &MI : llvm::reverse(MBB))
6093 LRU.accumulate(MI);
6094
6095 // Check if each of the unsafe registers are available...
6096 bool R12AvailableInBlock = LRU.available(ARM::R12);
6097 bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
6098
6099 // If all of these are dead (and not live out), we know we don't have to check
6100 // them later.
6101 if (R12AvailableInBlock && CPSRAvailableInBlock)
6102 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6103
6104 // Now, add the live outs to the set.
6105 LRU.addLiveOuts(MBB);
6106
6107 // If any of these registers is available in the MBB, but also a live out of
6108 // the block, then we know outlining is unsafe.
6109 if (R12AvailableInBlock && !LRU.available(ARM::R12))
6110 return false;
6111 if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
6112 return false;
6113
6114 // Check if there's a call inside this MachineBasicBlock. If there is, then
6115 // set a flag.
6116 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6117 Flags |= MachineOutlinerMBBFlags::HasCalls;
6118
6119 // LR liveness is overestimated in return blocks.
6120
6121 bool LRIsAvailable =
6122 MBB.isReturnBlock() && !MBB.back().isCall()
6123 ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
6124 : LRU.available(ARM::LR);
6125 if (!LRIsAvailable)
6126 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6127
6128 return true;
6129}
6130
6131outliner::InstrType
6132ARMBaseInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
6133 MachineBasicBlock::iterator &MIT,
6134 unsigned Flags) const {
6135 MachineInstr &MI = *MIT;
6136 const TargetRegisterInfo *TRI = &getRegisterInfo();
6137
6138 // PIC instructions contain labels, outlining them would break offset
6139 // computing.
6140 unsigned Opc = MI.getOpcode();
6141 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6142 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6143 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6144 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6145 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6146 Opc == ARM::t2MOV_ga_pcrel)
6147 return outliner::InstrType::Illegal;
6148
6149 // Be conservative with ARMv8.1 MVE instructions.
6150 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6151 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6152 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6153 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6154 Opc == ARM::t2LoopEndDec)
6155 return outliner::InstrType::Illegal;
6156
6157 const MCInstrDesc &MCID = MI.getDesc();
6158 uint64_t MIFlags = MCID.TSFlags;
6159 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6160 return outliner::InstrType::Illegal;
6161
6162 // Is this a terminator for a basic block?
6163 if (MI.isTerminator())
6164 // TargetInstrInfo::getOutliningType has already filtered out anything
6165 // that would break this, so we can allow it here.
6166 return outliner::InstrType::Legal;
6167
6168 // Don't outline if link register or program counter value are used.
6169 if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
6170 return outliner::InstrType::Illegal;
6171
6172 if (MI.isCall()) {
6173 // Get the function associated with the call. Look at each operand and find
6174 // the one that represents the callee and get its name.
6175 const Function *Callee = nullptr;
6176 for (const MachineOperand &MOP : MI.operands()) {
6177 if (MOP.isGlobal()) {
6178 Callee = dyn_cast<Function>(MOP.getGlobal());
6179 break;
6180 }
6181 }
6182
6183 // Don't outline calls to "mcount"-like functions; in particular, Linux
6184 // kernel function tracing relies on it.
6185 if (Callee &&
6186 (Callee->getName() == "\01__gnu_mcount_nc" ||
6187 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6188 return outliner::InstrType::Illegal;
6189
6190 // If we don't know anything about the callee, assume it depends on the
6191 // stack layout of the caller. In that case, it's only legal to outline
6192 // as a tail-call. Explicitly list the call instructions we know about so
6193 // we don't get unexpected results with call pseudo-instructions.
6194 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6195 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6196 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6197 Opc == ARM::tBLXi)
6198 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6199
6200 if (!Callee)
6201 return UnknownCallOutlineType;
6202
6203 // We have a function we have information about. Check if it's something we
6204 // can safely outline.
6205 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
6206
6207 // We don't know what's going on with the callee at all. Don't touch it.
6208 if (!CalleeMF)
6209 return UnknownCallOutlineType;
6210
6211 // Check if we know anything about the callee saves on the function. If we
6212 // don't, then don't touch it, since that implies that we haven't computed
6213 // anything about its stack frame yet.
6214 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6215 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6216 MFI.getNumObjects() > 0)
6217 return UnknownCallOutlineType;
6218
6219 // At this point, we can say that CalleeMF ought to not pass anything on the
6220 // stack. Therefore, we can outline it.
6221 return outliner::InstrType::Legal;
6222 }
6223
6224 // Since calls are handled, don't touch LR or PC
6225 if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
6226 return outliner::InstrType::Illegal;
6227
6228 // Does this use the stack?
6229 if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
6230 // True if there is no chance that any outlined candidate from this range
6231 // could require stack fixups. That is, both
6232 // * LR is available in the range (No save/restore around call)
6233 // * The range doesn't include calls (No save/restore in outlined frame)
6234 // are true.
6235 // These conditions also ensure correctness of the return address
6236 // authentication - we insert sign and authentication instructions only if
6237 // we save/restore LR on stack, but then this condition ensures that the
6238 // outlined range does not modify the SP, therefore the SP value used for
6239 // signing is the same as the one used for authentication.
6240 // FIXME: This is very restrictive; the flags check the whole block,
6241 // not just the bit we will try to outline.
6242 bool MightNeedStackFixUp =
6243 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6244 MachineOutlinerMBBFlags::HasCalls));
6245
6246 if (!MightNeedStackFixUp)
6247 return outliner::InstrType::Legal;
6248
6249 // Any modification of SP will break our code to save/restore LR.
6250 // FIXME: We could handle some instructions which add a constant offset to
6251 // SP, with a bit more work.
6252 if (MI.modifiesRegister(ARM::SP, TRI))
6253 return outliner::InstrType::Illegal;
6254
6255 // At this point, we have a stack instruction that we might need to fix
6256 // up. We'll handle it if it's a load or store.
6257 if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
6258 false))
6259 return outliner::InstrType::Legal;
6260
6261 // We can't fix it up, so don't outline it.
6262 return outliner::InstrType::Illegal;
6263
6264
6265 // Be conservative with IT blocks.
6266 if (MI.readsRegister(ARM::ITSTATE, TRI) ||
6267 MI.modifiesRegister(ARM::ITSTATE, TRI))
6268 return outliner::InstrType::Illegal;
6269
6270 // Don't outline CFI instructions.
6271 if (MI.isCFIInstruction())
6272 return outliner::InstrType::Illegal;
6273
6274 return outliner::InstrType::Legal;
6275}
6276
6277void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6278 for (MachineInstr &MI : MBB) {
6279 checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
6280 }
6281}
6282
6283void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6284 MachineBasicBlock::iterator It, bool CFI,
6285 bool Auth) const {
6286 int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
6287 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6288 assert(Align >= 8 && Align <= 256);
6289 if (Auth) {
6290 assert(Subtarget.isThumb2());
6291 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6292 // sequence.
6293 BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
6294 BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
6295 .addReg(ARM::R12, RegState::Kill)
6296 .addReg(ARM::LR, RegState::Kill)
6297 .addReg(ARM::SP)
6298 .addImm(-Align)
6299 .add(predOps(ARMCC::AL))
6300 .setMIFlags(MIFlags);
6301 } else {
6302 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6303 BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
6304 .addReg(ARM::LR, RegState::Kill)
6305 .addReg(ARM::SP)
6306 .addImm(-Align)
6307 .add(predOps(ARMCC::AL))
6308 .setMIFlags(MIFlags);
6309 }
6310
6311 if (!CFI)
6312 return;
6313
6314 // Add a CFI, saying CFA is offset by Align bytes from SP.
6315 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
6316 CFIBuilder.buildDefCFAOffset(Align);
6317
6318 // Add a CFI saying that the LR that we want to find is now higher than
6319 // before.
6320 int LROffset = Auth ? Align - 4 : Align;
6321 CFIBuilder.buildOffset(ARM::LR, -LROffset);
6322 if (Auth) {
6323 // Add a CFI for the location of the return address PAC.
6324 CFIBuilder.buildOffset(ARM::RA_AUTH_CODE, -Align);
6325 }
6326}
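// For illustration (Thumb2, Auth == true, Align == 8), the code above emits
// roughly:
//   pac                        ; compute the return-address PAC into r12
//   strd r12, lr, [sp, #-8]!   ; push PAC and LR, pre-decrementing SP
// followed by CFI directives placing the CFA at SP+8, LR at CFA-4 and the
// authentication code at CFA-8.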
6327
6328void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6329 MachineBasicBlock::iterator It,
6330 bool CFI, bool Auth) const {
6331 int Align = Subtarget.getStackAlignment().value();
6332 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6333 if (Auth) {
6334 assert(Subtarget.isThumb2());
6335 // Restore return address PAC and LR.
6336 BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
6337 .addReg(ARM::R12, RegState::Define)
6338 .addReg(ARM::LR, RegState::Define)
6339 .addReg(ARM::SP, RegState::Define)
6340 .addReg(ARM::SP)
6341 .addImm(Align)
6342 .add(predOps(ARMCC::AL))
6343 .setMIFlags(MIFlags);
6344 // LR authentication is after the CFI instructions, below.
6345 } else {
6346 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6347 MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
6348 .addReg(ARM::SP, RegState::Define)
6349 .addReg(ARM::SP);
6350 if (!Subtarget.isThumb())
6351 MIB.addReg(0);
6352 MIB.addImm(Subtarget.getStackAlignment().value())
6353 .add(predOps(ARMCC::AL))
6354 .setMIFlags(MIFlags);
6355 }
6356
6357 if (CFI) {
6358 // Now stack has moved back up and we have restored LR.
6359 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy);
6360 CFIBuilder.buildDefCFAOffset(0);
6361 CFIBuilder.buildRestore(ARM::LR);
6362 if (Auth)
6363 CFIBuilder.buildUndefined(ARM::RA_AUTH_CODE);
6364 }
6365
6366 if (Auth)
6367 BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
6368}
6369
6370void ARMBaseInstrInfo::buildOutlinedFrame(
6371 MachineBasicBlock &MBB, MachineFunction &MF,
6372 const outliner::OutlinedFunction &OF) const {
6373 // For thunk outlining, rewrite the last instruction from a call to a
6374 // tail-call.
6375 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6376 MachineInstr *Call = &*--MBB.instr_end();
6377 bool isThumb = Subtarget.isThumb();
6378 unsigned FuncOp = isThumb ? 2 : 0;
6379 unsigned Opc = Call->getOperand(FuncOp).isReg()
6380 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6381 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6382 : ARM::tTAILJMPdND
6383 : ARM::TAILJMPd;
6384 MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
6385 .add(Call->getOperand(FuncOp));
6386 if (isThumb && !Call->getOperand(FuncOp).isReg())
6387 MIB.add(predOps(ARMCC::AL));
6388 Call->eraseFromParent();
6389 }
6390
6391 // Is there a call in the outlined range?
6392 auto IsNonTailCall = [](MachineInstr &MI) {
6393 return MI.isCall() && !MI.isReturn();
6394 };
6395 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
6396 MachineBasicBlock::iterator It = MBB.begin();
6397 MachineBasicBlock::iterator Et = MBB.end();
6398
6399 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6400 OF.FrameConstructionID == MachineOutlinerThunk)
6401 Et = std::prev(MBB.end());
6402
6403 // We have to save and restore LR, we need to add it to the liveins if it
6404 // is not already part of the set. This is sufficient since outlined
6405 // functions only have one block.
6406 if (!MBB.isLiveIn(ARM::LR))
6407 MBB.addLiveIn(ARM::LR);
6408
6409 // Insert a save before the outlined region
6410 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true);
6411 saveLROnStack(MBB, It, true, Auth);
6412
6413 // Fix up the instructions in the range, since we're going to modify the
6414 // stack.
6415 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6416 "Can only fix up stack references once");
6417 fixupPostOutline(MBB);
6418
6419 // Insert a restore before the terminator for the function. Restore LR.
6420 restoreLRFromStack(MBB, Et, true, Auth);
6421 }
6422
6423 // If this is a tail call outlined function, then there's already a return.
6424 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6425 OF.FrameConstructionID == MachineOutlinerThunk)
6426 return;
6427
6428 // Here we have to insert the return ourselves. Get the correct opcode from
6429 // current feature set.
6430 BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
6431 .add(predOps(ARMCC::AL));
6432
6433 // Did we have to modify the stack by saving the link register?
6434 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6435 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6436 return;
6437
6438 // We modified the stack.
6439 // Walk over the basic block and fix up all the stack accesses.
6440 fixupPostOutline(MBB);
6441}
6442
6443MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
6444 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6445 MachineFunction &MF, outliner::Candidate &C) const {
6446 MachineInstrBuilder MIB;
6447 MachineBasicBlock::iterator CallPt;
6448 unsigned Opc;
6449 bool isThumb = Subtarget.isThumb();
6450
6451 // Are we tail calling?
6452 if (C.CallConstructionID == MachineOutlinerTailCall) {
6453 // If yes, then we can just branch to the label.
6454 Opc = isThumb
6455 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6456 : ARM::TAILJMPd;
6457 MIB = BuildMI(MF, DebugLoc(), get(Opc))
6458 .addGlobalAddress(M.getNamedValue(MF.getName()));
6459 if (isThumb)
6460 MIB.add(predOps(ARMCC::AL));
6461 It = MBB.insert(It, MIB);
6462 return It;
6463 }
6464
6465 // Create the call instruction.
6466 Opc = isThumb ? ARM::tBL : ARM::BL;
6467 MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
6468 if (isThumb)
6469 CallMIB.add(predOps(ARMCC::AL));
6470 CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
6471
6472 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6473 C.CallConstructionID == MachineOutlinerThunk) {
6474 // No, so just insert the call.
6475 It = MBB.insert(It, CallMIB);
6476 return It;
6477 }
6478
6479 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6480 // Can we save to a register?
6481 if (C.CallConstructionID == MachineOutlinerRegSave) {
6482 Register Reg = findRegisterToSaveLRTo(C);
6483 assert(Reg != 0 && "No callee-saved register available?");
6484
6485 // Save and restore LR from that register.
6486 copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
6487 if (!AFI.isLRSpilled())
6489 .buildRegister(ARM::LR, Reg);
6490 CallPt = MBB.insert(It, CallMIB);
6491 copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
6492 if (!AFI.isLRSpilled())
6494 It--;
6495 return CallPt;
6496 }
6497 // We have the default case. Save and restore from SP.
6498 if (!MBB.isLiveIn(ARM::LR))
6499 MBB.addLiveIn(ARM::LR);
6500 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
6501 saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
6502 CallPt = MBB.insert(It, CallMIB);
6503 restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
6504 It--;
6505 return CallPt;
6506}
6507
6509 MachineFunction &MF) const {
6510 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6511}
6512
6513bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
6514 const MachineInstr &MI) const {
6515 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6516 // the tail predication conversion. This means that the element count
6517 // register has to be live for longer, but that has to be better than
6518 // spill/restore and VPT predication.
6519 return (isVCTP(&MI) && !isPredicated(MI)) ||
6520 TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
6521}
6522
6523unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
6524 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6525 : ARM::BLX;
6526}
6527
6528unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
6529 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6530 : ARM::tBLXr;
6531}
6532
6533unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
6534 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6535 : ARM::BLX_pred;
6536}
6537
6538namespace {
6539class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6540 MachineInstr *EndLoop, *LoopCount;
6541 MachineFunction *MF;
6542 const TargetInstrInfo *TII;
6543
6544 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6545 // [LAST_IS_USE] : last reference to register in schedule is a use
6546 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6547 static int constexpr MAX_STAGES = 30;
6548 static int constexpr LAST_IS_USE = MAX_STAGES;
6549 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6550 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6551 typedef std::map<unsigned, IterNeed> IterNeeds;
6552
6553 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6554 const IterNeeds &CIN);
6555 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6556
6557 // Meanings of the various stuff with loop types:
6558 // t2Bcc:
6559 // EndLoop = branch at end of original BB that will become a kernel
6560 // LoopCount = CC setter live into branch
6561 // t2LoopEnd:
6562 // EndLoop = branch at end of original BB
6563 // LoopCount = t2LoopDec
6564public:
6565 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6566 : EndLoop(EndLoop), LoopCount(LoopCount),
6567 MF(EndLoop->getParent()->getParent()),
6568 TII(MF->getSubtarget().getInstrInfo()) {}
6569
6570 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6571 // Only ignore the terminator.
6572 return MI == EndLoop || MI == LoopCount;
6573 }
6574
6575 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6576 if (tooMuchRegisterPressure(SSD, SMS))
6577 return false;
6578
6579 return true;
6580 }
6581
6582 std::optional<bool> createTripCountGreaterCondition(
6583 int TC, MachineBasicBlock &MBB,
6584 SmallVectorImpl<MachineOperand> &Cond) override {
6585
6586 if (isCondBranchOpcode(EndLoop->getOpcode())) {
6587 Cond.push_back(EndLoop->getOperand(1));
6588 Cond.push_back(EndLoop->getOperand(2));
6589 if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
6590 TII->reverseBranchCondition(Cond);
6591 }
6592 return {};
6593 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6594 // General case just lets the unrolled t2LoopDec do the subtraction and
6595 // therefore just needs to check if zero has been reached.
6596 MachineInstr *LoopDec = nullptr;
6597 for (auto &I : MBB.instrs())
6598 if (I.getOpcode() == ARM::t2LoopDec)
6599 LoopDec = &I;
6600 assert(LoopDec && "Unable to find copied LoopDec");
6601 // Check if we're done with the loop.
6602 BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
6603 .addReg(LoopDec->getOperand(0).getReg())
6604 .addImm(0)
6605 .addImm(ARMCC::AL)
6606 .addReg(ARM::NoRegister);
6607 Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
6608 Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
6609 return {};
6610 } else
6611 llvm_unreachable("Unknown EndLoop");
6612 }
6613
6614 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6615
6616 void adjustTripCount(int TripCountAdjust) override {}
6617};
6618
6619void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6620 const IterNeeds &CIN) {
6621 // Increase pressure by the amounts in CrossIterationNeeds
6622 for (const auto &N : CIN) {
6623 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6624 for (int I = 0; I < Cnt; ++I)
6627 }
6628 // Decrease pressure by the amounts in CrossIterationNeeds
6629 for (const auto &N : CIN) {
6630 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6631 for (int I = 0; I < Cnt; ++I)
6634 }
6635}
6636
6637bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6638 SMSchedule &SMS) {
6639 IterNeeds CrossIterationNeeds;
6640
6641 // Determine which values will be loop-carried after the schedule is
6642 // applied
6643
6644 for (auto &SU : SSD.SUnits) {
6645 const MachineInstr *MI = SU.getInstr();
6646 int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6647 for (auto &S : SU.Succs)
6648 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6649 Register Reg = S.getReg();
6650 if (Reg.isVirtual())
6651 CrossIterationNeeds[Reg.id()].set(0);
6652 } else if (S.isAssignedRegDep()) {
6653 int OStg = SMS.stageScheduled(S.getSUnit());
6654 if (OStg >= 0 && OStg != Stg) {
6655 Register Reg = S.getReg();
6656 if (Reg.isVirtual())
6657 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
6658 }
6659 }
6660 }
6661
6662 // Determine more-or-less what the proposed schedule (reversed) is going to
6663 // be; it might not be quite the same because the within-cycle ordering
6664 // created by SMSchedule depends upon changes to help with address offsets and
6665 // the like.
6666 std::vector<SUnit *> ProposedSchedule;
6667 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6668 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6669 ++Stage) {
6670 std::deque<SUnit *> Instrs =
6671 SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
6672 std::sort(Instrs.begin(), Instrs.end(),
6673 [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6674 llvm::append_range(ProposedSchedule, Instrs);
6675 }
6676
6677 // Learn whether the last use/def of each cross-iteration register is a use or
6678 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6679 // and we do not have to add the pressure.
6680 for (auto *SU : ProposedSchedule)
6681 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6682 ++OperI) {
6683 auto MO = *OperI;
6684 if (!MO.isReg() || !MO.getReg())
6685 continue;
6686 Register Reg = MO.getReg();
6687 auto CIter = CrossIterationNeeds.find(Reg.id());
6688 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6689 CIter->second[SEEN_AS_LIVE])
6690 continue;
6691 if (MO.isDef() && !MO.isDead())
6692 CIter->second.set(SEEN_AS_LIVE);
6693 else if (MO.isUse())
6694 CIter->second.set(LAST_IS_USE);
6695 }
6696 for (auto &CI : CrossIterationNeeds)
6697 CI.second.reset(LAST_IS_USE);
6698
6699 RegionPressure RecRegPressure;
6700 RegPressureTracker RPTracker(RecRegPressure);
6701 RegisterClassInfo RegClassInfo;
6702 RegClassInfo.runOnMachineFunction(*MF);
6703 RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
6704 EndLoop->getParent()->end(), false, false);
6705
6706 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6707
6708 for (auto *SU : ProposedSchedule) {
6709 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6710 RPTracker.setPos(std::next(CurInstI));
6711 RPTracker.recede();
6712
6713 // Track what cross-iteration registers would be seen as live
6714 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6715 auto MO = *OperI;
6716 if (!MO.isReg() || !MO.getReg())
6717 continue;
6718 Register Reg = MO.getReg();
6719 if (MO.isDef() && !MO.isDead()) {
6720 auto CIter = CrossIterationNeeds.find(Reg.id());
6721 if (CIter != CrossIterationNeeds.end()) {
6722 CIter->second.reset(0);
6723 CIter->second.reset(SEEN_AS_LIVE);
6724 }
6725 }
6726 }
6727 for (auto &S : SU->Preds) {
6728 auto Stg = SMS.stageScheduled(SU);
6729 if (S.isAssignedRegDep()) {
6730 Register Reg = S.getReg();
6731 auto CIter = CrossIterationNeeds.find(Reg.id());
6732 if (CIter != CrossIterationNeeds.end()) {
6733 auto Stg2 = SMS.stageScheduled(S.getSUnit());
6734 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6735 if (Stg - Stg2 < MAX_STAGES)
6736 CIter->second.set(Stg - Stg2);
6737 CIter->second.set(SEEN_AS_LIVE);
6738 }
6739 }
6740 }
6741
6742 bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6743 }
6744
6745 auto &P = RPTracker.getPressure().MaxSetPressure;
6746 for (unsigned I = 0, E = P.size(); I < E; ++I) {
6747 // Exclude some Neon register classes.
6748 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
6749 I == ARM::DTriple_with_qsub_0_in_QPR)
6750 continue;
6751
6752 if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
6753 return true;
6754 }
6755 }
6756 return false;
6757}
6758
6759} // namespace
6760
6761std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6762ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
6763 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
6764 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6765 if (Preheader == LoopBB)
6766 Preheader = *std::next(LoopBB->pred_begin());
6767
6768 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6769 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6770 // block. We need to determine the reaching definition of CPSR so that
6771 // it can be marked as non-pipelineable, allowing the pipeliner to force
6772 // it into stage 0 or give up if it cannot or will not do so.
6773 MachineInstr *CCSetter = nullptr;
6774 for (auto &L : LoopBB->instrs()) {
6775 if (L.isCall())
6776 return nullptr;
6777 if (isCPSRDefined(L))
6778 CCSetter = &L;
6779 }
6780 if (CCSetter)
6781 return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
6782 else
6783 return nullptr; // Unable to find the CC setter, so unable to guarantee
6784 // that pipeline will work
6785 }
6786
6787 // Recognize:
6788 // preheader:
6789 // %1 = t2DoLoopStart %0
6790 // loop:
6791 // %2 = phi %1, <not loop>, %..., %loop
6792 // %3 = t2LoopDec %2, <imm>
6793 // t2LoopEnd %3, %loop
6794
6795 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
6796 for (auto &L : LoopBB->instrs())
6797 if (L.isCall())
6798 return nullptr;
6799 else if (isVCTP(&L))
6800 return nullptr;
6801 Register LoopDecResult = I->getOperand(0).getReg();
6802 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
6803 MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
6804 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
6805 return nullptr;
6806 MachineInstr *LoopStart = nullptr;
6807 for (auto &J : Preheader->instrs())
6808 if (J.getOpcode() == ARM::t2DoLoopStart)
6809 LoopStart = &J;
6810 if (!LoopStart)
6811 return nullptr;
6812 return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
6813 }
6814 return nullptr;
6815}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isThumb(const MCSubtargetInfo &STI)
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, MachineInstr &MI, MCRegister DReg, unsigned Lane, MCRegister &ImplicitSReg)
getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set ImplicitSReg to a register n...
static const MachineInstr * getBundledUseMI(const TargetRegisterInfo *TRI, const MachineInstr &MI, unsigned Reg, unsigned &UseIdx, unsigned &Dist)
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI)
Create a copy of a const pool value.
static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, int CmpMask, bool CommonUse)
isSuitableForMask - Identify a suitable 'and' instruction that operates on the given source register ...
static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr &DefMI, const MCInstrDesc &DefMCID, unsigned DefAlign)
Return the number of cycles to add to (or subtract from) the static itinerary based on the def opcode...
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, const MachineInstr &MI)
static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, unsigned SReg, unsigned &Lane)
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[]
static bool isEligibleForITBlock(const MachineInstr *MI)
static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC)
getCmpToAddCondition - assume the flags are set by CMP(a,b), return the condition code if we modify t...
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1)
static bool isLRAvailable(const TargetRegisterInfo &TRI, MachineBasicBlock::reverse_iterator I, MachineBasicBlock::reverse_iterator E)
@ ExeGeneric
static const ARM_MLxEntry ARM_MLxTable[]
static bool isRedundantFlagInstr(const MachineInstr *CmpI, Register SrcReg, Register SrcReg2, int64_t ImmValue, const MachineInstr *OI, bool &IsThumb1)
isRedundantFlagInstr - check whether the first instruction, whose only purpose is to update flags,...
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, unsigned NumRegs)
static const MachineInstr * getBundledDefMI(const TargetRegisterInfo *TRI, const MachineInstr *MI, unsigned Reg, unsigned &DefIdx, unsigned &Dist)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
DXIL Forward Handle Accesses
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is Live
This file defines the DenseMap class.
uint64_t Size
Module.h This file contains the declarations for the Module class.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static bool isCPSRDefined(const MachineInstr &MI)
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction to set the zero flag so that we can remove a "comparis...
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immedia...
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
const MachineInstrBuilder & AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI) const
void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister DestReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &, unsigned, const TargetRegisterInfo *) const override
unsigned getNumLDMAddresses(const MachineInstr &MI) const
Get the number of addresses loaded or stored by an LDM or VLDM, or zero if unknown.
MachineInstr * optimizeSelect(MachineInstr &MI, SmallPtrSetImpl< MachineInstr * > &SeenMIs, bool) const override
bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI) const override
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Returns the size of the specified MachineInstr.
void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MCRegister SrcReg, bool KillSrc, const ARMSubtarget &Subtarget) const
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
ARM supports the MachineOutliner.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
Enable outlining by default at -Oz.
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const override
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
ARMBaseInstrInfo(const ARMSubtarget &STI)
std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isPredicated(const MachineInstr &MI) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, unsigned LoadImmOpc, unsigned LoadOpc) const
bool isPredicable(const MachineInstr &MI) const override
isPredicable - Return true if the specified instruction can be predicated.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
Specialization of TargetInstrInfo::describeLoadedValue, used to enhance debug entry value description...
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, unsigned NumInsts) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to determine if two loads are lo...
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool getRegSequenceLikeInputs(const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl< RegSubRegPairAndIdx > &InputRegs) const override
Build the equivalent inputs of a REG_SEQUENCE for the given MI and DefIdx.
unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override
bool getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override
Build the equivalent inputs of a INSERT_SUBREG for the given MI and DefIdx.
bool expandPostRAPseudo(MachineInstr &MI) const override
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to determine (in conjunction w...
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
VFP/NEON execution domains.
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override
bool isFpMLxInstruction(unsigned Opcode) const
isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS instruction.
bool isSwiftFastImmShift(const MachineInstr *MI) const
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void breakPartialRegDependency(MachineInstr &, unsigned, const TargetRegisterInfo *TRI) const override
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
const ARMSubtarget & getSubtarget() const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool analyzeSelect(const MachineInstr &MI, SmallVectorImpl< MachineOperand > &Cond, unsigned &TrueOp, unsigned &FalseOp, bool &Optimizable) const override
bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, RegSubRegPairAndIdx &InputReg) const override
Build the equivalent inputs of a EXTRACT_SUBREG for the given MI and DefIdx.
bool shouldSink(const MachineInstr &MI) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
ARMConstantPoolConstant - ARM-specific constant pool values for Constants, Functions,...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic block.
ARMConstantPoolSymbol - ARM-specific constantpool values for external symbols.
ARMConstantPoolValue - ARM specific constantpool value.
ARMCP::ARMCPModifier getModifier() const
virtual bool hasSameValue(ARMConstantPoolValue *ACPV)
hasSameValue - Return true if this ARM constpool value can share the same constantpool entry as anoth...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
bool isTargetMachO() const
Definition: ARMSubtarget.h:346
bool isCortexA7() const
Definition: ARMSubtarget.h:284
bool useMovt() const
bool isSwift() const
Definition: ARMSubtarget.h:288
ARMLdStMultipleTiming getLdStMultipleTiming() const
Definition: ARMSubtarget.h:451
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:235
bool isThumb1Only() const
Definition: ARMSubtarget.h:375
bool isCortexM7() const
Definition: ARMSubtarget.h:291
bool isThumb2() const
Definition: ARMSubtarget.h:376
bool isReadTPSoft() const
Definition: ARMSubtarget.h:361
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned getMispredictionPenalty() const
bool isLikeA9() const
Definition: ARMSubtarget.h:293
unsigned getReturnOpcode() const
Returns the correct return opcode for the current feature set.
Definition: ARMSubtarget.h:471
Align getStackAlignment() const
getStackAlignment - Returns the minimum alignment known to hold of the stack frame on entry to the fu...
Definition: ARMSubtarget.h:440
bool hasVFP2Base() const
Definition: ARMSubtarget.h:304
bool isROPI() const
bool enableMachinePipeliner() const override
Returns true if machine pipeliner should be enabled.
bool isTargetCOFF() const
Definition: ARMSubtarget.h:344
unsigned getPartialUpdateClearance() const
Definition: ARMSubtarget.h:449
bool hasMinSize() const
Definition: ARMSubtarget.h:374
bool isCortexA8() const
Definition: ARMSubtarget.h:285
@ DoubleIssueCheckUnalignedAccess
Can load/store 2 registers/cycle, but needs an extra cycle if the access is not 64-bit aligned.
Definition: ARMSubtarget.h:76
@ SingleIssue
Can load/store 1 register/cycle.
Definition: ARMSubtarget.h:78
@ DoubleIssue
Can load/store 2 registers/cycle.
Definition: ARMSubtarget.h:73
@ SingleIssuePlusExtras
Can load/store 1 register/cycle, but needs an extra cycle for address computation and potentially als...
Definition: ARMSubtarget.h:81
int getPreISelOperandLatencyAdjustment() const
Definition: ARMSubtarget.h:455
bool isRWPI() const
bool isMClass() const
Definition: ARMSubtarget.h:377
bool restrictIT() const
Definition: ARMSubtarget.h:403
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
bool test(unsigned Idx) const
Definition: BitVector.h:461
size_type size() const
size - Returns the number of bits in this bitvector.
Definition: BitVector.h:159
LLVM_ABI uint64_t scale(uint64_t Num) const
Scale a large integer.
BranchProbability getCompl() const
Helper class for creating CFI instructions and inserting them into MIR.
void buildRegister(MCRegister Reg1, MCRegister Reg2) const
void buildRestore(MCRegister Reg) const
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
iterator end()
Definition: DenseMap.h:87
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:359
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:727
A possibly irreducible generalization of a Loop.
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:280
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
Itinerary data supplied by a subtarget to be used by a target.
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
unsigned getStageLatency(unsigned ItinClassIndx) const
Return the total stage latency of the given class.
std::optional< unsigned > getOperandLatency(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Compute and return the use operand latency of a given itinerary class and operand index if the value ...
bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx, unsigned UseClass, unsigned UseIdx) const
Return true if there is a pipeline forwarding between instructions of itinerary classes DefClass and ...
bool isEmpty() const
Returns true if there are no itineraries.
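A minimal sketch of how these lookups are typically combined (this is not code from this file; it assumes ItinData plus the def/use descriptors DefMCID and UseMCID are in scope, and the operand indices are illustrative):
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();
  std::optional<unsigned> Lat =
      ItinData->getOperandLatency(DefClass, /*DefIdx=*/0, UseClass, /*UseIdx=*/1);
  // Fall back to the coarser whole-instruction latency when per-operand cycle
  // information is not available for this itinerary class.
  unsigned Cycles = Lat ? *Lat : ItinData->getStageLatency(DefClass);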
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:31
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:117
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
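As a rough illustration of how these queries combine (a sketch, not code from this file; it assumes a TargetRegisterInfo reference TRI and a MachineBasicBlock MBB are in scope), one can test whether LR is free across a block:
  LiveRegUnits LRU(TRI);                  // track liveness in terms of register units
  LRU.addLiveOuts(MBB);                   // seed with everything live out of the block
  for (const MachineInstr &MI : reverse(MBB))
    LRU.accumulate(MI);                   // fold in uses, defs and clobbers of each instruction
  bool LRIsFree = LRU.available(ARM::LR); // true if no part of LR is live here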
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:64
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:199
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:603
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:238
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:240
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:440
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:266
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:249
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:289
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
Definition: MCInstrDesc.h:607
unsigned short Opcode
Definition: MCInstrDesc.h:206
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:231
LLVM_ABI bool hasImplicitDefOfPhysReg(MCRegister Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:32
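For example (a sketch assuming a MachineInstr MI is in scope), the descriptor answers purely static questions about an opcode:
  const MCInstrDesc &MCID = MI.getDesc();
  unsigned SchedClass = MCID.getSchedClass();                // key for itinerary lookups
  bool IsCall = MCID.isCall();                               // static property of the opcode
  bool WritesCPSR = MCID.hasImplicitDefOfPhysReg(ARM::CPSR); // opcode implicitly defines CPSR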
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool isValid() const
isValid - Returns true until all the operands have been visited.
unsigned pred_size() const
instr_iterator instr_begin()
reverse_iterator rend()
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
Instructions::iterator instr_iterator
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
@ LQR_Live
Register is known to be (at least partially) live.
@ LQR_Unknown
Register liveness not decidable from local neighborhood.
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
This class is a data container for one entry in a MachineConstantPool.
union llvm::MachineConstantPoolEntry::(anonymous union) Val
The constant itself.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
MachineConstantPoolValue * MachineCPVal
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
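These methods are normally chained off BuildMI. A minimal sketch of the usual ARM pattern (assuming it runs inside an ARMBaseInstrInfo member with MBB, I, DL, DestReg and SrcReg in scope, and using the predOps/condCodeOp helpers documented further below):
  BuildMI(MBB, I, DL, get(ARM::ADDri), DestReg)
      .addReg(SrcReg, getKillRegState(true)) // last use of the source register
      .addImm(4)                             // small immediate addend
      .add(predOps(ARMCC::AL))               // predicate operands: always execute
      .add(condCodeOp());                    // optional CPSR def left unset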
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:948
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:590
LLVM_ABI int findFirstPredOperandIdx() const
Find the index of the first operand in the operand list that is used to represent the predicate.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:584
bool isRegSequence() const
bool isInsertSubreg() const
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g.
Definition: MachineInstr.h:922
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
LLVM_ABI void clearKillInfo()
Clears kill flags on all operands.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
const TargetRegisterInfo * getTargetRegisterInfo() const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
void AddHazardRecognizer(std::unique_ptr< ScheduleHazardRecognizer > &&)
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
LLVM_ABI void decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, LaneBitmask NewMask)
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnMachineFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:55
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:78
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:56
This class represents the scheduled code.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:588
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual std::string createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, const TargetRegisterInfo *TRI) const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const InstrItineraryData * getInstrItineraries() const
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
self_iterator getIterator()
Definition: ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
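For instance, reversing a conditional branch simply swaps the condition for its opposite (illustrative values):
  ARMCC::CondCodes CC = ARMCC::EQ;
  ARMCC::CondCodes Rev = ARMCC::getOppositeCondition(CC); // ARMCC::NE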
@ ThumbArithFlagSetting
Definition: ARMBaseInfo.h:414
@ MO_OPTION_MASK
MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects just that part of the flag set.
Definition: ARMBaseInfo.h:258
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
AddrMode
ARM Addressing Modes.
Definition: ARMBaseInfo.h:185
unsigned char getAM3Offset(unsigned AM3Opc)
unsigned char getAM5FP16Offset(unsigned AM5Opc)
unsigned getSORegOffset(unsigned Op)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
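A quick illustration with hand-picked values (an so_imm is an 8-bit value rotated right by an even amount; -1 signals that no encoding exists):
  int Enc = ARM_AM::getSOImmVal(0x00AB0000); // 0xAB rotated into bits 16-23: encodable
  int Bad = ARM_AM::getSOImmVal(257);        // needs 9 significant bits: returns -1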
ShiftOpc getAM2ShiftOpc(unsigned AM2Opc)
unsigned getAM2Offset(unsigned AM2Opc)
unsigned getSOImmValRotate(unsigned Imm)
getSOImmValRotate - Try to handle Imm with an immediate shifter operand, computing the rotate amount ...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
ShiftOpc getSORegShOp(unsigned Op)
AddrOpc getAM5Op(unsigned AM5Opc)
bool isSOImmTwoPartValNeg(unsigned V)
isSOImmTwoPartValNeg - Return true if the specified value can be obtained by two SOImmVal,...
unsigned getSOImmTwoPartSecond(unsigned V)
getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, return the second chunk of ...
bool isSOImmTwoPartVal(unsigned V)
isSOImmTwoPartVal - Return true if the specified value can be obtained by or'ing together two SOImmVa...
AddrOpc getAM5FP16Op(unsigned AM5Opc)
unsigned getT2SOImmTwoPartSecond(unsigned Imm)
unsigned getT2SOImmTwoPartFirst(unsigned Imm)
bool isT2SOImmTwoPartVal(unsigned Imm)
unsigned char getAM5Offset(unsigned AM5Opc)
unsigned getSOImmTwoPartFirst(unsigned V)
getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, return the first chunk of it...
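A sketch of how the two-part helpers fit together (illustrative value; the only property relied on here is that the two chunks OR back to the original):
  unsigned V = 0x00AB00CD;
  if (ARM_AM::isSOImmTwoPartVal(V)) {
    unsigned Part1 = ARM_AM::getSOImmTwoPartFirst(V);
    unsigned Part2 = ARM_AM::getSOImmTwoPartSecond(V);
    assert((Part1 | Part2) == V && "two so_imm chunks recombine to the constant");
  }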
AddrOpc getAM2Op(unsigned AM2Opc)
AddrOpc getAM3Op(unsigned AM3Opc)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
@ Offset
Definition: DWP.cpp:477
static bool isIndirectCall(const MachineInstr &MI)
MachineInstr * findCMPToFoldIntoCBZ(MachineInstr *Br, const TargetRegisterInfo *TRI)
Search backwards from a tBcc to find a tCMPi8 against 0, meaning we can convert them to a tCBZ or tCB...
static bool isCondBranchOpcode(int Opc)
MaybeAlign getAlign(const CallInst &I, unsigned Index)
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
static bool isPushOpcode(int Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond)
static bool isVCTP(const MachineInstr *MI)
bool IsCPSRDead< MachineInstr >(const MachineInstr *MI)
unsigned getBLXpredOpcode(const MachineFunction &MF)
static bool isARMLowRegister(MCRegister Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
static bool isIndirectBranchOpcode(int Opc)
bool isLegalAddressImm(unsigned Opcode, int Imm, const TargetInstrInfo *TII)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defined between From and To.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
static bool isCalleeSavedRegister(MCRegister Reg, const MCPhysReg *CSRegs)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
static bool isJumpTableBranchOpcode(int Opc)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
static bool isPopOpcode(int Opc)
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive)
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:399
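For example, packing 10 bytes into 4-byte words needs three words:
  unsigned NumWords = divideCeil(10, 4); // == 3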
unsigned getUndefRegState(bool B)
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
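A sketch of the kind of comparison this enables (assumes a const ARMSubtarget *Subtarget is in scope; the exact costs depend on the subtarget and on whether code size is being optimized):
  unsigned CostWide  = ConstantMaterializationCost(0x12345678, Subtarget); // typically several instructions
  unsigned CostSmall = ConstantMaterializationCost(0xFF, Subtarget);       // a single MOV on most cores
  bool PreferSmall = CostSmall <= CostWide;
HasLowerConstantMaterializationCost, listed above, performs this comparison directly.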
unsigned getKillRegState(bool B)
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Register FrameReg, int &Offset, const ARMBaseInstrInfo &TII)
rewriteARMFrameIndex / rewriteT2FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
static bool isIndirectControlFlowNotComingBack(const MachineInstr &MI)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
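For example, rounding a 10-byte object up to an 8-byte boundary:
  uint64_t Padded = alignTo(10, Align(8)); // == 16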
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
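A typical use (a sketch assuming a MachineInstr MI is in scope) is to bail out on instructions that already carry a real predicate:
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  if (Pred != ARMCC::AL)
    return false; // already predicated; leave it alone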
unsigned getMatchingCondBranchOpcode(unsigned Opc)
static bool isUncondBranchOpcode(int Opc)
auto partition(R &&Range, UnaryPredicate P)
Provide wrappers to std::partition which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1994
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
static const char * ARMCondCodeToString(ARMCC::CondCodes CC)
Definition: ARMBaseInfo.h:146
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned gettBLXrOpcode(const MachineFunction &MF)
static bool isSpeculationBarrierEndBBOpcode(int Opc)
unsigned getBLXOpcode(const MachineFunction &MF)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
bool isV8EligibleForIT(const InstrType *Instr)
Definition: ARMFeatures.h:24
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
#define N
ARM_MLxEntry - Record information about MLA / MLS instructions.
Map pseudo instructions that imply an 'S' bit onto real opcodes.
OutlinerCosts(const ARMSubtarget &target)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Description of the encoding of one expression Op.
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:82
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:81
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
A pair composed of a register and a sub-register index.
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.