1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
73 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
77 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94 : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN,
95 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// GetInstSize - Return the number of bytes of code the specified
99/// instruction may occupy. This returns the maximum number of bytes.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101 const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134 // Size should be preferably set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
136 // Specific cases handle instructions of variable sizes
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
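// Illustrative example: with "patchable-function-entry"="2" the code above
// reports 2 * 4 = 8 bytes of NOPs for PATCHABLE_FUNCTION_ENTER, while the
// default of 9 words (36 bytes) corresponds to an XRay entry sled.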
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
195 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
199 Size += getInstSizeInBytes(*I);
200 }
201 return Size;
202}
203
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205 SmallVectorImpl<MachineOperand> &Cond) {
206 // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 }
245}
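// Summary of the Cond encodings produced by parseCondBranch above:
//   Bcc:            { CC }
//   CB[N]Z[WX]:     { -1, Opcode, Reg }
//   TB[N]Z[WX]:     { -1, Opcode, Reg, BitImm }
//   CB[WX]P(ri|rr): { -1, Opcode, CC, Op0, Op1 }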
246
247static unsigned getBranchDisplacementBits(unsigned Opc) {
248 switch (Opc) {
249 default:
250 llvm_unreachable("unexpected opcode!");
251 case AArch64::B:
252 return BDisplacementBits;
253 case AArch64::TBNZW:
254 case AArch64::TBZW:
255 case AArch64::TBNZX:
256 case AArch64::TBZX:
257 return TBZDisplacementBits;
258 case AArch64::CBNZW:
259 case AArch64::CBZW:
260 case AArch64::CBNZX:
261 case AArch64::CBZX:
262 return CBZDisplacementBits;
263 case AArch64::Bcc:
264 return BCCDisplacementBits;
265 case AArch64::CBWPri:
266 case AArch64::CBXPri:
267 case AArch64::CBWPrr:
268 case AArch64::CBXPrr:
269 return CBDisplacementBits;
270 }
271}
272
273bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
274 int64_t BrOffset) const {
275 unsigned Bits = getBranchDisplacementBits(BranchOp);
276 assert(Bits >= 3 && "max branch displacement must be enough to jump"
277 "over conditional branch expansion");
278 return isIntN(Bits, BrOffset / 4);
279}
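// Illustrative example: TB[N]Z carries a 14-bit signed offset counted in
// instructions, so with the default aarch64-tbz-offset-bits=14 the check
// isIntN(14, BrOffset / 4) accepts roughly +/- 2^13 instructions (+/- 32 KiB).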
280
281MachineBasicBlock *
282AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
283 switch (MI.getOpcode()) {
284 default:
285 llvm_unreachable("unexpected opcode!");
286 case AArch64::B:
287 return MI.getOperand(0).getMBB();
288 case AArch64::TBZW:
289 case AArch64::TBNZW:
290 case AArch64::TBZX:
291 case AArch64::TBNZX:
292 return MI.getOperand(2).getMBB();
293 case AArch64::CBZW:
294 case AArch64::CBNZW:
295 case AArch64::CBZX:
296 case AArch64::CBNZX:
297 case AArch64::Bcc:
298 return MI.getOperand(1).getMBB();
299 case AArch64::CBWPri:
300 case AArch64::CBXPri:
301 case AArch64::CBWPrr:
302 case AArch64::CBXPrr:
303 return MI.getOperand(3).getMBB();
304 }
305}
306
307void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
308 MachineBasicBlock &NewDestBB,
309 MachineBasicBlock &RestoreBB,
310 const DebugLoc &DL,
311 int64_t BrOffset,
312 RegScavenger *RS) const {
313 assert(RS && "RegScavenger required for long branching");
314 assert(MBB.empty() &&
315 "new block should be inserted for expanding unconditional branch");
316 assert(MBB.pred_size() == 1);
317 assert(RestoreBB.empty() &&
318 "restore block should be inserted for restoring clobbered registers");
319
320 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
321 // Offsets outside of the signed 33-bit range are not supported for ADRP +
322 // ADD.
323 if (!isInt<33>(BrOffset))
325 "Branch offsets outside of the signed 33-bit range not supported");
326
327 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
328 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
330 .addReg(Reg)
331 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
332 .addImm(0);
333 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
334 };
335
336 RS->enterBasicBlockEnd(MBB);
337 // If X16 is unused, we can rely on the linker to insert a range extension
338 // thunk if NewDestBB is out of range of a single B instruction.
339 constexpr Register Reg = AArch64::X16;
340 if (!RS->isRegUsed(Reg)) {
341 insertUnconditionalBranch(MBB, &NewDestBB, DL);
342 RS->setRegUsed(Reg);
343 return;
344 }
345
346 // If there's a free register and it's worth inflating the code size,
347 // manually insert the indirect branch.
348 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
349 if (Scavenged != AArch64::NoRegister &&
350 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
351 buildIndirectBranch(Scavenged, NewDestBB);
352 RS->setRegUsed(Scavenged);
353 return;
354 }
355
356 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
357 // with red zones.
358 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
359 if (!AFI || AFI->hasRedZone().value_or(true))
361 "Unable to insert indirect branch inside function that has red zone");
362
363 // Otherwise, spill X16 and defer range extension to the linker.
364 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
365 .addReg(AArch64::SP, RegState::Define)
366 .addReg(Reg)
367 .addReg(AArch64::SP)
368 .addImm(-16);
369
370 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
371
372 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
373 .addReg(AArch64::SP, RegState::Define)
374 .addReg(Reg, RegState::Define)
375 .addReg(AArch64::SP)
376 .addImm(16);
377}
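// Illustrative sketch of the expansion built above when a scavenged register
// is available and the block is cold (assumes the destination is within the
// signed 33-bit ADRP+ADD range):
//   adrp xN, DestBB            ; page of the destination
//   add  xN, xN, :lo12:DestBB  ; page offset
//   br   xN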
378
379// Branch analysis.
380bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
381 MachineBasicBlock *&TBB,
382 MachineBasicBlock *&FBB,
383 SmallVectorImpl<MachineOperand> &Cond,
384 bool AllowModify) const {
385 // If the block has no terminators, it just falls into the block after it.
386 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
387 if (I == MBB.end())
388 return false;
389
390 // Skip over SpeculationBarrierEndBB terminators
391 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
392 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
393 --I;
394 }
395
396 if (!isUnpredicatedTerminator(*I))
397 return false;
398
399 // Get the last instruction in the block.
400 MachineInstr *LastInst = &*I;
401
402 // If there is only one terminator instruction, process it.
403 unsigned LastOpc = LastInst->getOpcode();
404 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
405 if (isUncondBranchOpcode(LastOpc)) {
406 TBB = LastInst->getOperand(0).getMBB();
407 return false;
408 }
409 if (isCondBranchOpcode(LastOpc)) {
410 // Block ends with fall-through condbranch.
411 parseCondBranch(LastInst, TBB, Cond);
412 return false;
413 }
414 return true; // Can't handle indirect branch.
415 }
416
417 // Get the instruction before it if it is a terminator.
418 MachineInstr *SecondLastInst = &*I;
419 unsigned SecondLastOpc = SecondLastInst->getOpcode();
420
421 // If AllowModify is true and the block ends with two or more unconditional
422 // branches, delete all but the first unconditional branch.
423 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
424 while (isUncondBranchOpcode(SecondLastOpc)) {
425 LastInst->eraseFromParent();
426 LastInst = SecondLastInst;
427 LastOpc = LastInst->getOpcode();
428 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
429 // Return now; the only terminator is an unconditional branch.
430 TBB = LastInst->getOperand(0).getMBB();
431 return false;
432 }
433 SecondLastInst = &*I;
434 SecondLastOpc = SecondLastInst->getOpcode();
435 }
436 }
437
438 // If we're allowed to modify and the block ends in an unconditional branch
439 // which could simply fallthrough, remove the branch. (Note: This case only
440 // matters when we can't understand the whole sequence, otherwise it's also
441 // handled by BranchFolding.cpp.)
442 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
443 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
444 LastInst->eraseFromParent();
445 LastInst = SecondLastInst;
446 LastOpc = LastInst->getOpcode();
447 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
448 assert(!isUncondBranchOpcode(LastOpc) &&
449 "unreachable unconditional branches removed above");
450
451 if (isCondBranchOpcode(LastOpc)) {
452 // Block ends with fall-through condbranch.
453 parseCondBranch(LastInst, TBB, Cond);
454 return false;
455 }
456 return true; // Can't handle indirect branch.
457 }
458 SecondLastInst = &*I;
459 SecondLastOpc = SecondLastInst->getOpcode();
460 }
461
462 // If there are three terminators, we don't know what sort of block this is.
463 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
464 return true;
465
466 // If the block ends with a B and a Bcc, handle it.
467 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
468 parseCondBranch(SecondLastInst, TBB, Cond);
469 FBB = LastInst->getOperand(0).getMBB();
470 return false;
471 }
472
473 // If the block ends with two unconditional branches, handle it. The second
474 // one is not executed, so remove it.
475 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
476 TBB = SecondLastInst->getOperand(0).getMBB();
477 I = LastInst;
478 if (AllowModify)
479 I->eraseFromParent();
480 return false;
481 }
482
483 // ...likewise if it ends with an indirect branch followed by an unconditional
484 // branch.
485 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
486 I = LastInst;
487 if (AllowModify)
488 I->eraseFromParent();
489 return true;
490 }
491
492 // Otherwise, can't handle this.
493 return true;
494}
495
496bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
497 MachineBranchPredicate &MBP,
498 bool AllowModify) const {
499 // For the moment, handle only a block which ends with a cb(n)zx followed by
500 // a fallthrough. Why this? Because it is a common form.
501 // TODO: Should we handle b.cc?
502
503 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
504 if (I == MBB.end())
505 return true;
506
507 // Skip over SpeculationBarrierEndBB terminators
508 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
509 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
510 --I;
511 }
512
513 if (!isUnpredicatedTerminator(*I))
514 return true;
515
516 // Get the last instruction in the block.
517 MachineInstr *LastInst = &*I;
518 unsigned LastOpc = LastInst->getOpcode();
519 if (!isCondBranchOpcode(LastOpc))
520 return true;
521
522 switch (LastOpc) {
523 default:
524 return true;
525 case AArch64::CBZW:
526 case AArch64::CBZX:
527 case AArch64::CBNZW:
528 case AArch64::CBNZX:
529 break;
530 };
531
532 MBP.TrueDest = LastInst->getOperand(1).getMBB();
533 assert(MBP.TrueDest && "expected!");
534 MBP.FalseDest = MBB.getNextNode();
535
536 MBP.ConditionDef = nullptr;
537 MBP.SingleUseCondition = false;
538
539 MBP.LHS = LastInst->getOperand(0);
540 MBP.RHS = MachineOperand::CreateImm(0);
541 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
542 ? MachineBranchPredicate::PRED_NE
543 : MachineBranchPredicate::PRED_EQ;
544 return false;
545}
546
547bool AArch64InstrInfo::reverseBranchCondition(
548 SmallVectorImpl<MachineOperand> &Cond) const {
549 if (Cond[0].getImm() != -1) {
550 // Regular Bcc
551 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
552 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
553 } else {
554 // Folded compare-and-branch
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown conditional branch!");
558 case AArch64::CBZW:
559 Cond[1].setImm(AArch64::CBNZW);
560 break;
561 case AArch64::CBNZW:
562 Cond[1].setImm(AArch64::CBZW);
563 break;
564 case AArch64::CBZX:
565 Cond[1].setImm(AArch64::CBNZX);
566 break;
567 case AArch64::CBNZX:
568 Cond[1].setImm(AArch64::CBZX);
569 break;
570 case AArch64::TBZW:
571 Cond[1].setImm(AArch64::TBNZW);
572 break;
573 case AArch64::TBNZW:
574 Cond[1].setImm(AArch64::TBZW);
575 break;
576 case AArch64::TBZX:
577 Cond[1].setImm(AArch64::TBNZX);
578 break;
579 case AArch64::TBNZX:
580 Cond[1].setImm(AArch64::TBZX);
581 break;
582
583 // Cond is { -1, Opcode, CC, Op0, Op1 }
584 case AArch64::CBWPri:
585 case AArch64::CBXPri:
586 case AArch64::CBWPrr:
587 case AArch64::CBXPrr: {
588 // Pseudos using standard 4bit Arm condition codes
589 AArch64CC::CondCode CC =
590 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
591 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
592 }
593 }
594 }
595
596 return false;
597}
598
599unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
600 int *BytesRemoved) const {
601 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
602 if (I == MBB.end())
603 return 0;
604
605 if (!isUncondBranchOpcode(I->getOpcode()) &&
606 !isCondBranchOpcode(I->getOpcode()))
607 return 0;
608
609 // Remove the branch.
610 I->eraseFromParent();
611
612 I = MBB.end();
613
614 if (I == MBB.begin()) {
615 if (BytesRemoved)
616 *BytesRemoved = 4;
617 return 1;
618 }
619 --I;
620 if (!isCondBranchOpcode(I->getOpcode())) {
621 if (BytesRemoved)
622 *BytesRemoved = 4;
623 return 1;
624 }
625
626 // Remove the branch.
627 I->eraseFromParent();
628 if (BytesRemoved)
629 *BytesRemoved = 8;
630
631 return 2;
632}
633
634void AArch64InstrInfo::instantiateCondBranch(
635 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
636 ArrayRef<MachineOperand> Cond) const {
637 if (Cond[0].getImm() != -1) {
638 // Regular Bcc
639 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
640 } else {
641 // Folded compare-and-branch
642 // Note that we use addOperand instead of addReg to keep the flags.
643
644 // cbz, cbnz
645 const MachineInstrBuilder MIB =
646 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
647
648 // tbz/tbnz
649 if (Cond.size() > 3)
650 MIB.add(Cond[3]);
651
652 // cb
653 if (Cond.size() > 4)
654 MIB.add(Cond[4]);
655
656 MIB.addMBB(TBB);
657 }
658}
659
660unsigned AArch64InstrInfo::insertBranch(
661 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
662 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
663 // Shouldn't be a fall through.
664 assert(TBB && "insertBranch must not be told to insert a fallthrough");
665
666 if (!FBB) {
667 if (Cond.empty()) // Unconditional branch?
668 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
669 else
670 instantiateCondBranch(MBB, DL, TBB, Cond);
671
672 if (BytesAdded)
673 *BytesAdded = 4;
674
675 return 1;
676 }
677
678 // Two-way conditional branch.
679 instantiateCondBranch(MBB, DL, TBB, Cond);
680 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
681
682 if (BytesAdded)
683 *BytesAdded = 8;
684
685 return 2;
686}
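// Illustrative example: a two-way branch with Cond = { AArch64CC::EQ } and
// blocks TBB/FBB is materialized by the code above as
//   b.eq TBB
//   b    FBB
// and 8 is reported through *BytesAdded.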
687
688// Find the original register that VReg is copied from.
689static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690 while (Register::isVirtualRegister(VReg)) {
691 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
692 if (!DefMI->isFullCopy())
693 return VReg;
694 VReg = DefMI->getOperand(1).getReg();
695 }
696 return VReg;
697}
698
699// Determine if VReg is defined by an instruction that can be folded into a
700// csel instruction. If so, return the folded opcode, and the replacement
701// register.
702static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
703 unsigned *NewVReg = nullptr) {
704 VReg = removeCopies(MRI, VReg);
705 if (!Register::isVirtualRegister(VReg))
706 return 0;
707
708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
709 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
710 unsigned Opc = 0;
711 unsigned SrcOpNum = 0;
712 switch (DefMI->getOpcode()) {
713 case AArch64::ADDSXri:
714 case AArch64::ADDSWri:
715 // if NZCV is used, do not fold.
716 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
717 true) == -1)
718 return 0;
719 // fall-through to ADDXri and ADDWri.
720 [[fallthrough]];
721 case AArch64::ADDXri:
722 case AArch64::ADDWri:
723 // add x, 1 -> csinc.
724 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
725 DefMI->getOperand(3).getImm() != 0)
726 return 0;
727 SrcOpNum = 1;
728 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
729 break;
730
731 case AArch64::ORNXrr:
732 case AArch64::ORNWrr: {
733 // not x -> csinv, represented as orn dst, xzr, src.
734 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
735 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
736 return 0;
737 SrcOpNum = 2;
738 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
739 break;
740 }
741
742 case AArch64::SUBSXrr:
743 case AArch64::SUBSWrr:
744 // if NZCV is used, do not fold.
745 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
746 true) == -1)
747 return 0;
748 // fall-through to SUBXrr and SUBWrr.
749 [[fallthrough]];
750 case AArch64::SUBXrr:
751 case AArch64::SUBWrr: {
752 // neg x -> csneg, represented as sub dst, xzr, src.
753 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
754 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
755 return 0;
756 SrcOpNum = 2;
757 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
758 break;
759 }
760 default:
761 return 0;
762 }
763 assert(Opc && SrcOpNum && "Missing parameters");
764
765 if (NewVReg)
766 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
767 return Opc;
768}
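// Illustrative example: given
//   %t = ADDWri %a, 1, 0        ; %t = %a + 1
// a later select of %t against %b can use CSINCWr, so the +1 is applied by
// the csel family rather than a separate add (see insertSelect below).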
769
770bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
771 ArrayRef<MachineOperand> Cond,
772 Register DstReg, Register TrueReg,
773 Register FalseReg, int &CondCycles,
774 int &TrueCycles,
775 int &FalseCycles) const {
776 // Check register classes.
777 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
778 const TargetRegisterClass *RC =
779 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
780 if (!RC)
781 return false;
782
783 // Also need to check the dest regclass, in case we're trying to optimize
784 // something like:
785 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
786 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
787 return false;
788
789 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
790 unsigned ExtraCondLat = Cond.size() != 1;
791
792 // GPRs are handled by csel.
793 // FIXME: Fold in x+1, -x, and ~x when applicable.
794 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
795 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
796 // Single-cycle csel, csinc, csinv, and csneg.
797 CondCycles = 1 + ExtraCondLat;
798 TrueCycles = FalseCycles = 1;
799 if (canFoldIntoCSel(MRI, TrueReg))
800 TrueCycles = 0;
801 else if (canFoldIntoCSel(MRI, FalseReg))
802 FalseCycles = 0;
803 return true;
804 }
805
806 // Scalar floating point is handled by fcsel.
807 // FIXME: Form fabs, fmin, and fmax when applicable.
808 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
809 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
810 CondCycles = 5 + ExtraCondLat;
811 TrueCycles = FalseCycles = 2;
812 return true;
813 }
814
815 // Can't do vectors.
816 return false;
817}
818
819void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
820 MachineBasicBlock::iterator I,
821 const DebugLoc &DL, Register DstReg,
822 ArrayRef<MachineOperand> Cond,
823 Register TrueReg, Register FalseReg) const {
824 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
825
826 // Parse the condition code, see parseCondBranch() above.
827 AArch64CC::CondCode CC;
828 switch (Cond.size()) {
829 default:
830 llvm_unreachable("Unknown condition opcode in Cond");
831 case 1: // b.cc
832 CC = AArch64CC::CondCode(Cond[0].getImm());
833 break;
834 case 3: { // cbz/cbnz
835 // We must insert a compare against 0.
836 bool Is64Bit;
837 switch (Cond[1].getImm()) {
838 default:
839 llvm_unreachable("Unknown branch opcode in Cond");
840 case AArch64::CBZW:
841 Is64Bit = false;
842 CC = AArch64CC::EQ;
843 break;
844 case AArch64::CBZX:
845 Is64Bit = true;
846 CC = AArch64CC::EQ;
847 break;
848 case AArch64::CBNZW:
849 Is64Bit = false;
850 CC = AArch64CC::NE;
851 break;
852 case AArch64::CBNZX:
853 Is64Bit = true;
854 CC = AArch64CC::NE;
855 break;
856 }
857 Register SrcReg = Cond[2].getReg();
858 if (Is64Bit) {
859 // cmp reg, #0 is actually subs xzr, reg, #0.
860 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
861 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
862 .addReg(SrcReg)
863 .addImm(0)
864 .addImm(0);
865 } else {
866 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
867 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
868 .addReg(SrcReg)
869 .addImm(0)
870 .addImm(0);
871 }
872 break;
873 }
874 case 4: { // tbz/tbnz
875 // We must insert a tst instruction.
876 switch (Cond[1].getImm()) {
877 default:
878 llvm_unreachable("Unknown branch opcode in Cond");
879 case AArch64::TBZW:
880 case AArch64::TBZX:
881 CC = AArch64CC::EQ;
882 break;
883 case AArch64::TBNZW:
884 case AArch64::TBNZX:
885 CC = AArch64CC::NE;
886 break;
887 }
888 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
889 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
890 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
891 .addReg(Cond[2].getReg())
892 .addImm(
893 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
894 else
895 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
896 .addReg(Cond[2].getReg())
897 .addImm(
898 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
899 break;
900 }
901 case 5: { // cb
902 // We must insert a cmp, that is a subs
903 // 0 1 2 3 4
904 // Cond is { -1, Opcode, CC, Op0, Op1 }
905 unsigned SUBSOpC, SUBSDestReg;
906 bool IsImm = false;
907 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
908 switch (Cond[1].getImm()) {
909 default:
910 llvm_unreachable("Unknown branch opcode in Cond");
911 case AArch64::CBWPri:
912 SUBSOpC = AArch64::SUBSWri;
913 SUBSDestReg = AArch64::WZR;
914 IsImm = true;
915 break;
916 case AArch64::CBXPri:
917 SUBSOpC = AArch64::SUBSXri;
918 SUBSDestReg = AArch64::XZR;
919 IsImm = true;
920 break;
921 case AArch64::CBWPrr:
922 SUBSOpC = AArch64::SUBSWrr;
923 SUBSDestReg = AArch64::WZR;
924 IsImm = false;
925 break;
926 case AArch64::CBXPrr:
927 SUBSOpC = AArch64::SUBSXrr;
928 SUBSDestReg = AArch64::XZR;
929 IsImm = false;
930 break;
931 }
932
933 if (IsImm)
934 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
935 .addReg(Cond[3].getReg())
936 .addImm(Cond[4].getImm())
937 .addImm(0);
938 else
939 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
940 .addReg(Cond[3].getReg())
941 .addReg(Cond[4].getReg());
942 }
943 }
944
945 unsigned Opc = 0;
946 const TargetRegisterClass *RC = nullptr;
947 bool TryFold = false;
948 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
949 RC = &AArch64::GPR64RegClass;
950 Opc = AArch64::CSELXr;
951 TryFold = true;
952 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
953 RC = &AArch64::GPR32RegClass;
954 Opc = AArch64::CSELWr;
955 TryFold = true;
956 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
957 RC = &AArch64::FPR64RegClass;
958 Opc = AArch64::FCSELDrrr;
959 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
960 RC = &AArch64::FPR32RegClass;
961 Opc = AArch64::FCSELSrrr;
962 }
963 assert(RC && "Unsupported regclass");
964
965 // Try folding simple instructions into the csel.
966 if (TryFold) {
967 unsigned NewVReg = 0;
968 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
969 if (FoldedOpc) {
970 // The folded opcodes csinc, csinv and csneg apply the operation to
971 // FalseReg, so we need to invert the condition.
972 CC = AArch64CC::getInvertedCondCode(CC);
973 TrueReg = FalseReg;
974 } else
975 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
976
977 // Fold the operation. Leave any dead instructions for DCE to clean up.
978 if (FoldedOpc) {
979 FalseReg = NewVReg;
980 Opc = FoldedOpc;
981 // This extends the live range of NewVReg.
982 MRI.clearKillFlags(NewVReg);
983 }
984 }
985
986 // Pull all virtual registers into the appropriate class.
987 MRI.constrainRegClass(TrueReg, RC);
988 MRI.constrainRegClass(FalseReg, RC);
989
990 // Insert the csel.
991 BuildMI(MBB, I, DL, get(Opc), DstReg)
992 .addReg(TrueReg)
993 .addReg(FalseReg)
994 .addImm(CC);
995}
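// Illustrative expansion: for Cond = { -1, CBNZW, %reg } selecting %t over %f,
// the code above emits
//   subs wzr, %reg, #0
//   csel %dst, %t, %f, ne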
996
997// Return true if Imm can be loaded into a register by a "cheap" sequence of
998// instructions. For now, "cheap" means at most two instructions.
999static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1000 if (BitSize == 32)
1001 return true;
1002
1003 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1004 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1005 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1006 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1007
1008 return Is.size() <= 2;
1009}
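// Illustrative example: 0x12340000 expands to a single
// "movz xN, #0x1234, lsl #16" and is considered cheap, whereas a 64-bit
// constant needing a MOVZ plus three MOVKs is not.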
1010
1011// FIXME: this implementation should be micro-architecture dependent, so a
1012// micro-architecture target hook should be introduced here in future.
1013bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1014 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1015 if (isExynosCheapAsMove(MI))
1016 return true;
1017 return MI.isAsCheapAsAMove();
1018 }
1019
1020 switch (MI.getOpcode()) {
1021 default:
1022 return MI.isAsCheapAsAMove();
1023
1024 case AArch64::ADDWrs:
1025 case AArch64::ADDXrs:
1026 case AArch64::SUBWrs:
1027 case AArch64::SUBXrs:
1028 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1029
1030 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1031 // ORRXri, it is as cheap as MOV.
1032 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1033 case AArch64::MOVi32imm:
1034 return isCheapImmediate(MI, 32);
1035 case AArch64::MOVi64imm:
1036 return isCheapImmediate(MI, 64);
1037 }
1038}
1039
1040bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1041 switch (MI.getOpcode()) {
1042 default:
1043 return false;
1044
1045 case AArch64::ADDWrs:
1046 case AArch64::ADDXrs:
1047 case AArch64::ADDSWrs:
1048 case AArch64::ADDSXrs: {
1049 unsigned Imm = MI.getOperand(3).getImm();
1050 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1051 if (ShiftVal == 0)
1052 return true;
1053 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1054 }
1055
1056 case AArch64::ADDWrx:
1057 case AArch64::ADDXrx:
1058 case AArch64::ADDXrx64:
1059 case AArch64::ADDSWrx:
1060 case AArch64::ADDSXrx:
1061 case AArch64::ADDSXrx64: {
1062 unsigned Imm = MI.getOperand(3).getImm();
1063 switch (AArch64_AM::getArithExtendType(Imm)) {
1064 default:
1065 return false;
1066 case AArch64_AM::UXTB:
1067 case AArch64_AM::UXTH:
1068 case AArch64_AM::UXTW:
1069 case AArch64_AM::UXTX:
1070 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1071 }
1072 }
1073
1074 case AArch64::SUBWrs:
1075 case AArch64::SUBSWrs: {
1076 unsigned Imm = MI.getOperand(3).getImm();
1077 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1078 return ShiftVal == 0 ||
1079 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1080 }
1081
1082 case AArch64::SUBXrs:
1083 case AArch64::SUBSXrs: {
1084 unsigned Imm = MI.getOperand(3).getImm();
1085 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1086 return ShiftVal == 0 ||
1087 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1088 }
1089
1090 case AArch64::SUBWrx:
1091 case AArch64::SUBXrx:
1092 case AArch64::SUBXrx64:
1093 case AArch64::SUBSWrx:
1094 case AArch64::SUBSXrx:
1095 case AArch64::SUBSXrx64: {
1096 unsigned Imm = MI.getOperand(3).getImm();
1097 switch (AArch64_AM::getArithExtendType(Imm)) {
1098 default:
1099 return false;
1100 case AArch64_AM::UXTB:
1101 case AArch64_AM::UXTH:
1102 case AArch64_AM::UXTW:
1103 case AArch64_AM::UXTX:
1104 return AArch64_AM::getArithShiftValue(Imm) == 0;
1105 }
1106 }
1107
1108 case AArch64::LDRBBroW:
1109 case AArch64::LDRBBroX:
1110 case AArch64::LDRBroW:
1111 case AArch64::LDRBroX:
1112 case AArch64::LDRDroW:
1113 case AArch64::LDRDroX:
1114 case AArch64::LDRHHroW:
1115 case AArch64::LDRHHroX:
1116 case AArch64::LDRHroW:
1117 case AArch64::LDRHroX:
1118 case AArch64::LDRQroW:
1119 case AArch64::LDRQroX:
1120 case AArch64::LDRSBWroW:
1121 case AArch64::LDRSBWroX:
1122 case AArch64::LDRSBXroW:
1123 case AArch64::LDRSBXroX:
1124 case AArch64::LDRSHWroW:
1125 case AArch64::LDRSHWroX:
1126 case AArch64::LDRSHXroW:
1127 case AArch64::LDRSHXroX:
1128 case AArch64::LDRSWroW:
1129 case AArch64::LDRSWroX:
1130 case AArch64::LDRSroW:
1131 case AArch64::LDRSroX:
1132 case AArch64::LDRWroW:
1133 case AArch64::LDRWroX:
1134 case AArch64::LDRXroW:
1135 case AArch64::LDRXroX:
1136 case AArch64::PRFMroW:
1137 case AArch64::PRFMroX:
1138 case AArch64::STRBBroW:
1139 case AArch64::STRBBroX:
1140 case AArch64::STRBroW:
1141 case AArch64::STRBroX:
1142 case AArch64::STRDroW:
1143 case AArch64::STRDroX:
1144 case AArch64::STRHHroW:
1145 case AArch64::STRHHroX:
1146 case AArch64::STRHroW:
1147 case AArch64::STRHroX:
1148 case AArch64::STRQroW:
1149 case AArch64::STRQroX:
1150 case AArch64::STRSroW:
1151 case AArch64::STRSroX:
1152 case AArch64::STRWroW:
1153 case AArch64::STRWroX:
1154 case AArch64::STRXroW:
1155 case AArch64::STRXroX: {
1156 unsigned IsSigned = MI.getOperand(3).getImm();
1157 return !IsSigned;
1158 }
1159 }
1160}
1161
1162bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1163 unsigned Opc = MI.getOpcode();
1164 switch (Opc) {
1165 default:
1166 return false;
1167 case AArch64::SEH_StackAlloc:
1168 case AArch64::SEH_SaveFPLR:
1169 case AArch64::SEH_SaveFPLR_X:
1170 case AArch64::SEH_SaveReg:
1171 case AArch64::SEH_SaveReg_X:
1172 case AArch64::SEH_SaveRegP:
1173 case AArch64::SEH_SaveRegP_X:
1174 case AArch64::SEH_SaveFReg:
1175 case AArch64::SEH_SaveFReg_X:
1176 case AArch64::SEH_SaveFRegP:
1177 case AArch64::SEH_SaveFRegP_X:
1178 case AArch64::SEH_SetFP:
1179 case AArch64::SEH_AddFP:
1180 case AArch64::SEH_Nop:
1181 case AArch64::SEH_PrologEnd:
1182 case AArch64::SEH_EpilogStart:
1183 case AArch64::SEH_EpilogEnd:
1184 case AArch64::SEH_PACSignLR:
1185 case AArch64::SEH_SaveAnyRegQP:
1186 case AArch64::SEH_SaveAnyRegQPX:
1187 case AArch64::SEH_AllocZ:
1188 case AArch64::SEH_SaveZReg:
1189 case AArch64::SEH_SavePReg:
1190 return true;
1191 }
1192}
1193
1194bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1195 Register &SrcReg, Register &DstReg,
1196 unsigned &SubIdx) const {
1197 switch (MI.getOpcode()) {
1198 default:
1199 return false;
1200 case AArch64::SBFMXri: // aka sxtw
1201 case AArch64::UBFMXri: // aka uxtw
1202 // Check for the 32 -> 64 bit extension case, these instructions can do
1203 // much more.
1204 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1205 return false;
1206 // This is a signed or unsigned 32 -> 64 bit extension.
1207 SrcReg = MI.getOperand(1).getReg();
1208 DstReg = MI.getOperand(0).getReg();
1209 SubIdx = AArch64::sub_32;
1210 return true;
1211 }
1212}
1213
1214bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1215 const MachineInstr &MIa, const MachineInstr &MIb) const {
1216 const TargetRegisterInfo *TRI = &getRegisterInfo();
1217 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1218 int64_t OffsetA = 0, OffsetB = 0;
1219 TypeSize WidthA(0, false), WidthB(0, false);
1220 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1221
1222 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1223 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1224
1225 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1226 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1227 return false;
1228
1229 // Retrieve the base, offset from the base and width. Width
1230 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1231 // the bases are identical, and the offset of a lower memory access +
1232 // the width doesn't overlap the offset of a higher memory access,
1233 // then the memory accesses are different.
1234 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1235 // are assumed to have the same scale (vscale).
1236 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1237 WidthA, TRI) &&
1238 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1239 WidthB, TRI)) {
1240 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1241 OffsetAIsScalable == OffsetBIsScalable) {
1242 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1243 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1244 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1245 if (LowWidth.isScalable() == OffsetAIsScalable &&
1246 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1247 return true;
1248 }
1249 }
1250 return false;
1251}
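// Illustrative example: two STRXui stores off the same base register with
// byte offsets 0 and 8 each have an 8-byte width, so
// LowOffset + LowWidth (0 + 8) <= HighOffset (8) and the accesses are
// reported as trivially disjoint.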
1252
1253bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1254 const MachineBasicBlock *MBB,
1255 const MachineFunction &MF) const {
1256 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1257 return true;
1258
1259 // Do not move an instruction that can be recognized as a branch target.
1260 if (hasBTISemantics(MI))
1261 return true;
1262
1263 switch (MI.getOpcode()) {
1264 case AArch64::HINT:
1265 // CSDB hints are scheduling barriers.
1266 if (MI.getOperand(0).getImm() == 0x14)
1267 return true;
1268 break;
1269 case AArch64::DSB:
1270 case AArch64::ISB:
1271 // DSB and ISB also are scheduling barriers.
1272 return true;
1273 case AArch64::MSRpstatesvcrImm1:
1274 // SMSTART and SMSTOP are also scheduling barriers.
1275 return true;
1276 default:;
1277 }
1278 if (isSEHInstruction(MI))
1279 return true;
1280 auto Next = std::next(MI.getIterator());
1281 return Next != MBB->end() && Next->isCFIInstruction();
1282}
1283
1284/// analyzeCompare - For a comparison instruction, return the source registers
1285/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1286/// Return true if the comparison instruction can be analyzed.
1287bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1288 Register &SrcReg2, int64_t &CmpMask,
1289 int64_t &CmpValue) const {
1290 // The first operand can be a frame index where we'd normally expect a
1291 // register.
1292 // FIXME: Pass subregisters out of analyzeCompare
1293 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1294 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1295 return false;
1296
1297 switch (MI.getOpcode()) {
1298 default:
1299 break;
1300 case AArch64::PTEST_PP:
1301 case AArch64::PTEST_PP_ANY:
1302 case AArch64::PTEST_PP_FIRST:
1303 SrcReg = MI.getOperand(0).getReg();
1304 SrcReg2 = MI.getOperand(1).getReg();
1305 if (MI.getOperand(2).getSubReg())
1306 return false;
1307
1308 // Not sure about the mask and value for now...
1309 CmpMask = ~0;
1310 CmpValue = 0;
1311 return true;
1312 case AArch64::SUBSWrr:
1313 case AArch64::SUBSWrs:
1314 case AArch64::SUBSWrx:
1315 case AArch64::SUBSXrr:
1316 case AArch64::SUBSXrs:
1317 case AArch64::SUBSXrx:
1318 case AArch64::ADDSWrr:
1319 case AArch64::ADDSWrs:
1320 case AArch64::ADDSWrx:
1321 case AArch64::ADDSXrr:
1322 case AArch64::ADDSXrs:
1323 case AArch64::ADDSXrx:
1324 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1325 SrcReg = MI.getOperand(1).getReg();
1326 SrcReg2 = MI.getOperand(2).getReg();
1327
1328 // FIXME: Pass subregisters out of analyzeCompare
1329 if (MI.getOperand(2).getSubReg())
1330 return false;
1331
1332 CmpMask = ~0;
1333 CmpValue = 0;
1334 return true;
1335 case AArch64::SUBSWri:
1336 case AArch64::ADDSWri:
1337 case AArch64::SUBSXri:
1338 case AArch64::ADDSXri:
1339 SrcReg = MI.getOperand(1).getReg();
1340 SrcReg2 = 0;
1341 CmpMask = ~0;
1342 CmpValue = MI.getOperand(2).getImm();
1343 return true;
1344 case AArch64::ANDSWri:
1345 case AArch64::ANDSXri:
1346 // ANDS does not use the same encoding scheme as the other xxxS
1347 // instructions.
1348 SrcReg = MI.getOperand(1).getReg();
1349 SrcReg2 = 0;
1350 CmpMask = ~0;
1351 CmpValue = AArch64_AM::decodeLogicalImmediate(
1352 MI.getOperand(2).getImm(),
1353 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1354 return true;
1355 }
1356
1357 return false;
1358}
1359
1360static bool UpdateOperandRegClass(MachineInstr &Instr) {
1361 MachineBasicBlock *MBB = Instr.getParent();
1362 assert(MBB && "Can't get MachineBasicBlock here");
1363 MachineFunction *MF = MBB->getParent();
1364 assert(MF && "Can't get MachineFunction here");
1365 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1366 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1367 MachineRegisterInfo *MRI = &MF->getRegInfo();
1368
1369 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1370 ++OpIdx) {
1371 MachineOperand &MO = Instr.getOperand(OpIdx);
1372 const TargetRegisterClass *OpRegCstraints =
1373 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1374
1375 // If there's no constraint, there's nothing to do.
1376 if (!OpRegCstraints)
1377 continue;
1378 // If the operand is a frame index, there's nothing to do here.
1379 // A frame index operand will resolve correctly during PEI.
1380 if (MO.isFI())
1381 continue;
1382
1383 assert(MO.isReg() &&
1384 "Operand has register constraints without being a register!");
1385
1386 Register Reg = MO.getReg();
1387 if (Reg.isPhysical()) {
1388 if (!OpRegCstraints->contains(Reg))
1389 return false;
1390 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1391 !MRI->constrainRegClass(Reg, OpRegCstraints))
1392 return false;
1393 }
1394
1395 return true;
1396}
1397
1398/// Return the opcode that does not set flags when possible - otherwise
1399/// return the original opcode. The caller is responsible to do the actual
1400/// substitution and legality checking.
1401unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1402 // Don't convert all compare instructions, because for some the zero register
1403 // encoding becomes the sp register.
1404 bool MIDefinesZeroReg = false;
1405 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1406 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1407 MIDefinesZeroReg = true;
1408
1409 switch (MI.getOpcode()) {
1410 default:
1411 return MI.getOpcode();
1412 case AArch64::ADDSWrr:
1413 return AArch64::ADDWrr;
1414 case AArch64::ADDSWri:
1415 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1416 case AArch64::ADDSWrs:
1417 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1418 case AArch64::ADDSWrx:
1419 return AArch64::ADDWrx;
1420 case AArch64::ADDSXrr:
1421 return AArch64::ADDXrr;
1422 case AArch64::ADDSXri:
1423 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1424 case AArch64::ADDSXrs:
1425 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1426 case AArch64::ADDSXrx:
1427 return AArch64::ADDXrx;
1428 case AArch64::SUBSWrr:
1429 return AArch64::SUBWrr;
1430 case AArch64::SUBSWri:
1431 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1432 case AArch64::SUBSWrs:
1433 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1434 case AArch64::SUBSWrx:
1435 return AArch64::SUBWrx;
1436 case AArch64::SUBSXrr:
1437 return AArch64::SUBXrr;
1438 case AArch64::SUBSXri:
1439 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1440 case AArch64::SUBSXrs:
1441 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1442 case AArch64::SUBSXrx:
1443 return AArch64::SUBXrx;
1444 }
1445}
1446
1447enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1448
1449/// True when condition flags are accessed (either by writing or reading)
1450/// on the instruction trace starting at From and ending at To.
1451///
1452/// Note: If From and To are from different blocks it's assumed CC are accessed
1453/// on the path.
1454static bool areCFlagsAccessedBetweenInstrs(
1455 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1456 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1457 // Early exit if To is at the beginning of the BB.
1458 if (To == To->getParent()->begin())
1459 return true;
1460
1461 // Check whether the instructions are in the same basic block
1462 // If not, assume the condition flags might get modified somewhere.
1463 if (To->getParent() != From->getParent())
1464 return true;
1465
1466 // From must be above To.
1467 assert(std::any_of(
1468 ++To.getReverse(), To->getParent()->rend(),
1469 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1470
1471 // We iterate backward starting at \p To until we hit \p From.
1472 for (const MachineInstr &Instr :
1473 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1474 if (((AccessToCheck & AK_Write) &&
1475 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1476 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1477 return true;
1478 }
1479 return false;
1480}
1481
1482std::optional<unsigned>
1483AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1484 MachineInstr *Pred,
1485 const MachineRegisterInfo *MRI) const {
1486 unsigned MaskOpcode = Mask->getOpcode();
1487 unsigned PredOpcode = Pred->getOpcode();
1488 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1489 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1490
1491 if (PredIsWhileLike) {
1492 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1493 // instruction and the condition is "any" since WHILEcc does an implicit
1494 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1495 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1496 return PredOpcode;
1497
1498 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1499 // redundant since WHILE performs an implicit PTEST with an all active
1500 // mask.
1501 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1502 getElementSizeForOpcode(MaskOpcode) ==
1503 getElementSizeForOpcode(PredOpcode))
1504 return PredOpcode;
1505
1506 return {};
1507 }
1508
1509 if (PredIsPTestLike) {
1510 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1511 // instruction that sets the flags as PTEST would and the condition is
1512 // "any" since PG is always a subset of the governing predicate of the
1513 // ptest-like instruction.
1514 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1515 return PredOpcode;
1516
1517 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1518
1519 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1520 // to look through a copy and try again. This is because some instructions
1521 // take a predicate whose register class is a subset of its result class.
1522 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1523 PTestLikeMask->getOperand(1).getReg().isVirtual())
1524 PTestLikeMask =
1525 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1526
1527 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1528 // element size matches and either the PTEST_LIKE instruction uses
1529 // the same all active mask or the condition is "any".
1530 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1531 getElementSizeForOpcode(MaskOpcode) ==
1532 getElementSizeForOpcode(PredOpcode)) {
1533 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1534 return PredOpcode;
1535 }
1536
1537 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1538 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1539 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1540 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1541 // performed by the compare could consider fewer lanes for these element
1542 // sizes.
1543 //
1544 // For example, consider
1545 //
1546 // ptrue p0.b ; P0=1111-1111-1111-1111
1547 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1548 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1549 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1550 // ; ^ last active
1551 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1552 // ; ^ last active
1553 //
1554 // where the compare generates a canonical all active 32-bit predicate
1555 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1556 // active flag, whereas the PTEST instruction with the same mask doesn't.
1557 // For PTEST_ANY this doesn't apply as the flags in this case would be
1558 // identical regardless of element size.
1559 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1560 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1561 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1562 return PredOpcode;
1563
1564 return {};
1565 }
1566
1567 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1568 // opcode so the PTEST becomes redundant.
1569 switch (PredOpcode) {
1570 case AArch64::AND_PPzPP:
1571 case AArch64::BIC_PPzPP:
1572 case AArch64::EOR_PPzPP:
1573 case AArch64::NAND_PPzPP:
1574 case AArch64::NOR_PPzPP:
1575 case AArch64::ORN_PPzPP:
1576 case AArch64::ORR_PPzPP:
1577 case AArch64::BRKA_PPzP:
1578 case AArch64::BRKPA_PPzPP:
1579 case AArch64::BRKB_PPzP:
1580 case AArch64::BRKPB_PPzPP:
1581 case AArch64::RDFFR_PPz: {
1582 // Check to see if our mask is the same. If not the resulting flag bits
1583 // may be different and we can't remove the ptest.
1584 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1585 if (Mask != PredMask)
1586 return {};
1587 break;
1588 }
1589 case AArch64::BRKN_PPzP: {
1590 // BRKN uses an all active implicit mask to set flags unlike the other
1591 // flag-setting instructions.
1592 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1593 if ((MaskOpcode != AArch64::PTRUE_B) ||
1594 (Mask->getOperand(1).getImm() != 31))
1595 return {};
1596 break;
1597 }
1598 case AArch64::PTRUE_B:
1599 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1600 break;
1601 default:
1602 // Bail out if we don't recognize the input
1603 return {};
1604 }
1605
1606 return convertToFlagSettingOpc(PredOpcode);
1607}
1608
1609/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1610/// operation which could set the flags in an identical manner
1611bool AArch64InstrInfo::optimizePTestInstr(
1612 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1613 const MachineRegisterInfo *MRI) const {
1614 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1615 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1616
1617 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1618 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1619 // before the branch to extract each subregister.
1620 auto Op = Pred->getOperand(1);
1621 if (Op.isReg() && Op.getReg().isVirtual() &&
1622 Op.getSubReg() == AArch64::psub0)
1623 Pred = MRI->getUniqueVRegDef(Op.getReg());
1624 }
1625
1626 unsigned PredOpcode = Pred->getOpcode();
1627 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1628 if (!NewOp)
1629 return false;
1630
1631 const TargetRegisterInfo *TRI = &getRegisterInfo();
1632
1633 // If another instruction between Pred and PTest accesses flags, don't remove
1634 // the ptest or update the earlier instruction to modify them.
1635 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1636 return false;
1637
1638 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1639 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1640 // operand to be replaced with an equivalent instruction that also sets the
1641 // flags.
1642 PTest->eraseFromParent();
1643 if (*NewOp != PredOpcode) {
1644 Pred->setDesc(get(*NewOp));
1645 bool succeeded = UpdateOperandRegClass(*Pred);
1646 (void)succeeded;
1647 assert(succeeded && "Operands have incompatible register classes!");
1648 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1649 }
1650
1651 // Ensure that the flags def is live.
1652 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1653 unsigned i = 0, e = Pred->getNumOperands();
1654 for (; i != e; ++i) {
1655 MachineOperand &MO = Pred->getOperand(i);
1656 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1657 MO.setIsDead(false);
1658 break;
1659 }
1660 }
1661 }
1662 return true;
1663}
1664
1665/// Try to optimize a compare instruction. A compare instruction is an
1666/// instruction which produces AArch64::NZCV. It is a true compare
1667/// instruction
1668/// when there are no uses of its destination register.
1669///
1670/// The following steps are tried in order:
1671/// 1. Convert CmpInstr into an unconditional version.
1672/// 2. Remove CmpInstr if above there is an instruction producing a needed
1673/// condition code or an instruction which can be converted into such an
1674/// instruction.
1675/// Only comparison with zero is supported.
1676bool AArch64InstrInfo::optimizeCompareInstr(
1677 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1678 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1679 assert(CmpInstr.getParent());
1680 assert(MRI);
1681
1682 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1683 int DeadNZCVIdx =
1684 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1685 if (DeadNZCVIdx != -1) {
1686 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1687 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1688 CmpInstr.eraseFromParent();
1689 return true;
1690 }
1691 unsigned Opc = CmpInstr.getOpcode();
1692 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1693 if (NewOpc == Opc)
1694 return false;
1695 const MCInstrDesc &MCID = get(NewOpc);
1696 CmpInstr.setDesc(MCID);
1697 CmpInstr.removeOperand(DeadNZCVIdx);
1698 bool succeeded = UpdateOperandRegClass(CmpInstr);
1699 (void)succeeded;
1700 assert(succeeded && "Some operands reg class are incompatible!");
1701 return true;
1702 }
1703
1704 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1705 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1706 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1707 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1708
1709 if (SrcReg2 != 0)
1710 return false;
1711
1712 // CmpInstr is a Compare instruction if destination register is not used.
1713 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1714 return false;
1715
1716 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1717 return true;
1718 return (CmpValue == 0 || CmpValue == 1) &&
1719 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1720}
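// Illustrative example: for
//   %res = SUBSWrr %a, %b      ; NZCV dead
// the code above rewrites the instruction to SUBWrr, and for a compare
// against zero followed by b.eq it instead tries to reuse the flags set by
// an earlier ADD/SUB by switching that instruction to its S form.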
1721
1722/// Get opcode of S version of Instr.
1723/// If Instr is S version its opcode is returned.
1724/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1725/// or we are not interested in it.
1726static unsigned sForm(MachineInstr &Instr) {
1727 switch (Instr.getOpcode()) {
1728 default:
1729 return AArch64::INSTRUCTION_LIST_END;
1730
1731 case AArch64::ADDSWrr:
1732 case AArch64::ADDSWri:
1733 case AArch64::ADDSXrr:
1734 case AArch64::ADDSXri:
1735 case AArch64::SUBSWrr:
1736 case AArch64::SUBSWri:
1737 case AArch64::SUBSXrr:
1738 case AArch64::SUBSXri:
1739 return Instr.getOpcode();
1740
1741 case AArch64::ADDWrr:
1742 return AArch64::ADDSWrr;
1743 case AArch64::ADDWri:
1744 return AArch64::ADDSWri;
1745 case AArch64::ADDXrr:
1746 return AArch64::ADDSXrr;
1747 case AArch64::ADDXri:
1748 return AArch64::ADDSXri;
1749 case AArch64::ADCWr:
1750 return AArch64::ADCSWr;
1751 case AArch64::ADCXr:
1752 return AArch64::ADCSXr;
1753 case AArch64::SUBWrr:
1754 return AArch64::SUBSWrr;
1755 case AArch64::SUBWri:
1756 return AArch64::SUBSWri;
1757 case AArch64::SUBXrr:
1758 return AArch64::SUBSXrr;
1759 case AArch64::SUBXri:
1760 return AArch64::SUBSXri;
1761 case AArch64::SBCWr:
1762 return AArch64::SBCSWr;
1763 case AArch64::SBCXr:
1764 return AArch64::SBCSXr;
1765 case AArch64::ANDWri:
1766 return AArch64::ANDSWri;
1767 case AArch64::ANDXri:
1768 return AArch64::ANDSXri;
1769 }
1770}
1771
1772/// Check if AArch64::NZCV should be alive in successors of MBB.
1773static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1774 for (auto *BB : MBB->successors())
1775 if (BB->isLiveIn(AArch64::NZCV))
1776 return true;
1777 return false;
1778}
1779
1780/// \returns The condition code operand index for \p Instr if it is a branch
1781/// or select and -1 otherwise.
1782static int
1783findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1784 switch (Instr.getOpcode()) {
1785 default:
1786 return -1;
1787
1788 case AArch64::Bcc: {
1789 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1790 assert(Idx >= 2);
1791 return Idx - 2;
1792 }
1793
1794 case AArch64::CSINVWr:
1795 case AArch64::CSINVXr:
1796 case AArch64::CSINCWr:
1797 case AArch64::CSINCXr:
1798 case AArch64::CSELWr:
1799 case AArch64::CSELXr:
1800 case AArch64::CSNEGWr:
1801 case AArch64::CSNEGXr:
1802 case AArch64::FCSELSrrr:
1803 case AArch64::FCSELDrrr: {
1804 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1805 assert(Idx >= 1);
1806 return Idx - 1;
1807 }
1808 }
1809}
1810
1811/// Find a condition code used by the instruction.
1812/// Returns AArch64CC::Invalid if either the instruction does not use condition
1813/// codes or we don't optimize CmpInstr in the presence of such instructions.
1814static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1815 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1816 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1817 Instr.getOperand(CCIdx).getImm())
1818 : AArch64CC::Invalid;
1819}
1820
1822static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1823 UsedNZCV UsedFlags;
1824 switch (CC) {
1825 default:
1826 break;
1827
1828 case AArch64CC::EQ: // Z set
1829 case AArch64CC::NE: // Z clear
1830 UsedFlags.Z = true;
1831 break;
1832
1833 case AArch64CC::HI: // Z clear and C set
1834 case AArch64CC::LS: // Z set or C clear
1835 UsedFlags.Z = true;
1836 [[fallthrough]];
1837 case AArch64CC::HS: // C set
1838 case AArch64CC::LO: // C clear
1839 UsedFlags.C = true;
1840 break;
1841
1842 case AArch64CC::MI: // N set
1843 case AArch64CC::PL: // N clear
1844 UsedFlags.N = true;
1845 break;
1846
1847 case AArch64CC::VS: // V set
1848 case AArch64CC::VC: // V clear
1849 UsedFlags.V = true;
1850 break;
1851
1852 case AArch64CC::GT: // Z clear, N and V the same
1853 case AArch64CC::LE: // Z set, N and V differ
1854 UsedFlags.Z = true;
1855 [[fallthrough]];
1856 case AArch64CC::GE: // N and V the same
1857 case AArch64CC::LT: // N and V differ
1858 UsedFlags.N = true;
1859 UsedFlags.V = true;
1860 break;
1861 }
1862 return UsedFlags;
1863}
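// For example, getUsedNZCV(AArch64CC::EQ) reports only Z as used, while
// getUsedNZCV(AArch64CC::GT) reports Z, N and V (Z from the GT/LE case plus
// N and V from the fallthrough into GE/LT).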
1864
1865/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1866/// flags are not alive in successors of the block containing \p CmpInstr and \p MI.
1867/// \returns std::nullopt otherwise.
1868///
1869/// Collects the instructions using those flags in \p CCUseInstrs if provided.
1870std::optional<UsedNZCV>
1871llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1872 const TargetRegisterInfo &TRI,
1873 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1874 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1875 if (MI.getParent() != CmpParent)
1876 return std::nullopt;
1877
1878 if (areCFlagsAliveInSuccessors(CmpParent))
1879 return std::nullopt;
1880
1881 UsedNZCV NZCVUsedAfterCmp;
1882 for (MachineInstr &Instr : instructionsWithoutDebug(
1883 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1884 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1885 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1886 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1887 return std::nullopt;
1888 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1889 if (CCUseInstrs)
1890 CCUseInstrs->push_back(&Instr);
1891 }
1892 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1893 break;
1894 }
1895 return NZCVUsedAfterCmp;
1896}
1897
1898static bool isADDSRegImm(unsigned Opcode) {
1899 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1900}
1901
1902static bool isSUBSRegImm(unsigned Opcode) {
1903 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1904}
1905
1906/// Check if CmpInstr can be substituted by MI.
1907///
1908/// CmpInstr can be substituted:
1909/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1910/// - and, MI and CmpInstr are from the same MachineBB
1911/// - and, condition flags are not alive in successors of the CmpInstr parent
1912/// - and, if MI opcode is the S form there must be no defs of flags between
1913/// MI and CmpInstr
1914/// or if MI opcode is not the S form there must be neither defs of flags
1915/// nor uses of flags between MI and CmpInstr.
1916/// - and, if the C/V flags are not used after CmpInstr,
1917/// or if the N flag is used but MI produces a poison value when signed overflow
1918/// occurs.
1919static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1920 const TargetRegisterInfo &TRI) {
1921 // NOTE: this assertion guarantees that MI.getOpcode() is an add or subtract
1922 // that may or may not set flags.
1923 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1924
1925 const unsigned CmpOpcode = CmpInstr.getOpcode();
1926 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1927 return false;
1928
1929 assert((CmpInstr.getOperand(2).isImm() &&
1930 CmpInstr.getOperand(2).getImm() == 0) &&
1931 "Caller guarantees that CmpInstr compares with constant 0");
1932
1933 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1934 if (!NZVCUsed || NZVCUsed->C)
1935 return false;
1936
1937 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1938 // '%vreg = add ...' or '%vreg = sub ...'.
1939 // Condition flag V is used to indicate signed overflow.
1940 // 1) MI and CmpInstr set N and V to the same value.
1941 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1942 // signed overflow occurs, so CmpInstr could still be simplified away.
1943 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1944 return false;
1945
1946 AccessKind AccessToCheck = AK_Write;
1947 if (sForm(MI) != MI.getOpcode())
1948 AccessToCheck = AK_All;
1949 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1950}
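// Rough before/after sketch of the rewrite this check enables (virtual
// register numbers are invented for illustration):
// \code
//   %2:gpr32 = ADDWrr %0, %1
//   %3:gpr32 = SUBSWri %2, 0, 0, implicit-def $nzcv   ; compare %2 with 0
//   Bcc 0 ... (b.eq)
// \endcode
// becomes, via substituteCmpToZero() below,
// \code
//   %2:gpr32 = ADDSWrr %0, %1, implicit-def $nzcv
//   Bcc 0 ... (b.eq)
// \endcode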
1951
1952/// Substitute an instruction comparing to zero with another instruction
1953/// which produces needed condition flags.
1954///
1955/// Return true on success.
1956bool AArch64InstrInfo::substituteCmpToZero(
1957 MachineInstr &CmpInstr, unsigned SrcReg,
1958 const MachineRegisterInfo &MRI) const {
1959 // Get the unique definition of SrcReg.
1960 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1961 if (!MI)
1962 return false;
1963
1964 const TargetRegisterInfo &TRI = getRegisterInfo();
1965
1966 unsigned NewOpc = sForm(*MI);
1967 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1968 return false;
1969
1970 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1971 return false;
1972
1973 // Update the instruction to set NZCV.
1974 MI->setDesc(get(NewOpc));
1975 CmpInstr.eraseFromParent();
1976 bool succeeded = UpdateOperandRegClass(*MI);
1977 (void)succeeded;
1978 assert(succeeded && "Some operands reg class are incompatible!");
1979 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1980 return true;
1981}
1982
1983/// \returns True if \p CmpInstr can be removed.
1984///
1985/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1986/// codes used in \p CCUseInstrs must be inverted.
1987static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1988 int CmpValue, const TargetRegisterInfo &TRI,
1989 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1990 bool &IsInvertCC) {
1991 assert((CmpValue == 0 || CmpValue == 1) &&
1992 "Only comparisons to 0 or 1 considered for removal!");
1993
1994 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1995 unsigned MIOpc = MI.getOpcode();
1996 if (MIOpc == AArch64::CSINCWr) {
1997 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1998 MI.getOperand(2).getReg() != AArch64::WZR)
1999 return false;
2000 } else if (MIOpc == AArch64::CSINCXr) {
2001 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2002 MI.getOperand(2).getReg() != AArch64::XZR)
2003 return false;
2004 } else {
2005 return false;
2006 }
2007 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
2008 if (MICC == AArch64CC::Invalid)
2009 return false;
2010
2011 // NZCV needs to be defined
2012 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2013 return false;
2014
2015 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2016 const unsigned CmpOpcode = CmpInstr.getOpcode();
2017 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2018 if (CmpValue && !IsSubsRegImm)
2019 return false;
2020 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2021 return false;
2022
2023 // MI conditions allowed: eq, ne, mi, pl
2024 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2025 if (MIUsedNZCV.C || MIUsedNZCV.V)
2026 return false;
2027
2028 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2029 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2030 // Condition flags are not used in CmpInstr basic block successors and only
2031 // Z or N flags allowed to be used after CmpInstr within its basic block
2032 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2033 return false;
2034 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2035 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2036 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2037 return false;
2038 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2039 if (MIUsedNZCV.N && !CmpValue)
2040 return false;
2041
2042 // There must be no defs of flags between MI and CmpInstr
2043 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2044 return false;
2045
2046 // Condition code is inverted in the following cases:
2047 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2048 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2049 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2050 (!CmpValue && MICC == AArch64CC::NE);
2051 return true;
2052}
2053
2054/// Remove comparison in csinc-cmp sequence
2055///
2056/// Examples:
2057/// 1. \code
2058/// csinc w9, wzr, wzr, ne
2059/// cmp w9, #0
2060/// b.eq
2061/// \endcode
2062/// to
2063/// \code
2064/// csinc w9, wzr, wzr, ne
2065/// b.ne
2066/// \endcode
2067///
2068/// 2. \code
2069/// csinc x2, xzr, xzr, mi
2070/// cmp x2, #1
2071/// b.pl
2072/// \endcode
2073/// to
2074/// \code
2075/// csinc x2, xzr, xzr, mi
2076/// b.pl
2077/// \endcode
2078///
2079/// \param CmpInstr comparison instruction
2080/// \return True when comparison removed
2081bool AArch64InstrInfo::removeCmpToZeroOrOne(
2082 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2083 const MachineRegisterInfo &MRI) const {
2084 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2085 if (!MI)
2086 return false;
2087 const TargetRegisterInfo &TRI = getRegisterInfo();
2088 SmallVector<MachineInstr *, 4> CCUseInstrs;
2089 bool IsInvertCC = false;
2090 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2091 IsInvertCC))
2092 return false;
2093 // Make transformation
2094 CmpInstr.eraseFromParent();
2095 if (IsInvertCC) {
2096 // Invert condition codes in CmpInstr CC users
2097 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2098 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2099 assert(Idx >= 0 && "Unexpected instruction using CC.");
2100 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2101 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2102 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2103 CCOperand.setImm(CCUse);
2104 }
2105 }
2106 return true;
2107}
2108
2109bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2110 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2111 MI.getOpcode() != AArch64::CATCHRET)
2112 return false;
2113
2114 MachineBasicBlock &MBB = *MI.getParent();
2115 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2116 auto TRI = Subtarget.getRegisterInfo();
2117 DebugLoc DL = MI.getDebugLoc();
2118
2119 if (MI.getOpcode() == AArch64::CATCHRET) {
2120 // Skip to the first instruction before the epilog.
2121 const TargetInstrInfo *TII =
2122 MBB.getParent()->getSubtarget().getInstrInfo();
2123 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2124 MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
2125 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2126 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2127 FirstEpilogSEH != MBB.begin())
2128 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2129 if (FirstEpilogSEH != MBB.begin())
2130 FirstEpilogSEH = std::next(FirstEpilogSEH);
2131 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2132 .addReg(AArch64::X0, RegState::Define)
2133 .addMBB(TargetMBB);
2134 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2135 .addReg(AArch64::X0, RegState::Define)
2136 .addReg(AArch64::X0)
2137 .addMBB(TargetMBB)
2138 .addImm(0);
2139 TargetMBB->setMachineBlockAddressTaken();
2140 return true;
2141 }
2142
2143 Register Reg = MI.getOperand(0).getReg();
2144 Module &M = *MBB.getParent()->getFunction().getParent();
2145 if (M.getStackProtectorGuard() == "sysreg") {
2146 const AArch64SysReg::SysReg *SrcReg =
2147 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2148 if (!SrcReg)
2149 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2150
2151 // mrs xN, sysreg
2152 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2153 .addDef(Reg)
2154 .addImm(SrcReg->Encoding);
2155 int Offset = M.getStackProtectorGuardOffset();
2156 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2157 // ldr xN, [xN, #offset]
2158 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2159 .addDef(Reg)
2160 .addUse(Reg, RegState::Kill)
2161 .addImm(Offset / 8);
2162 } else if (Offset >= -256 && Offset <= 255) {
2163 // ldur xN, [xN, #offset]
2164 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2165 .addDef(Reg)
2166 .addUse(Reg, RegState::Kill)
2167 .addImm(Offset);
2168 } else if (Offset >= -4095 && Offset <= 4095) {
2169 if (Offset > 0) {
2170 // add xN, xN, #offset
2171 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2172 .addDef(Reg)
2173 .addUse(Reg, RegState::Kill)
2174 .addImm(Offset)
2175 .addImm(0);
2176 } else {
2177 // sub xN, xN, #offset
2178 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2179 .addDef(Reg)
2180 .addUse(Reg, RegState::Kill)
2181 .addImm(-Offset)
2182 .addImm(0);
2183 }
2184 // ldr xN, [xN]
2185 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2186 .addDef(Reg)
2187 .addUse(Reg, RegState::Kill)
2188 .addImm(0);
2189 } else {
2190 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2191 // than 32760.
2192 // It might be nice to use AArch64::MOVi32imm here, which would get
2193 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2194 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2195 // AArch64FrameLowering might help us find such a scratch register
2196 // though. If we failed to find a scratch register, we could emit a
2197 // stream of add instructions to build up the immediate. Or, we could try
2198 // to insert a AArch64::MOVi32imm before register allocation so that we
2199 // didn't need to scavenge for a scratch register.
2200 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2201 }
2202 MBB.erase(MI);
2203 return true;
2204 }
2205
2206 const GlobalValue *GV =
2207 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2208 const TargetMachine &TM = MBB.getParent()->getTarget();
2209 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2210 const unsigned char MO_NC = AArch64II::MO_NC;
2211
2212 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2213 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2214 .addGlobalAddress(GV, 0, OpFlags);
2215 if (Subtarget.isTargetILP32()) {
2216 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2217 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2218 .addDef(Reg32, RegState::Dead)
2219 .addUse(Reg, RegState::Kill)
2220 .addImm(0)
2221 .addMemOperand(*MI.memoperands_begin())
2222 .addDef(Reg, RegState::Implicit);
2223 } else {
2224 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2225 .addUse(Reg, RegState::Kill)
2226 .addImm(0)
2227 .addMemOperand(*MI.memoperands_begin());
2228 }
2229 } else if (TM.getCodeModel() == CodeModel::Large) {
2230 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2231 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2232 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2233 .addImm(0);
2234 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2235 .addReg(Reg, RegState::Kill)
2236 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2237 .addImm(16);
2238 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2239 .addReg(Reg, RegState::Kill)
2240 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2241 .addImm(32);
2242 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2243 .addReg(Reg, RegState::Kill)
2244 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2245 .addImm(48);
2246 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2247 .addReg(Reg, RegState::Kill)
2248 .addImm(0)
2249 .addMemOperand(*MI.memoperands_begin());
2250 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2251 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2252 .addGlobalAddress(GV, 0, OpFlags);
2253 } else {
2254 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2255 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2256 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2257 if (Subtarget.isTargetILP32()) {
2258 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2259 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2260 .addDef(Reg32, RegState::Dead)
2261 .addUse(Reg, RegState::Kill)
2262 .addGlobalAddress(GV, 0, LoFlags)
2263 .addMemOperand(*MI.memoperands_begin())
2264 .addDef(Reg, RegState::Implicit);
2265 } else {
2266 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2267 .addReg(Reg, RegState::Kill)
2268 .addGlobalAddress(GV, 0, LoFlags)
2269 .addMemOperand(*MI.memoperands_begin());
2270 }
2271 }
2272
2273 MBB.erase(MI);
2274
2275 return true;
2276}
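// Illustrative expansion (editorial sketch, assuming the stack guard symbol is
// directly reachable under the small code model, i.e. the non-GOT, non-sysreg
// case): the LOAD_STACK_GUARD pseudo handled above becomes an ADRP of the
// symbol's page plus a page-offset load, roughly
// \code
//   adrp x0, __stack_chk_guard
//   ldr  x0, [x0, :lo12:__stack_chk_guard]
// \endcode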
2277
2278// Return true if this instruction simply sets its single destination register
2279// to zero. This is equivalent to a register rename of the zero-register.
2280static bool isGPRZero(const MachineInstr &MI) {
2281 switch (MI.getOpcode()) {
2282 default:
2283 break;
2284 case AArch64::MOVZWi:
2285 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2286 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2287 assert(MI.getDesc().getNumOperands() == 3 &&
2288 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2289 return true;
2290 }
2291 break;
2292 case AArch64::ANDWri: // and Rd, Rzr, #imm
2293 return MI.getOperand(1).getReg() == AArch64::WZR;
2294 case AArch64::ANDXri:
2295 return MI.getOperand(1).getReg() == AArch64::XZR;
2296 case TargetOpcode::COPY:
2297 return MI.getOperand(1).getReg() == AArch64::WZR;
2298 }
2299 return false;
2300}
2301
2302// Return true if this instruction simply renames a general register without
2303// modifying bits.
2304static bool isGPRCopy(const MachineInstr &MI) {
2305 switch (MI.getOpcode()) {
2306 default:
2307 break;
2308 case TargetOpcode::COPY: {
2309 // GPR32 copies will be lowered to ORRXrs
2310 Register DstReg = MI.getOperand(0).getReg();
2311 return (AArch64::GPR32RegClass.contains(DstReg) ||
2312 AArch64::GPR64RegClass.contains(DstReg));
2313 }
2314 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2315 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2316 assert(MI.getDesc().getNumOperands() == 4 &&
2317 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2318 return true;
2319 }
2320 break;
2321 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2322 if (MI.getOperand(2).getImm() == 0) {
2323 assert(MI.getDesc().getNumOperands() == 4 &&
2324 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2325 return true;
2326 }
2327 break;
2328 }
2329 return false;
2330}
2331
2332// Return true if this instruction simply renames an FPR128 register without
2333// modifying bits.
2334static bool isFPRCopy(const MachineInstr &MI) {
2335 switch (MI.getOpcode()) {
2336 default:
2337 break;
2338 case TargetOpcode::COPY: {
2339 Register DstReg = MI.getOperand(0).getReg();
2340 return AArch64::FPR128RegClass.contains(DstReg);
2341 }
2342 case AArch64::ORRv16i8:
2343 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2344 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2345 "invalid ORRv16i8 operands");
2346 return true;
2347 }
2348 break;
2349 }
2350 return false;
2351}
2352
2353Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2354 int &FrameIndex) const {
2355 switch (MI.getOpcode()) {
2356 default:
2357 break;
2358 case AArch64::LDRWui:
2359 case AArch64::LDRXui:
2360 case AArch64::LDRBui:
2361 case AArch64::LDRHui:
2362 case AArch64::LDRSui:
2363 case AArch64::LDRDui:
2364 case AArch64::LDRQui:
2365 case AArch64::LDR_PXI:
2366 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2367 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2368 FrameIndex = MI.getOperand(1).getIndex();
2369 return MI.getOperand(0).getReg();
2370 }
2371 break;
2372 }
2373
2374 return 0;
2375}
2376
2377Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2378 int &FrameIndex) const {
2379 switch (MI.getOpcode()) {
2380 default:
2381 break;
2382 case AArch64::STRWui:
2383 case AArch64::STRXui:
2384 case AArch64::STRBui:
2385 case AArch64::STRHui:
2386 case AArch64::STRSui:
2387 case AArch64::STRDui:
2388 case AArch64::STRQui:
2389 case AArch64::STR_PXI:
2390 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2391 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2392 FrameIndex = MI.getOperand(1).getIndex();
2393 return MI.getOperand(0).getReg();
2394 }
2395 break;
2396 }
2397 return 0;
2398}
2399
2400/// Check all MachineMemOperands for a hint to suppress pairing.
2401bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2402 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2403 return MMO->getFlags() & MOSuppressPair;
2404 });
2405}
2406
2407/// Set a flag on the first MachineMemOperand to suppress pairing.
2408void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2409 if (MI.memoperands_empty())
2410 return;
2411 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2412}
2413
2414/// Check all MachineMemOperands for a hint that the load/store is strided.
2415bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2416 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2417 return MMO->getFlags() & MOStridedAccess;
2418 });
2419}
2420
2421bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2422 switch (Opc) {
2423 default:
2424 return false;
2425 case AArch64::STURSi:
2426 case AArch64::STRSpre:
2427 case AArch64::STURDi:
2428 case AArch64::STRDpre:
2429 case AArch64::STURQi:
2430 case AArch64::STRQpre:
2431 case AArch64::STURBBi:
2432 case AArch64::STURHHi:
2433 case AArch64::STURWi:
2434 case AArch64::STRWpre:
2435 case AArch64::STURXi:
2436 case AArch64::STRXpre:
2437 case AArch64::LDURSi:
2438 case AArch64::LDRSpre:
2439 case AArch64::LDURDi:
2440 case AArch64::LDRDpre:
2441 case AArch64::LDURQi:
2442 case AArch64::LDRQpre:
2443 case AArch64::LDURWi:
2444 case AArch64::LDRWpre:
2445 case AArch64::LDURXi:
2446 case AArch64::LDRXpre:
2447 case AArch64::LDRSWpre:
2448 case AArch64::LDURSWi:
2449 case AArch64::LDURHHi:
2450 case AArch64::LDURBBi:
2451 case AArch64::LDURSBWi:
2452 case AArch64::LDURSHWi:
2453 return true;
2454 }
2455}
2456
2457std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2458 switch (Opc) {
2459 default: return {};
2460 case AArch64::PRFMui: return AArch64::PRFUMi;
2461 case AArch64::LDRXui: return AArch64::LDURXi;
2462 case AArch64::LDRWui: return AArch64::LDURWi;
2463 case AArch64::LDRBui: return AArch64::LDURBi;
2464 case AArch64::LDRHui: return AArch64::LDURHi;
2465 case AArch64::LDRSui: return AArch64::LDURSi;
2466 case AArch64::LDRDui: return AArch64::LDURDi;
2467 case AArch64::LDRQui: return AArch64::LDURQi;
2468 case AArch64::LDRBBui: return AArch64::LDURBBi;
2469 case AArch64::LDRHHui: return AArch64::LDURHHi;
2470 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2471 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2472 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2473 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2474 case AArch64::LDRSWui: return AArch64::LDURSWi;
2475 case AArch64::STRXui: return AArch64::STURXi;
2476 case AArch64::STRWui: return AArch64::STURWi;
2477 case AArch64::STRBui: return AArch64::STURBi;
2478 case AArch64::STRHui: return AArch64::STURHi;
2479 case AArch64::STRSui: return AArch64::STURSi;
2480 case AArch64::STRDui: return AArch64::STURDi;
2481 case AArch64::STRQui: return AArch64::STURQi;
2482 case AArch64::STRBBui: return AArch64::STURBBi;
2483 case AArch64::STRHHui: return AArch64::STURHHi;
2484 }
2485}
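// For example, getUnscaledLdSt(AArch64::LDRXui) returns AArch64::LDURXi, while
// an opcode with no unscaled counterpart yields std::nullopt.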
2486
2487unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2488 switch (Opc) {
2489 default:
2490 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2491 case AArch64::ADDG:
2492 case AArch64::LDAPURBi:
2493 case AArch64::LDAPURHi:
2494 case AArch64::LDAPURi:
2495 case AArch64::LDAPURSBWi:
2496 case AArch64::LDAPURSBXi:
2497 case AArch64::LDAPURSHWi:
2498 case AArch64::LDAPURSHXi:
2499 case AArch64::LDAPURSWi:
2500 case AArch64::LDAPURXi:
2501 case AArch64::LDR_PPXI:
2502 case AArch64::LDR_PXI:
2503 case AArch64::LDR_ZXI:
2504 case AArch64::LDR_ZZXI:
2505 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2506 case AArch64::LDR_ZZZXI:
2507 case AArch64::LDR_ZZZZXI:
2508 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2509 case AArch64::LDRBBui:
2510 case AArch64::LDRBui:
2511 case AArch64::LDRDui:
2512 case AArch64::LDRHHui:
2513 case AArch64::LDRHui:
2514 case AArch64::LDRQui:
2515 case AArch64::LDRSBWui:
2516 case AArch64::LDRSBXui:
2517 case AArch64::LDRSHWui:
2518 case AArch64::LDRSHXui:
2519 case AArch64::LDRSui:
2520 case AArch64::LDRSWui:
2521 case AArch64::LDRWui:
2522 case AArch64::LDRXui:
2523 case AArch64::LDURBBi:
2524 case AArch64::LDURBi:
2525 case AArch64::LDURDi:
2526 case AArch64::LDURHHi:
2527 case AArch64::LDURHi:
2528 case AArch64::LDURQi:
2529 case AArch64::LDURSBWi:
2530 case AArch64::LDURSBXi:
2531 case AArch64::LDURSHWi:
2532 case AArch64::LDURSHXi:
2533 case AArch64::LDURSi:
2534 case AArch64::LDURSWi:
2535 case AArch64::LDURWi:
2536 case AArch64::LDURXi:
2537 case AArch64::PRFMui:
2538 case AArch64::PRFUMi:
2539 case AArch64::ST2Gi:
2540 case AArch64::STGi:
2541 case AArch64::STLURBi:
2542 case AArch64::STLURHi:
2543 case AArch64::STLURWi:
2544 case AArch64::STLURXi:
2545 case AArch64::StoreSwiftAsyncContext:
2546 case AArch64::STR_PPXI:
2547 case AArch64::STR_PXI:
2548 case AArch64::STR_ZXI:
2549 case AArch64::STR_ZZXI:
2550 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2551 case AArch64::STR_ZZZXI:
2552 case AArch64::STR_ZZZZXI:
2553 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2554 case AArch64::STRBBui:
2555 case AArch64::STRBui:
2556 case AArch64::STRDui:
2557 case AArch64::STRHHui:
2558 case AArch64::STRHui:
2559 case AArch64::STRQui:
2560 case AArch64::STRSui:
2561 case AArch64::STRWui:
2562 case AArch64::STRXui:
2563 case AArch64::STURBBi:
2564 case AArch64::STURBi:
2565 case AArch64::STURDi:
2566 case AArch64::STURHHi:
2567 case AArch64::STURHi:
2568 case AArch64::STURQi:
2569 case AArch64::STURSi:
2570 case AArch64::STURWi:
2571 case AArch64::STURXi:
2572 case AArch64::STZ2Gi:
2573 case AArch64::STZGi:
2574 case AArch64::TAGPstack:
2575 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
2576 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
2577 return 2;
2578 case AArch64::LD1B_D_IMM:
2579 case AArch64::LD1B_H_IMM:
2580 case AArch64::LD1B_IMM:
2581 case AArch64::LD1B_S_IMM:
2582 case AArch64::LD1D_IMM:
2583 case AArch64::LD1H_D_IMM:
2584 case AArch64::LD1H_IMM:
2585 case AArch64::LD1H_S_IMM:
2586 case AArch64::LD1RB_D_IMM:
2587 case AArch64::LD1RB_H_IMM:
2588 case AArch64::LD1RB_IMM:
2589 case AArch64::LD1RB_S_IMM:
2590 case AArch64::LD1RD_IMM:
2591 case AArch64::LD1RH_D_IMM:
2592 case AArch64::LD1RH_IMM:
2593 case AArch64::LD1RH_S_IMM:
2594 case AArch64::LD1RSB_D_IMM:
2595 case AArch64::LD1RSB_H_IMM:
2596 case AArch64::LD1RSB_S_IMM:
2597 case AArch64::LD1RSH_D_IMM:
2598 case AArch64::LD1RSH_S_IMM:
2599 case AArch64::LD1RSW_IMM:
2600 case AArch64::LD1RW_D_IMM:
2601 case AArch64::LD1RW_IMM:
2602 case AArch64::LD1SB_D_IMM:
2603 case AArch64::LD1SB_H_IMM:
2604 case AArch64::LD1SB_S_IMM:
2605 case AArch64::LD1SH_D_IMM:
2606 case AArch64::LD1SH_S_IMM:
2607 case AArch64::LD1SW_D_IMM:
2608 case AArch64::LD1W_D_IMM:
2609 case AArch64::LD1W_IMM:
2610 case AArch64::LD2B_IMM:
2611 case AArch64::LD2D_IMM:
2612 case AArch64::LD2H_IMM:
2613 case AArch64::LD2W_IMM:
2614 case AArch64::LD3B_IMM:
2615 case AArch64::LD3D_IMM:
2616 case AArch64::LD3H_IMM:
2617 case AArch64::LD3W_IMM:
2618 case AArch64::LD4B_IMM:
2619 case AArch64::LD4D_IMM:
2620 case AArch64::LD4H_IMM:
2621 case AArch64::LD4W_IMM:
2622 case AArch64::LDG:
2623 case AArch64::LDNF1B_D_IMM:
2624 case AArch64::LDNF1B_H_IMM:
2625 case AArch64::LDNF1B_IMM:
2626 case AArch64::LDNF1B_S_IMM:
2627 case AArch64::LDNF1D_IMM:
2628 case AArch64::LDNF1H_D_IMM:
2629 case AArch64::LDNF1H_IMM:
2630 case AArch64::LDNF1H_S_IMM:
2631 case AArch64::LDNF1SB_D_IMM:
2632 case AArch64::LDNF1SB_H_IMM:
2633 case AArch64::LDNF1SB_S_IMM:
2634 case AArch64::LDNF1SH_D_IMM:
2635 case AArch64::LDNF1SH_S_IMM:
2636 case AArch64::LDNF1SW_D_IMM:
2637 case AArch64::LDNF1W_D_IMM:
2638 case AArch64::LDNF1W_IMM:
2639 case AArch64::LDNPDi:
2640 case AArch64::LDNPQi:
2641 case AArch64::LDNPSi:
2642 case AArch64::LDNPWi:
2643 case AArch64::LDNPXi:
2644 case AArch64::LDNT1B_ZRI:
2645 case AArch64::LDNT1D_ZRI:
2646 case AArch64::LDNT1H_ZRI:
2647 case AArch64::LDNT1W_ZRI:
2648 case AArch64::LDPDi:
2649 case AArch64::LDPQi:
2650 case AArch64::LDPSi:
2651 case AArch64::LDPWi:
2652 case AArch64::LDPXi:
2653 case AArch64::LDRBBpost:
2654 case AArch64::LDRBBpre:
2655 case AArch64::LDRBpost:
2656 case AArch64::LDRBpre:
2657 case AArch64::LDRDpost:
2658 case AArch64::LDRDpre:
2659 case AArch64::LDRHHpost:
2660 case AArch64::LDRHHpre:
2661 case AArch64::LDRHpost:
2662 case AArch64::LDRHpre:
2663 case AArch64::LDRQpost:
2664 case AArch64::LDRQpre:
2665 case AArch64::LDRSpost:
2666 case AArch64::LDRSpre:
2667 case AArch64::LDRWpost:
2668 case AArch64::LDRWpre:
2669 case AArch64::LDRXpost:
2670 case AArch64::LDRXpre:
2671 case AArch64::ST1B_D_IMM:
2672 case AArch64::ST1B_H_IMM:
2673 case AArch64::ST1B_IMM:
2674 case AArch64::ST1B_S_IMM:
2675 case AArch64::ST1D_IMM:
2676 case AArch64::ST1H_D_IMM:
2677 case AArch64::ST1H_IMM:
2678 case AArch64::ST1H_S_IMM:
2679 case AArch64::ST1W_D_IMM:
2680 case AArch64::ST1W_IMM:
2681 case AArch64::ST2B_IMM:
2682 case AArch64::ST2D_IMM:
2683 case AArch64::ST2H_IMM:
2684 case AArch64::ST2W_IMM:
2685 case AArch64::ST3B_IMM:
2686 case AArch64::ST3D_IMM:
2687 case AArch64::ST3H_IMM:
2688 case AArch64::ST3W_IMM:
2689 case AArch64::ST4B_IMM:
2690 case AArch64::ST4D_IMM:
2691 case AArch64::ST4H_IMM:
2692 case AArch64::ST4W_IMM:
2693 case AArch64::STGPi:
2694 case AArch64::STGPreIndex:
2695 case AArch64::STZGPreIndex:
2696 case AArch64::ST2GPreIndex:
2697 case AArch64::STZ2GPreIndex:
2698 case AArch64::STGPostIndex:
2699 case AArch64::STZGPostIndex:
2700 case AArch64::ST2GPostIndex:
2701 case AArch64::STZ2GPostIndex:
2702 case AArch64::STNPDi:
2703 case AArch64::STNPQi:
2704 case AArch64::STNPSi:
2705 case AArch64::STNPWi:
2706 case AArch64::STNPXi:
2707 case AArch64::STNT1B_ZRI:
2708 case AArch64::STNT1D_ZRI:
2709 case AArch64::STNT1H_ZRI:
2710 case AArch64::STNT1W_ZRI:
2711 case AArch64::STPDi:
2712 case AArch64::STPQi:
2713 case AArch64::STPSi:
2714 case AArch64::STPWi:
2715 case AArch64::STPXi:
2716 case AArch64::STRBBpost:
2717 case AArch64::STRBBpre:
2718 case AArch64::STRBpost:
2719 case AArch64::STRBpre:
2720 case AArch64::STRDpost:
2721 case AArch64::STRDpre:
2722 case AArch64::STRHHpost:
2723 case AArch64::STRHHpre:
2724 case AArch64::STRHpost:
2725 case AArch64::STRHpre:
2726 case AArch64::STRQpost:
2727 case AArch64::STRQpre:
2728 case AArch64::STRSpost:
2729 case AArch64::STRSpre:
2730 case AArch64::STRWpost:
2731 case AArch64::STRWpre:
2732 case AArch64::STRXpost:
2733 case AArch64::STRXpre:
2734 return 3;
2735 case AArch64::LDPDpost:
2736 case AArch64::LDPDpre:
2737 case AArch64::LDPQpost:
2738 case AArch64::LDPQpre:
2739 case AArch64::LDPSpost:
2740 case AArch64::LDPSpre:
2741 case AArch64::LDPWpost:
2742 case AArch64::LDPWpre:
2743 case AArch64::LDPXpost:
2744 case AArch64::LDPXpre:
2745 case AArch64::STGPpre:
2746 case AArch64::STGPpost:
2747 case AArch64::STPDpost:
2748 case AArch64::STPDpre:
2749 case AArch64::STPQpost:
2750 case AArch64::STPQpre:
2751 case AArch64::STPSpost:
2752 case AArch64::STPSpre:
2753 case AArch64::STPWpost:
2754 case AArch64::STPWpre:
2755 case AArch64::STPXpost:
2756 case AArch64::STPXpre:
2757 return 4;
2758 }
2759}
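// For example, for STRXui (str Xt, [Xn, #imm]) the immediate is operand 2,
// while for the pre/post-indexed STRXpre it is operand 3, because the
// write-back base register occupies an extra operand.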
2760
2761bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2762 switch (MI.getOpcode()) {
2763 default:
2764 return false;
2765 // Scaled instructions.
2766 case AArch64::STRSui:
2767 case AArch64::STRDui:
2768 case AArch64::STRQui:
2769 case AArch64::STRXui:
2770 case AArch64::STRWui:
2771 case AArch64::LDRSui:
2772 case AArch64::LDRDui:
2773 case AArch64::LDRQui:
2774 case AArch64::LDRXui:
2775 case AArch64::LDRWui:
2776 case AArch64::LDRSWui:
2777 // Unscaled instructions.
2778 case AArch64::STURSi:
2779 case AArch64::STRSpre:
2780 case AArch64::STURDi:
2781 case AArch64::STRDpre:
2782 case AArch64::STURQi:
2783 case AArch64::STRQpre:
2784 case AArch64::STURWi:
2785 case AArch64::STRWpre:
2786 case AArch64::STURXi:
2787 case AArch64::STRXpre:
2788 case AArch64::LDURSi:
2789 case AArch64::LDRSpre:
2790 case AArch64::LDURDi:
2791 case AArch64::LDRDpre:
2792 case AArch64::LDURQi:
2793 case AArch64::LDRQpre:
2794 case AArch64::LDURWi:
2795 case AArch64::LDRWpre:
2796 case AArch64::LDURXi:
2797 case AArch64::LDRXpre:
2798 case AArch64::LDURSWi:
2799 case AArch64::LDRSWpre:
2800 // SVE instructions.
2801 case AArch64::LDR_ZXI:
2802 case AArch64::STR_ZXI:
2803 return true;
2804 }
2805}
2806
2807bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2808 switch (MI.getOpcode()) {
2809 default:
2810 assert((!MI.isCall() || !MI.isReturn()) &&
2811 "Unexpected instruction - was a new tail call opcode introduced?");
2812 return false;
2813 case AArch64::TCRETURNdi:
2814 case AArch64::TCRETURNri:
2815 case AArch64::TCRETURNrix16x17:
2816 case AArch64::TCRETURNrix17:
2817 case AArch64::TCRETURNrinotx16:
2818 case AArch64::TCRETURNriALL:
2819 case AArch64::AUTH_TCRETURN:
2820 case AArch64::AUTH_TCRETURN_BTI:
2821 return true;
2822 }
2823}
2824
2825unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2826 switch (Opc) {
2827 default:
2828 llvm_unreachable("Opcode has no flag setting equivalent!");
2829 // 32-bit cases:
2830 case AArch64::ADDWri:
2831 return AArch64::ADDSWri;
2832 case AArch64::ADDWrr:
2833 return AArch64::ADDSWrr;
2834 case AArch64::ADDWrs:
2835 return AArch64::ADDSWrs;
2836 case AArch64::ADDWrx:
2837 return AArch64::ADDSWrx;
2838 case AArch64::ANDWri:
2839 return AArch64::ANDSWri;
2840 case AArch64::ANDWrr:
2841 return AArch64::ANDSWrr;
2842 case AArch64::ANDWrs:
2843 return AArch64::ANDSWrs;
2844 case AArch64::BICWrr:
2845 return AArch64::BICSWrr;
2846 case AArch64::BICWrs:
2847 return AArch64::BICSWrs;
2848 case AArch64::SUBWri:
2849 return AArch64::SUBSWri;
2850 case AArch64::SUBWrr:
2851 return AArch64::SUBSWrr;
2852 case AArch64::SUBWrs:
2853 return AArch64::SUBSWrs;
2854 case AArch64::SUBWrx:
2855 return AArch64::SUBSWrx;
2856 // 64-bit cases:
2857 case AArch64::ADDXri:
2858 return AArch64::ADDSXri;
2859 case AArch64::ADDXrr:
2860 return AArch64::ADDSXrr;
2861 case AArch64::ADDXrs:
2862 return AArch64::ADDSXrs;
2863 case AArch64::ADDXrx:
2864 return AArch64::ADDSXrx;
2865 case AArch64::ANDXri:
2866 return AArch64::ANDSXri;
2867 case AArch64::ANDXrr:
2868 return AArch64::ANDSXrr;
2869 case AArch64::ANDXrs:
2870 return AArch64::ANDSXrs;
2871 case AArch64::BICXrr:
2872 return AArch64::BICSXrr;
2873 case AArch64::BICXrs:
2874 return AArch64::BICSXrs;
2875 case AArch64::SUBXri:
2876 return AArch64::SUBSXri;
2877 case AArch64::SUBXrr:
2878 return AArch64::SUBSXrr;
2879 case AArch64::SUBXrs:
2880 return AArch64::SUBSXrs;
2881 case AArch64::SUBXrx:
2882 return AArch64::SUBSXrx;
2883 // SVE instructions:
2884 case AArch64::AND_PPzPP:
2885 return AArch64::ANDS_PPzPP;
2886 case AArch64::BIC_PPzPP:
2887 return AArch64::BICS_PPzPP;
2888 case AArch64::EOR_PPzPP:
2889 return AArch64::EORS_PPzPP;
2890 case AArch64::NAND_PPzPP:
2891 return AArch64::NANDS_PPzPP;
2892 case AArch64::NOR_PPzPP:
2893 return AArch64::NORS_PPzPP;
2894 case AArch64::ORN_PPzPP:
2895 return AArch64::ORNS_PPzPP;
2896 case AArch64::ORR_PPzPP:
2897 return AArch64::ORRS_PPzPP;
2898 case AArch64::BRKA_PPzP:
2899 return AArch64::BRKAS_PPzP;
2900 case AArch64::BRKPA_PPzPP:
2901 return AArch64::BRKPAS_PPzPP;
2902 case AArch64::BRKB_PPzP:
2903 return AArch64::BRKBS_PPzP;
2904 case AArch64::BRKPB_PPzPP:
2905 return AArch64::BRKPBS_PPzPP;
2906 case AArch64::BRKN_PPzP:
2907 return AArch64::BRKNS_PPzP;
2908 case AArch64::RDFFR_PPz:
2909 return AArch64::RDFFRS_PPz;
2910 case AArch64::PTRUE_B:
2911 return AArch64::PTRUES_B;
2912 }
2913}
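// For example, convertToFlagSettingOpc(AArch64::ADDWrr) returns
// AArch64::ADDSWrr, the variant that additionally defines NZCV; passing an
// opcode without a flag-setting form is a programming error (see the
// llvm_unreachable above).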
2914
2915// Is this a candidate for ld/st merging or pairing? For example, we don't
2916// touch volatiles or load/stores that have a hint to avoid pair formation.
2917bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2918
2919 bool IsPreLdSt = isPreLdSt(MI);
2920
2921 // If this is a volatile load/store, don't mess with it.
2922 if (MI.hasOrderedMemoryRef())
2923 return false;
2924
2925 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2926 // For Pre-inc LD/ST, the operand is shifted by one.
2927 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2928 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2929 "Expected a reg or frame index operand.");
2930
2931 // For Pre-indexed addressing quadword instructions, the third operand is the
2932 // immediate value.
2933 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2934
2935 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2936 return false;
2937
2938 // Can't merge/pair if the instruction modifies the base register.
2939 // e.g., ldr x0, [x0]
2940 // This case will never occur with an FI base.
2941 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2942 // STR<S,D,Q,W,X>pre, it can be merged.
2943 // For example:
2944 // ldr q0, [x11, #32]!
2945 // ldr q1, [x11, #16]
2946 // to
2947 // ldp q0, q1, [x11, #32]!
2948 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2949 Register BaseReg = MI.getOperand(1).getReg();
2950 const TargetRegisterInfo *TRI = &getRegisterInfo();
2951 if (MI.modifiesRegister(BaseReg, TRI))
2952 return false;
2953 }
2954
2955 // Pairing SVE fills/spills is only valid for little-endian targets that
2956 // implement VLS 128.
2957 switch (MI.getOpcode()) {
2958 default:
2959 break;
2960 case AArch64::LDR_ZXI:
2961 case AArch64::STR_ZXI:
2962 if (!Subtarget.isLittleEndian() ||
2963 Subtarget.getSVEVectorSizeInBits() != 128)
2964 return false;
2965 }
2966
2967 // Check if this load/store has a hint to avoid pair formation.
2968 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2969 if (isLdStPairSuppressed(MI))
2970 return false;
2971
2972 // Do not pair any callee-save store/reload instructions in the
2973 // prologue/epilogue if the CFI information encoded the operations as separate
2974 // instructions, as that will cause the size of the actual prologue to mismatch
2975 // with the prologue size recorded in the Windows CFI.
2976 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2977 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2978 MI.getMF()->getFunction().needsUnwindTableEntry();
2979 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2980 MI.getFlag(MachineInstr::FrameDestroy)))
2981 return false;
2982
2983 // On some CPUs quad load/store pairs are slower than two single load/stores.
2984 if (Subtarget.isPaired128Slow()) {
2985 switch (MI.getOpcode()) {
2986 default:
2987 break;
2988 case AArch64::LDURQi:
2989 case AArch64::STURQi:
2990 case AArch64::LDRQui:
2991 case AArch64::STRQui:
2992 return false;
2993 }
2994 }
2995
2996 return true;
2997}
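// Illustrative outcome (editorial sketch; register names are made up): loads
// that pass this check can later be combined by the load/store optimizer, e.g.
// \code
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// \endcode
// into
// \code
//   ldp x0, x1, [x2]
// \endcode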
2998
2999bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
3000 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
3001 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3002 const TargetRegisterInfo *TRI) const {
3003 if (!LdSt.mayLoadOrStore())
3004 return false;
3005
3006 const MachineOperand *BaseOp;
3007 TypeSize WidthN(0, false);
3008 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3009 WidthN, TRI))
3010 return false;
3011 // The maximum vscale is 16 under AArch64; return the maximal extent for the
3012 // vector.
3013 Width = LocationSize::precise(WidthN);
3014 BaseOps.push_back(BaseOp);
3015 return true;
3016}
3017
3018std::optional<ExtAddrMode>
3019AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3020 const TargetRegisterInfo *TRI) const {
3021 const MachineOperand *Base; // Filled with the base operand of MI.
3022 int64_t Offset; // Filled with the offset of MI.
3023 bool OffsetIsScalable;
3024 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3025 return std::nullopt;
3026
3027 if (!Base->isReg())
3028 return std::nullopt;
3029 ExtAddrMode AM;
3030 AM.BaseReg = Base->getReg();
3031 AM.Displacement = Offset;
3032 AM.ScaledReg = 0;
3033 AM.Scale = 0;
3034 return AM;
3035}
3036
3037bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3038 Register Reg,
3039 const MachineInstr &AddrI,
3040 ExtAddrMode &AM) const {
3041 // Filter out instructions into which we cannot fold.
3042 unsigned NumBytes;
3043 int64_t OffsetScale = 1;
3044 switch (MemI.getOpcode()) {
3045 default:
3046 return false;
3047
3048 case AArch64::LDURQi:
3049 case AArch64::STURQi:
3050 NumBytes = 16;
3051 break;
3052
3053 case AArch64::LDURDi:
3054 case AArch64::STURDi:
3055 case AArch64::LDURXi:
3056 case AArch64::STURXi:
3057 NumBytes = 8;
3058 break;
3059
3060 case AArch64::LDURWi:
3061 case AArch64::LDURSWi:
3062 case AArch64::STURWi:
3063 NumBytes = 4;
3064 break;
3065
3066 case AArch64::LDURHi:
3067 case AArch64::STURHi:
3068 case AArch64::LDURHHi:
3069 case AArch64::STURHHi:
3070 case AArch64::LDURSHXi:
3071 case AArch64::LDURSHWi:
3072 NumBytes = 2;
3073 break;
3074
3075 case AArch64::LDRBroX:
3076 case AArch64::LDRBBroX:
3077 case AArch64::LDRSBXroX:
3078 case AArch64::LDRSBWroX:
3079 case AArch64::STRBroX:
3080 case AArch64::STRBBroX:
3081 case AArch64::LDURBi:
3082 case AArch64::LDURBBi:
3083 case AArch64::LDURSBXi:
3084 case AArch64::LDURSBWi:
3085 case AArch64::STURBi:
3086 case AArch64::STURBBi:
3087 case AArch64::LDRBui:
3088 case AArch64::LDRBBui:
3089 case AArch64::LDRSBXui:
3090 case AArch64::LDRSBWui:
3091 case AArch64::STRBui:
3092 case AArch64::STRBBui:
3093 NumBytes = 1;
3094 break;
3095
3096 case AArch64::LDRQroX:
3097 case AArch64::STRQroX:
3098 case AArch64::LDRQui:
3099 case AArch64::STRQui:
3100 NumBytes = 16;
3101 OffsetScale = 16;
3102 break;
3103
3104 case AArch64::LDRDroX:
3105 case AArch64::STRDroX:
3106 case AArch64::LDRXroX:
3107 case AArch64::STRXroX:
3108 case AArch64::LDRDui:
3109 case AArch64::STRDui:
3110 case AArch64::LDRXui:
3111 case AArch64::STRXui:
3112 NumBytes = 8;
3113 OffsetScale = 8;
3114 break;
3115
3116 case AArch64::LDRWroX:
3117 case AArch64::LDRSWroX:
3118 case AArch64::STRWroX:
3119 case AArch64::LDRWui:
3120 case AArch64::LDRSWui:
3121 case AArch64::STRWui:
3122 NumBytes = 4;
3123 OffsetScale = 4;
3124 break;
3125
3126 case AArch64::LDRHroX:
3127 case AArch64::STRHroX:
3128 case AArch64::LDRHHroX:
3129 case AArch64::STRHHroX:
3130 case AArch64::LDRSHXroX:
3131 case AArch64::LDRSHWroX:
3132 case AArch64::LDRHui:
3133 case AArch64::STRHui:
3134 case AArch64::LDRHHui:
3135 case AArch64::STRHHui:
3136 case AArch64::LDRSHXui:
3137 case AArch64::LDRSHWui:
3138 NumBytes = 2;
3139 OffsetScale = 2;
3140 break;
3141 }
3142
3143 // Check the fold operand is not the loaded/stored value.
3144 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3145 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3146 return false;
3147
3148 // Handle memory instructions with a [Reg, Reg] addressing mode.
3149 if (MemI.getOperand(2).isReg()) {
3150 // Bail if the addressing mode already includes extension of the offset
3151 // register.
3152 if (MemI.getOperand(3).getImm())
3153 return false;
3154
3155 // Check if we actually have a scaled offset.
3156 if (MemI.getOperand(4).getImm() == 0)
3157 OffsetScale = 1;
3158
3159 // If the address instruction is folded into the base register, then the
3160 // addressing mode must not have a scale. Then we can swap the base and the
3161 // scaled registers.
3162 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3163 return false;
3164
3165 switch (AddrI.getOpcode()) {
3166 default:
3167 return false;
3168
3169 case AArch64::SBFMXri:
3170 // sxtw Xa, Wm
3171 // ldr Xd, [Xn, Xa, lsl #N]
3172 // ->
3173 // ldr Xd, [Xn, Wm, sxtw #N]
3174 if (AddrI.getOperand(2).getImm() != 0 ||
3175 AddrI.getOperand(3).getImm() != 31)
3176 return false;
3177
3178 AM.BaseReg = MemI.getOperand(1).getReg();
3179 if (AM.BaseReg == Reg)
3180 AM.BaseReg = MemI.getOperand(2).getReg();
3181 AM.ScaledReg = AddrI.getOperand(1).getReg();
3182 AM.Scale = OffsetScale;
3183 AM.Displacement = 0;
3184 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3185 return true;
3186
3187 case TargetOpcode::SUBREG_TO_REG: {
3188 // mov Wa, Wm
3189 // ldr Xd, [Xn, Xa, lsl #N]
3190 // ->
3191 // ldr Xd, [Xn, Wm, uxtw #N]
3192
3193 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3194 if (AddrI.getOperand(1).getImm() != 0 ||
3195 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3196 return false;
3197
3198 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3199 Register OffsetReg = AddrI.getOperand(2).getReg();
3200 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3201 return false;
3202
3203 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3204 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3205 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3206 DefMI.getOperand(3).getImm() != 0)
3207 return false;
3208
3209 AM.BaseReg = MemI.getOperand(1).getReg();
3210 if (AM.BaseReg == Reg)
3211 AM.BaseReg = MemI.getOperand(2).getReg();
3212 AM.ScaledReg = DefMI.getOperand(2).getReg();
3213 AM.Scale = OffsetScale;
3214 AM.Displacement = 0;
3215 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3216 return true;
3217 }
3218 }
3219 }
3220
3221 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3222
3223 // Check we are not breaking a potential conversion to an LDP.
3224 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3225 int64_t NewOffset) -> bool {
3226 int64_t MinOffset, MaxOffset;
3227 switch (NumBytes) {
3228 default:
3229 return true;
3230 case 4:
3231 MinOffset = -256;
3232 MaxOffset = 252;
3233 break;
3234 case 8:
3235 MinOffset = -512;
3236 MaxOffset = 504;
3237 break;
3238 case 16:
3239 MinOffset = -1024;
3240 MaxOffset = 1008;
3241 break;
3242 }
3243 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3244 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3245 };
3246 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3247 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3248 int64_t NewOffset = OldOffset + Disp;
3249 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3250 return false;
3251 // If the old offset would fit into an LDP, but the new offset wouldn't,
3252 // bail out.
3253 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3254 return false;
3255 AM.BaseReg = AddrI.getOperand(1).getReg();
3256 AM.ScaledReg = 0;
3257 AM.Scale = 0;
3258 AM.Displacement = NewOffset;
3259 AM.Form = ExtAddrMode::Formula::Basic;
3260 return true;
3261 };
3262
3263 auto canFoldAddRegIntoAddrMode =
3264 [&](int64_t Scale,
3265 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3266 if (MemI.getOperand(2).getImm() != 0)
3267 return false;
3268 if ((unsigned)Scale != Scale)
3269 return false;
3270 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3271 return false;
3272 AM.BaseReg = AddrI.getOperand(1).getReg();
3273 AM.ScaledReg = AddrI.getOperand(2).getReg();
3274 AM.Scale = Scale;
3275 AM.Displacement = 0;
3276 AM.Form = Form;
3277 return true;
3278 };
3279
3280 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3281 unsigned Opcode = MemI.getOpcode();
3282 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3283 Subtarget.isSTRQroSlow();
3284 };
3285
3286 int64_t Disp = 0;
3287 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3288 switch (AddrI.getOpcode()) {
3289 default:
3290 return false;
3291
3292 case AArch64::ADDXri:
3293 // add Xa, Xn, #N
3294 // ldr Xd, [Xa, #M]
3295 // ->
3296 // ldr Xd, [Xn, #N'+M]
3297 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3298 return canFoldAddSubImmIntoAddrMode(Disp);
3299
3300 case AArch64::SUBXri:
3301 // sub Xa, Xn, #N
3302 // ldr Xd, [Xa, #M]
3303 // ->
3304 // ldr Xd, [Xn, #N'+M]
3305 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3306 return canFoldAddSubImmIntoAddrMode(-Disp);
3307
3308 case AArch64::ADDXrs: {
3309 // add Xa, Xn, Xm, lsl #N
3310 // ldr Xd, [Xa]
3311 // ->
3312 // ldr Xd, [Xn, Xm, lsl #N]
3313
3314 // Don't fold the add if the result would be slower, unless optimising for
3315 // size.
3316 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3317 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3318 return false;
3319 Shift = AArch64_AM::getShiftValue(Shift);
3320 if (!OptSize) {
3321 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3322 return false;
3323 if (avoidSlowSTRQ(MemI))
3324 return false;
3325 }
3326 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3327 }
3328
3329 case AArch64::ADDXrr:
3330 // add Xa, Xn, Xm
3331 // ldr Xd, [Xa]
3332 // ->
3333 // ldr Xd, [Xn, Xm, lsl #0]
3334
3335 // Don't fold the add if the result would be slower, unless optimising for
3336 // size.
3337 if (!OptSize && avoidSlowSTRQ(MemI))
3338 return false;
3339 return canFoldAddRegIntoAddrMode(1);
3340
3341 case AArch64::ADDXrx:
3342 // add Xa, Xn, Wm, {s,u}xtw #N
3343 // ldr Xd, [Xa]
3344 // ->
3345 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3346
3347 // Don't fold the add if the result would be slower, unless optimising for
3348 // size.
3349 if (!OptSize && avoidSlowSTRQ(MemI))
3350 return false;
3351
3352 // Can fold only sign-/zero-extend of a word.
3353 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3354 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3355 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3356 return false;
3357
3358 return canFoldAddRegIntoAddrMode(
3359 1ULL << AArch64_AM::getArithShiftValue(Imm),
3360 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3361 : ExtAddrMode::Formula::ZExtScaledReg);
3362 }
3363}
3364
3365// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3366// return the opcode of an instruction performing the same operation, but using
3367// the [Reg, Reg] addressing mode.
3368static unsigned regOffsetOpcode(unsigned Opcode) {
3369 switch (Opcode) {
3370 default:
3371 llvm_unreachable("Address folding not implemented for instruction");
3372
3373 case AArch64::LDURQi:
3374 case AArch64::LDRQui:
3375 return AArch64::LDRQroX;
3376 case AArch64::STURQi:
3377 case AArch64::STRQui:
3378 return AArch64::STRQroX;
3379 case AArch64::LDURDi:
3380 case AArch64::LDRDui:
3381 return AArch64::LDRDroX;
3382 case AArch64::STURDi:
3383 case AArch64::STRDui:
3384 return AArch64::STRDroX;
3385 case AArch64::LDURXi:
3386 case AArch64::LDRXui:
3387 return AArch64::LDRXroX;
3388 case AArch64::STURXi:
3389 case AArch64::STRXui:
3390 return AArch64::STRXroX;
3391 case AArch64::LDURWi:
3392 case AArch64::LDRWui:
3393 return AArch64::LDRWroX;
3394 case AArch64::LDURSWi:
3395 case AArch64::LDRSWui:
3396 return AArch64::LDRSWroX;
3397 case AArch64::STURWi:
3398 case AArch64::STRWui:
3399 return AArch64::STRWroX;
3400 case AArch64::LDURHi:
3401 case AArch64::LDRHui:
3402 return AArch64::LDRHroX;
3403 case AArch64::STURHi:
3404 case AArch64::STRHui:
3405 return AArch64::STRHroX;
3406 case AArch64::LDURHHi:
3407 case AArch64::LDRHHui:
3408 return AArch64::LDRHHroX;
3409 case AArch64::STURHHi:
3410 case AArch64::STRHHui:
3411 return AArch64::STRHHroX;
3412 case AArch64::LDURSHXi:
3413 case AArch64::LDRSHXui:
3414 return AArch64::LDRSHXroX;
3415 case AArch64::LDURSHWi:
3416 case AArch64::LDRSHWui:
3417 return AArch64::LDRSHWroX;
3418 case AArch64::LDURBi:
3419 case AArch64::LDRBui:
3420 return AArch64::LDRBroX;
3421 case AArch64::LDURBBi:
3422 case AArch64::LDRBBui:
3423 return AArch64::LDRBBroX;
3424 case AArch64::LDURSBXi:
3425 case AArch64::LDRSBXui:
3426 return AArch64::LDRSBXroX;
3427 case AArch64::LDURSBWi:
3428 case AArch64::LDRSBWui:
3429 return AArch64::LDRSBWroX;
3430 case AArch64::STURBi:
3431 case AArch64::STRBui:
3432 return AArch64::STRBroX;
3433 case AArch64::STURBBi:
3434 case AArch64::STRBBui:
3435 return AArch64::STRBBroX;
3436 }
3437}
3438
3439// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3440// the opcode of an instruction performing the same operation, but using the
3441// [Reg, #Imm] addressing mode with scaled offset.
3442unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3443 switch (Opcode) {
3444 default:
3445 llvm_unreachable("Address folding not implemented for instruction");
3446
3447 case AArch64::LDURQi:
3448 Scale = 16;
3449 return AArch64::LDRQui;
3450 case AArch64::STURQi:
3451 Scale = 16;
3452 return AArch64::STRQui;
3453 case AArch64::LDURDi:
3454 Scale = 8;
3455 return AArch64::LDRDui;
3456 case AArch64::STURDi:
3457 Scale = 8;
3458 return AArch64::STRDui;
3459 case AArch64::LDURXi:
3460 Scale = 8;
3461 return AArch64::LDRXui;
3462 case AArch64::STURXi:
3463 Scale = 8;
3464 return AArch64::STRXui;
3465 case AArch64::LDURWi:
3466 Scale = 4;
3467 return AArch64::LDRWui;
3468 case AArch64::LDURSWi:
3469 Scale = 4;
3470 return AArch64::LDRSWui;
3471 case AArch64::STURWi:
3472 Scale = 4;
3473 return AArch64::STRWui;
3474 case AArch64::LDURHi:
3475 Scale = 2;
3476 return AArch64::LDRHui;
3477 case AArch64::STURHi:
3478 Scale = 2;
3479 return AArch64::STRHui;
3480 case AArch64::LDURHHi:
3481 Scale = 2;
3482 return AArch64::LDRHHui;
3483 case AArch64::STURHHi:
3484 Scale = 2;
3485 return AArch64::STRHHui;
3486 case AArch64::LDURSHXi:
3487 Scale = 2;
3488 return AArch64::LDRSHXui;
3489 case AArch64::LDURSHWi:
3490 Scale = 2;
3491 return AArch64::LDRSHWui;
3492 case AArch64::LDURBi:
3493 Scale = 1;
3494 return AArch64::LDRBui;
3495 case AArch64::LDURBBi:
3496 Scale = 1;
3497 return AArch64::LDRBBui;
3498 case AArch64::LDURSBXi:
3499 Scale = 1;
3500 return AArch64::LDRSBXui;
3501 case AArch64::LDURSBWi:
3502 Scale = 1;
3503 return AArch64::LDRSBWui;
3504 case AArch64::STURBi:
3505 Scale = 1;
3506 return AArch64::STRBui;
3507 case AArch64::STURBBi:
3508 Scale = 1;
3509 return AArch64::STRBBui;
3510 case AArch64::LDRQui:
3511 case AArch64::STRQui:
3512 Scale = 16;
3513 return Opcode;
3514 case AArch64::LDRDui:
3515 case AArch64::STRDui:
3516 case AArch64::LDRXui:
3517 case AArch64::STRXui:
3518 Scale = 8;
3519 return Opcode;
3520 case AArch64::LDRWui:
3521 case AArch64::LDRSWui:
3522 case AArch64::STRWui:
3523 Scale = 4;
3524 return Opcode;
3525 case AArch64::LDRHui:
3526 case AArch64::STRHui:
3527 case AArch64::LDRHHui:
3528 case AArch64::STRHHui:
3529 case AArch64::LDRSHXui:
3530 case AArch64::LDRSHWui:
3531 Scale = 2;
3532 return Opcode;
3533 case AArch64::LDRBui:
3534 case AArch64::LDRBBui:
3535 case AArch64::LDRSBXui:
3536 case AArch64::LDRSBWui:
3537 case AArch64::STRBui:
3538 case AArch64::STRBBui:
3539 Scale = 1;
3540 return Opcode;
3541 }
3542}
3543
3544// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3545// the opcode of an instruction performing the same operation, but using the
3546// [Reg, #Imm] addressing mode with unscaled offset.
3547unsigned unscaledOffsetOpcode(unsigned Opcode) {
3548 switch (Opcode) {
3549 default:
3550 llvm_unreachable("Address folding not implemented for instruction");
3551
3552 case AArch64::LDURQi:
3553 case AArch64::STURQi:
3554 case AArch64::LDURDi:
3555 case AArch64::STURDi:
3556 case AArch64::LDURXi:
3557 case AArch64::STURXi:
3558 case AArch64::LDURWi:
3559 case AArch64::LDURSWi:
3560 case AArch64::STURWi:
3561 case AArch64::LDURHi:
3562 case AArch64::STURHi:
3563 case AArch64::LDURHHi:
3564 case AArch64::STURHHi:
3565 case AArch64::LDURSHXi:
3566 case AArch64::LDURSHWi:
3567 case AArch64::LDURBi:
3568 case AArch64::STURBi:
3569 case AArch64::LDURBBi:
3570 case AArch64::STURBBi:
3571 case AArch64::LDURSBWi:
3572 case AArch64::LDURSBXi:
3573 return Opcode;
3574 case AArch64::LDRQui:
3575 return AArch64::LDURQi;
3576 case AArch64::STRQui:
3577 return AArch64::STURQi;
3578 case AArch64::LDRDui:
3579 return AArch64::LDURDi;
3580 case AArch64::STRDui:
3581 return AArch64::STURDi;
3582 case AArch64::LDRXui:
3583 return AArch64::LDURXi;
3584 case AArch64::STRXui:
3585 return AArch64::STURXi;
3586 case AArch64::LDRWui:
3587 return AArch64::LDURWi;
3588 case AArch64::LDRSWui:
3589 return AArch64::LDURSWi;
3590 case AArch64::STRWui:
3591 return AArch64::STURWi;
3592 case AArch64::LDRHui:
3593 return AArch64::LDURHi;
3594 case AArch64::STRHui:
3595 return AArch64::STURHi;
3596 case AArch64::LDRHHui:
3597 return AArch64::LDURHHi;
3598 case AArch64::STRHHui:
3599 return AArch64::STURHHi;
3600 case AArch64::LDRSHXui:
3601 return AArch64::LDURSHXi;
3602 case AArch64::LDRSHWui:
3603 return AArch64::LDURSHWi;
3604 case AArch64::LDRBBui:
3605 return AArch64::LDURBBi;
3606 case AArch64::LDRBui:
3607 return AArch64::LDURBi;
3608 case AArch64::STRBBui:
3609 return AArch64::STURBBi;
3610 case AArch64::STRBui:
3611 return AArch64::STURBi;
3612 case AArch64::LDRSBWui:
3613 return AArch64::LDURSBWi;
3614 case AArch64::LDRSBXui:
3615 return AArch64::LDURSBXi;
3616 }
3617}
3618
3619// Given the opcode of a memory load/store instruction, return the opcode of an
3620// instruction performing the same operation, but using
3621// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3622// offset register.
3623static unsigned offsetExtendOpcode(unsigned Opcode) {
3624 switch (Opcode) {
3625 default:
3626 llvm_unreachable("Address folding not implemented for instruction");
3627
3628 case AArch64::LDRQroX:
3629 case AArch64::LDURQi:
3630 case AArch64::LDRQui:
3631 return AArch64::LDRQroW;
3632 case AArch64::STRQroX:
3633 case AArch64::STURQi:
3634 case AArch64::STRQui:
3635 return AArch64::STRQroW;
3636 case AArch64::LDRDroX:
3637 case AArch64::LDURDi:
3638 case AArch64::LDRDui:
3639 return AArch64::LDRDroW;
3640 case AArch64::STRDroX:
3641 case AArch64::STURDi:
3642 case AArch64::STRDui:
3643 return AArch64::STRDroW;
3644 case AArch64::LDRXroX:
3645 case AArch64::LDURXi:
3646 case AArch64::LDRXui:
3647 return AArch64::LDRXroW;
3648 case AArch64::STRXroX:
3649 case AArch64::STURXi:
3650 case AArch64::STRXui:
3651 return AArch64::STRXroW;
3652 case AArch64::LDRWroX:
3653 case AArch64::LDURWi:
3654 case AArch64::LDRWui:
3655 return AArch64::LDRWroW;
3656 case AArch64::LDRSWroX:
3657 case AArch64::LDURSWi:
3658 case AArch64::LDRSWui:
3659 return AArch64::LDRSWroW;
3660 case AArch64::STRWroX:
3661 case AArch64::STURWi:
3662 case AArch64::STRWui:
3663 return AArch64::STRWroW;
3664 case AArch64::LDRHroX:
3665 case AArch64::LDURHi:
3666 case AArch64::LDRHui:
3667 return AArch64::LDRHroW;
3668 case AArch64::STRHroX:
3669 case AArch64::STURHi:
3670 case AArch64::STRHui:
3671 return AArch64::STRHroW;
3672 case AArch64::LDRHHroX:
3673 case AArch64::LDURHHi:
3674 case AArch64::LDRHHui:
3675 return AArch64::LDRHHroW;
3676 case AArch64::STRHHroX:
3677 case AArch64::STURHHi:
3678 case AArch64::STRHHui:
3679 return AArch64::STRHHroW;
3680 case AArch64::LDRSHXroX:
3681 case AArch64::LDURSHXi:
3682 case AArch64::LDRSHXui:
3683 return AArch64::LDRSHXroW;
3684 case AArch64::LDRSHWroX:
3685 case AArch64::LDURSHWi:
3686 case AArch64::LDRSHWui:
3687 return AArch64::LDRSHWroW;
3688 case AArch64::LDRBroX:
3689 case AArch64::LDURBi:
3690 case AArch64::LDRBui:
3691 return AArch64::LDRBroW;
3692 case AArch64::LDRBBroX:
3693 case AArch64::LDURBBi:
3694 case AArch64::LDRBBui:
3695 return AArch64::LDRBBroW;
3696 case AArch64::LDRSBXroX:
3697 case AArch64::LDURSBXi:
3698 case AArch64::LDRSBXui:
3699 return AArch64::LDRSBXroW;
3700 case AArch64::LDRSBWroX:
3701 case AArch64::LDURSBWi:
3702 case AArch64::LDRSBWui:
3703 return AArch64::LDRSBWroW;
3704 case AArch64::STRBroX:
3705 case AArch64::STURBi:
3706 case AArch64::STRBui:
3707 return AArch64::STRBroW;
3708 case AArch64::STRBBroX:
3709 case AArch64::STURBBi:
3710 case AArch64::STRBBui:
3711 return AArch64::STRBBroW;
3712 }
3713}
3714
3715MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3716 const ExtAddrMode &AM) const {
3717
3718 const DebugLoc &DL = MemI.getDebugLoc();
3719 MachineBasicBlock &MBB = *MemI.getParent();
3720   MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3721
3722   if (AM.Form == ExtAddrMode::Formula::Basic) {
3723     if (AM.ScaledReg) {
3724 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
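      // E.g. with AM.Scale == 8 this produces `ldr x0, [x1, x2, lsl #3]`;
      // with AM.Scale == 1 the index register is used unshifted.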
3725 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3726 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3727 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3728 .addReg(MemI.getOperand(0).getReg(),
3729 MemI.mayLoad() ? RegState::Define : 0)
3730 .addReg(AM.BaseReg)
3731 .addReg(AM.ScaledReg)
3732 .addImm(0)
3733 .addImm(AM.Scale > 1)
3734 .setMemRefs(MemI.memoperands())
3735 .setMIFlags(MemI.getFlags());
3736 return B.getInstr();
3737 }
3738
3739 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3740 "Addressing mode not supported for folding");
3741
3742 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
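  // E.g. a displacement of -8 fits the signed 9-bit unscaled form
  // (`ldur x0, [x1, #-8]`), while a displacement of 4096 does not and uses the
  // scaled form `ldr x0, [x1, #4096]`, whose immediate operand is 4096 / 8 = 512.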
3743 unsigned Scale = 1;
3744 unsigned Opcode = MemI.getOpcode();
3745 if (isInt<9>(AM.Displacement))
3746 Opcode = unscaledOffsetOpcode(Opcode);
3747 else
3748 Opcode = scaledOffsetOpcode(Opcode, Scale);
3749
3750 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3751 .addReg(MemI.getOperand(0).getReg(),
3752 MemI.mayLoad() ? RegState::Define : 0)
3753 .addReg(AM.BaseReg)
3754 .addImm(AM.Displacement / Scale)
3755 .setMemRefs(MemI.memoperands())
3756 .setMIFlags(MemI.getFlags());
3757 return B.getInstr();
3758 }
3759
3760   if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3761       AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3762     // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3763 assert(AM.ScaledReg && !AM.Displacement &&
3764 "Address offset can be a register or an immediate, but not both");
3765 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3766 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3767 // Make sure the offset register is in the correct register class.
3768 Register OffsetReg = AM.ScaledReg;
3769 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3770 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3771 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3772 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3773 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3774 }
3775 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3776 .addReg(MemI.getOperand(0).getReg(),
3777 MemI.mayLoad() ? RegState::Define : 0)
3778 .addReg(AM.BaseReg)
3779 .addReg(OffsetReg)
3780                  .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3781                  .addImm(AM.Scale != 1)
3782 .setMemRefs(MemI.memoperands())
3783 .setMIFlags(MemI.getFlags());
3784
3785 return B.getInstr();
3786 }
3787
3789 "Function must not be called with an addressing mode it can't handle");
3790}
3791
3792 /// Return true if the opcode is a post-index ld/st instruction, i.e. one that
3793 /// really accesses memory at base+0 and updates the base register afterwards.
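/// For example, `ldr x0, [x1], #8` reads from the address in x1 and only then
/// adds 8 to x1, so for address analysis its effective offset is 0.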
3794static bool isPostIndexLdStOpcode(unsigned Opcode) {
3795 switch (Opcode) {
3796 default:
3797 return false;
3798 case AArch64::LD1Fourv16b_POST:
3799 case AArch64::LD1Fourv1d_POST:
3800 case AArch64::LD1Fourv2d_POST:
3801 case AArch64::LD1Fourv2s_POST:
3802 case AArch64::LD1Fourv4h_POST:
3803 case AArch64::LD1Fourv4s_POST:
3804 case AArch64::LD1Fourv8b_POST:
3805 case AArch64::LD1Fourv8h_POST:
3806 case AArch64::LD1Onev16b_POST:
3807 case AArch64::LD1Onev1d_POST:
3808 case AArch64::LD1Onev2d_POST:
3809 case AArch64::LD1Onev2s_POST:
3810 case AArch64::LD1Onev4h_POST:
3811 case AArch64::LD1Onev4s_POST:
3812 case AArch64::LD1Onev8b_POST:
3813 case AArch64::LD1Onev8h_POST:
3814 case AArch64::LD1Rv16b_POST:
3815 case AArch64::LD1Rv1d_POST:
3816 case AArch64::LD1Rv2d_POST:
3817 case AArch64::LD1Rv2s_POST:
3818 case AArch64::LD1Rv4h_POST:
3819 case AArch64::LD1Rv4s_POST:
3820 case AArch64::LD1Rv8b_POST:
3821 case AArch64::LD1Rv8h_POST:
3822 case AArch64::LD1Threev16b_POST:
3823 case AArch64::LD1Threev1d_POST:
3824 case AArch64::LD1Threev2d_POST:
3825 case AArch64::LD1Threev2s_POST:
3826 case AArch64::LD1Threev4h_POST:
3827 case AArch64::LD1Threev4s_POST:
3828 case AArch64::LD1Threev8b_POST:
3829 case AArch64::LD1Threev8h_POST:
3830 case AArch64::LD1Twov16b_POST:
3831 case AArch64::LD1Twov1d_POST:
3832 case AArch64::LD1Twov2d_POST:
3833 case AArch64::LD1Twov2s_POST:
3834 case AArch64::LD1Twov4h_POST:
3835 case AArch64::LD1Twov4s_POST:
3836 case AArch64::LD1Twov8b_POST:
3837 case AArch64::LD1Twov8h_POST:
3838 case AArch64::LD1i16_POST:
3839 case AArch64::LD1i32_POST:
3840 case AArch64::LD1i64_POST:
3841 case AArch64::LD1i8_POST:
3842 case AArch64::LD2Rv16b_POST:
3843 case AArch64::LD2Rv1d_POST:
3844 case AArch64::LD2Rv2d_POST:
3845 case AArch64::LD2Rv2s_POST:
3846 case AArch64::LD2Rv4h_POST:
3847 case AArch64::LD2Rv4s_POST:
3848 case AArch64::LD2Rv8b_POST:
3849 case AArch64::LD2Rv8h_POST:
3850 case AArch64::LD2Twov16b_POST:
3851 case AArch64::LD2Twov2d_POST:
3852 case AArch64::LD2Twov2s_POST:
3853 case AArch64::LD2Twov4h_POST:
3854 case AArch64::LD2Twov4s_POST:
3855 case AArch64::LD2Twov8b_POST:
3856 case AArch64::LD2Twov8h_POST:
3857 case AArch64::LD2i16_POST:
3858 case AArch64::LD2i32_POST:
3859 case AArch64::LD2i64_POST:
3860 case AArch64::LD2i8_POST:
3861 case AArch64::LD3Rv16b_POST:
3862 case AArch64::LD3Rv1d_POST:
3863 case AArch64::LD3Rv2d_POST:
3864 case AArch64::LD3Rv2s_POST:
3865 case AArch64::LD3Rv4h_POST:
3866 case AArch64::LD3Rv4s_POST:
3867 case AArch64::LD3Rv8b_POST:
3868 case AArch64::LD3Rv8h_POST:
3869 case AArch64::LD3Threev16b_POST:
3870 case AArch64::LD3Threev2d_POST:
3871 case AArch64::LD3Threev2s_POST:
3872 case AArch64::LD3Threev4h_POST:
3873 case AArch64::LD3Threev4s_POST:
3874 case AArch64::LD3Threev8b_POST:
3875 case AArch64::LD3Threev8h_POST:
3876 case AArch64::LD3i16_POST:
3877 case AArch64::LD3i32_POST:
3878 case AArch64::LD3i64_POST:
3879 case AArch64::LD3i8_POST:
3880 case AArch64::LD4Fourv16b_POST:
3881 case AArch64::LD4Fourv2d_POST:
3882 case AArch64::LD4Fourv2s_POST:
3883 case AArch64::LD4Fourv4h_POST:
3884 case AArch64::LD4Fourv4s_POST:
3885 case AArch64::LD4Fourv8b_POST:
3886 case AArch64::LD4Fourv8h_POST:
3887 case AArch64::LD4Rv16b_POST:
3888 case AArch64::LD4Rv1d_POST:
3889 case AArch64::LD4Rv2d_POST:
3890 case AArch64::LD4Rv2s_POST:
3891 case AArch64::LD4Rv4h_POST:
3892 case AArch64::LD4Rv4s_POST:
3893 case AArch64::LD4Rv8b_POST:
3894 case AArch64::LD4Rv8h_POST:
3895 case AArch64::LD4i16_POST:
3896 case AArch64::LD4i32_POST:
3897 case AArch64::LD4i64_POST:
3898 case AArch64::LD4i8_POST:
3899 case AArch64::LDAPRWpost:
3900 case AArch64::LDAPRXpost:
3901 case AArch64::LDIAPPWpost:
3902 case AArch64::LDIAPPXpost:
3903 case AArch64::LDPDpost:
3904 case AArch64::LDPQpost:
3905 case AArch64::LDPSWpost:
3906 case AArch64::LDPSpost:
3907 case AArch64::LDPWpost:
3908 case AArch64::LDPXpost:
3909 case AArch64::LDRBBpost:
3910 case AArch64::LDRBpost:
3911 case AArch64::LDRDpost:
3912 case AArch64::LDRHHpost:
3913 case AArch64::LDRHpost:
3914 case AArch64::LDRQpost:
3915 case AArch64::LDRSBWpost:
3916 case AArch64::LDRSBXpost:
3917 case AArch64::LDRSHWpost:
3918 case AArch64::LDRSHXpost:
3919 case AArch64::LDRSWpost:
3920 case AArch64::LDRSpost:
3921 case AArch64::LDRWpost:
3922 case AArch64::LDRXpost:
3923 case AArch64::ST1Fourv16b_POST:
3924 case AArch64::ST1Fourv1d_POST:
3925 case AArch64::ST1Fourv2d_POST:
3926 case AArch64::ST1Fourv2s_POST:
3927 case AArch64::ST1Fourv4h_POST:
3928 case AArch64::ST1Fourv4s_POST:
3929 case AArch64::ST1Fourv8b_POST:
3930 case AArch64::ST1Fourv8h_POST:
3931 case AArch64::ST1Onev16b_POST:
3932 case AArch64::ST1Onev1d_POST:
3933 case AArch64::ST1Onev2d_POST:
3934 case AArch64::ST1Onev2s_POST:
3935 case AArch64::ST1Onev4h_POST:
3936 case AArch64::ST1Onev4s_POST:
3937 case AArch64::ST1Onev8b_POST:
3938 case AArch64::ST1Onev8h_POST:
3939 case AArch64::ST1Threev16b_POST:
3940 case AArch64::ST1Threev1d_POST:
3941 case AArch64::ST1Threev2d_POST:
3942 case AArch64::ST1Threev2s_POST:
3943 case AArch64::ST1Threev4h_POST:
3944 case AArch64::ST1Threev4s_POST:
3945 case AArch64::ST1Threev8b_POST:
3946 case AArch64::ST1Threev8h_POST:
3947 case AArch64::ST1Twov16b_POST:
3948 case AArch64::ST1Twov1d_POST:
3949 case AArch64::ST1Twov2d_POST:
3950 case AArch64::ST1Twov2s_POST:
3951 case AArch64::ST1Twov4h_POST:
3952 case AArch64::ST1Twov4s_POST:
3953 case AArch64::ST1Twov8b_POST:
3954 case AArch64::ST1Twov8h_POST:
3955 case AArch64::ST1i16_POST:
3956 case AArch64::ST1i32_POST:
3957 case AArch64::ST1i64_POST:
3958 case AArch64::ST1i8_POST:
3959 case AArch64::ST2GPostIndex:
3960 case AArch64::ST2Twov16b_POST:
3961 case AArch64::ST2Twov2d_POST:
3962 case AArch64::ST2Twov2s_POST:
3963 case AArch64::ST2Twov4h_POST:
3964 case AArch64::ST2Twov4s_POST:
3965 case AArch64::ST2Twov8b_POST:
3966 case AArch64::ST2Twov8h_POST:
3967 case AArch64::ST2i16_POST:
3968 case AArch64::ST2i32_POST:
3969 case AArch64::ST2i64_POST:
3970 case AArch64::ST2i8_POST:
3971 case AArch64::ST3Threev16b_POST:
3972 case AArch64::ST3Threev2d_POST:
3973 case AArch64::ST3Threev2s_POST:
3974 case AArch64::ST3Threev4h_POST:
3975 case AArch64::ST3Threev4s_POST:
3976 case AArch64::ST3Threev8b_POST:
3977 case AArch64::ST3Threev8h_POST:
3978 case AArch64::ST3i16_POST:
3979 case AArch64::ST3i32_POST:
3980 case AArch64::ST3i64_POST:
3981 case AArch64::ST3i8_POST:
3982 case AArch64::ST4Fourv16b_POST:
3983 case AArch64::ST4Fourv2d_POST:
3984 case AArch64::ST4Fourv2s_POST:
3985 case AArch64::ST4Fourv4h_POST:
3986 case AArch64::ST4Fourv4s_POST:
3987 case AArch64::ST4Fourv8b_POST:
3988 case AArch64::ST4Fourv8h_POST:
3989 case AArch64::ST4i16_POST:
3990 case AArch64::ST4i32_POST:
3991 case AArch64::ST4i64_POST:
3992 case AArch64::ST4i8_POST:
3993 case AArch64::STGPostIndex:
3994 case AArch64::STGPpost:
3995 case AArch64::STPDpost:
3996 case AArch64::STPQpost:
3997 case AArch64::STPSpost:
3998 case AArch64::STPWpost:
3999 case AArch64::STPXpost:
4000 case AArch64::STRBBpost:
4001 case AArch64::STRBpost:
4002 case AArch64::STRDpost:
4003 case AArch64::STRHHpost:
4004 case AArch64::STRHpost:
4005 case AArch64::STRQpost:
4006 case AArch64::STRSpost:
4007 case AArch64::STRWpost:
4008 case AArch64::STRXpost:
4009 case AArch64::STZ2GPostIndex:
4010 case AArch64::STZGPostIndex:
4011 return true;
4012 }
4013}
4014
4015 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4016     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4017 bool &OffsetIsScalable, TypeSize &Width,
4018 const TargetRegisterInfo *TRI) const {
4019 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4020 // Handle only loads/stores with base register followed by immediate offset.
4021 if (LdSt.getNumExplicitOperands() == 3) {
4022 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4023 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4024 !LdSt.getOperand(2).isImm())
4025 return false;
4026 } else if (LdSt.getNumExplicitOperands() == 4) {
4027 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4028 if (!LdSt.getOperand(1).isReg() ||
4029 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4030 !LdSt.getOperand(3).isImm())
4031 return false;
4032 } else
4033 return false;
4034
4035   // Get the scaling factor for the instruction and set the width of its
4036   // memory access.
4037 TypeSize Scale(0U, false);
4038 int64_t Dummy1, Dummy2;
4039
4040 // If this returns false, then it's an instruction we don't want to handle.
4041 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4042 return false;
4043
4044   // Compute the offset: the immediate operand multiplied by the scaling
4045   // factor. Unscaled instructions have a scaling factor of 1. Post-index
4046   // instructions are a special case and have an offset of 0.
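  // E.g. an LDRXui with immediate operand 2 has Scale 8, giving a byte offset
  // of 16, while an LDURXi with immediate operand 2 has Scale 1 and offset 2.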
4047 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4048 BaseOp = &LdSt.getOperand(2);
4049 Offset = 0;
4050 } else if (LdSt.getNumExplicitOperands() == 3) {
4051 BaseOp = &LdSt.getOperand(1);
4052 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4053 } else {
4054 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4055 BaseOp = &LdSt.getOperand(2);
4056 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4057 }
4058 OffsetIsScalable = Scale.isScalable();
4059
4060 return BaseOp->isReg() || BaseOp->isFI();
4061}
4062
4065 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4066 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4067 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4068 return OfsOp;
4069}
4070
4071bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4072 TypeSize &Width, int64_t &MinOffset,
4073 int64_t &MaxOffset) {
4074 switch (Opcode) {
4075   // Not a memory operation, or not one we want to handle.
4076 default:
4077 Scale = TypeSize::getFixed(0);
4078 Width = TypeSize::getFixed(0);
4079 MinOffset = MaxOffset = 0;
4080 return false;
4081 // LDR / STR
4082 case AArch64::LDRQui:
4083 case AArch64::STRQui:
4084 Scale = TypeSize::getFixed(16);
4085 Width = TypeSize::getFixed(16);
4086 MinOffset = 0;
4087 MaxOffset = 4095;
4088 break;
4089 case AArch64::LDRXui:
4090 case AArch64::LDRDui:
4091 case AArch64::STRXui:
4092 case AArch64::STRDui:
4093 case AArch64::PRFMui:
4094 Scale = TypeSize::getFixed(8);
4095 Width = TypeSize::getFixed(8);
4096 MinOffset = 0;
4097 MaxOffset = 4095;
4098 break;
4099 case AArch64::LDRWui:
4100 case AArch64::LDRSui:
4101 case AArch64::LDRSWui:
4102 case AArch64::STRWui:
4103 case AArch64::STRSui:
4104 Scale = TypeSize::getFixed(4);
4105 Width = TypeSize::getFixed(4);
4106 MinOffset = 0;
4107 MaxOffset = 4095;
4108 break;
4109 case AArch64::LDRHui:
4110 case AArch64::LDRHHui:
4111 case AArch64::LDRSHWui:
4112 case AArch64::LDRSHXui:
4113 case AArch64::STRHui:
4114 case AArch64::STRHHui:
4115 Scale = TypeSize::getFixed(2);
4116 Width = TypeSize::getFixed(2);
4117 MinOffset = 0;
4118 MaxOffset = 4095;
4119 break;
4120 case AArch64::LDRBui:
4121 case AArch64::LDRBBui:
4122 case AArch64::LDRSBWui:
4123 case AArch64::LDRSBXui:
4124 case AArch64::STRBui:
4125 case AArch64::STRBBui:
4126 Scale = TypeSize::getFixed(1);
4127 Width = TypeSize::getFixed(1);
4128 MinOffset = 0;
4129 MaxOffset = 4095;
4130 break;
4131 // post/pre inc
4132 case AArch64::STRQpre:
4133 case AArch64::LDRQpost:
4134 Scale = TypeSize::getFixed(1);
4135 Width = TypeSize::getFixed(16);
4136 MinOffset = -256;
4137 MaxOffset = 255;
4138 break;
4139 case AArch64::LDRDpost:
4140 case AArch64::LDRDpre:
4141 case AArch64::LDRXpost:
4142 case AArch64::LDRXpre:
4143 case AArch64::STRDpost:
4144 case AArch64::STRDpre:
4145 case AArch64::STRXpost:
4146 case AArch64::STRXpre:
4147 Scale = TypeSize::getFixed(1);
4148 Width = TypeSize::getFixed(8);
4149 MinOffset = -256;
4150 MaxOffset = 255;
4151 break;
4152 case AArch64::STRWpost:
4153 case AArch64::STRWpre:
4154 case AArch64::LDRWpost:
4155 case AArch64::LDRWpre:
4156 case AArch64::STRSpost:
4157 case AArch64::STRSpre:
4158 case AArch64::LDRSpost:
4159 case AArch64::LDRSpre:
4160 Scale = TypeSize::getFixed(1);
4161 Width = TypeSize::getFixed(4);
4162 MinOffset = -256;
4163 MaxOffset = 255;
4164 break;
4165 case AArch64::LDRHpost:
4166 case AArch64::LDRHpre:
4167 case AArch64::STRHpost:
4168 case AArch64::STRHpre:
4169 case AArch64::LDRHHpost:
4170 case AArch64::LDRHHpre:
4171 case AArch64::STRHHpost:
4172 case AArch64::STRHHpre:
4173 Scale = TypeSize::getFixed(1);
4174 Width = TypeSize::getFixed(2);
4175 MinOffset = -256;
4176 MaxOffset = 255;
4177 break;
4178 case AArch64::LDRBpost:
4179 case AArch64::LDRBpre:
4180 case AArch64::STRBpost:
4181 case AArch64::STRBpre:
4182 case AArch64::LDRBBpost:
4183 case AArch64::LDRBBpre:
4184 case AArch64::STRBBpost:
4185 case AArch64::STRBBpre:
4186 Scale = TypeSize::getFixed(1);
4187 Width = TypeSize::getFixed(1);
4188 MinOffset = -256;
4189 MaxOffset = 255;
4190 break;
4191 // Unscaled
4192 case AArch64::LDURQi:
4193 case AArch64::STURQi:
4194 Scale = TypeSize::getFixed(1);
4195 Width = TypeSize::getFixed(16);
4196 MinOffset = -256;
4197 MaxOffset = 255;
4198 break;
4199 case AArch64::LDURXi:
4200 case AArch64::LDURDi:
4201 case AArch64::LDAPURXi:
4202 case AArch64::STURXi:
4203 case AArch64::STURDi:
4204 case AArch64::STLURXi:
4205 case AArch64::PRFUMi:
4206 Scale = TypeSize::getFixed(1);
4207 Width = TypeSize::getFixed(8);
4208 MinOffset = -256;
4209 MaxOffset = 255;
4210 break;
4211 case AArch64::LDURWi:
4212 case AArch64::LDURSi:
4213 case AArch64::LDURSWi:
4214 case AArch64::LDAPURi:
4215 case AArch64::LDAPURSWi:
4216 case AArch64::STURWi:
4217 case AArch64::STURSi:
4218 case AArch64::STLURWi:
4219 Scale = TypeSize::getFixed(1);
4220 Width = TypeSize::getFixed(4);
4221 MinOffset = -256;
4222 MaxOffset = 255;
4223 break;
4224 case AArch64::LDURHi:
4225 case AArch64::LDURHHi:
4226 case AArch64::LDURSHXi:
4227 case AArch64::LDURSHWi:
4228 case AArch64::LDAPURHi:
4229 case AArch64::LDAPURSHWi:
4230 case AArch64::LDAPURSHXi:
4231 case AArch64::STURHi:
4232 case AArch64::STURHHi:
4233 case AArch64::STLURHi:
4234 Scale = TypeSize::getFixed(1);
4235 Width = TypeSize::getFixed(2);
4236 MinOffset = -256;
4237 MaxOffset = 255;
4238 break;
4239 case AArch64::LDURBi:
4240 case AArch64::LDURBBi:
4241 case AArch64::LDURSBXi:
4242 case AArch64::LDURSBWi:
4243 case AArch64::LDAPURBi:
4244 case AArch64::LDAPURSBWi:
4245 case AArch64::LDAPURSBXi:
4246 case AArch64::STURBi:
4247 case AArch64::STURBBi:
4248 case AArch64::STLURBi:
4249 Scale = TypeSize::getFixed(1);
4250 Width = TypeSize::getFixed(1);
4251 MinOffset = -256;
4252 MaxOffset = 255;
4253 break;
4254 // LDP / STP (including pre/post inc)
4255 case AArch64::LDPQi:
4256 case AArch64::LDNPQi:
4257 case AArch64::STPQi:
4258 case AArch64::STNPQi:
4259 case AArch64::LDPQpost:
4260 case AArch64::LDPQpre:
4261 case AArch64::STPQpost:
4262 case AArch64::STPQpre:
4263 Scale = TypeSize::getFixed(16);
4264 Width = TypeSize::getFixed(16 * 2);
4265 MinOffset = -64;
4266 MaxOffset = 63;
4267 break;
4268 case AArch64::LDPXi:
4269 case AArch64::LDPDi:
4270 case AArch64::LDNPXi:
4271 case AArch64::LDNPDi:
4272 case AArch64::STPXi:
4273 case AArch64::STPDi:
4274 case AArch64::STNPXi:
4275 case AArch64::STNPDi:
4276 case AArch64::LDPDpost:
4277 case AArch64::LDPDpre:
4278 case AArch64::LDPXpost:
4279 case AArch64::LDPXpre:
4280 case AArch64::STPDpost:
4281 case AArch64::STPDpre:
4282 case AArch64::STPXpost:
4283 case AArch64::STPXpre:
4284 Scale = TypeSize::getFixed(8);
4285 Width = TypeSize::getFixed(8 * 2);
4286 MinOffset = -64;
4287 MaxOffset = 63;
4288 break;
4289 case AArch64::LDPWi:
4290 case AArch64::LDPSi:
4291 case AArch64::LDNPWi:
4292 case AArch64::LDNPSi:
4293 case AArch64::STPWi:
4294 case AArch64::STPSi:
4295 case AArch64::STNPWi:
4296 case AArch64::STNPSi:
4297 case AArch64::LDPSpost:
4298 case AArch64::LDPSpre:
4299 case AArch64::LDPWpost:
4300 case AArch64::LDPWpre:
4301 case AArch64::STPSpost:
4302 case AArch64::STPSpre:
4303 case AArch64::STPWpost:
4304 case AArch64::STPWpre:
4305 Scale = TypeSize::getFixed(4);
4306 Width = TypeSize::getFixed(4 * 2);
4307 MinOffset = -64;
4308 MaxOffset = 63;
4309 break;
4310 case AArch64::StoreSwiftAsyncContext:
4311 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4312 Scale = TypeSize::getFixed(1);
4313 Width = TypeSize::getFixed(8);
4314 MinOffset = 0;
4315 MaxOffset = 4095;
4316 break;
4317 case AArch64::ADDG:
4318 Scale = TypeSize::getFixed(16);
4319 Width = TypeSize::getFixed(0);
4320 MinOffset = 0;
4321 MaxOffset = 63;
4322 break;
4323 case AArch64::TAGPstack:
4324 Scale = TypeSize::getFixed(16);
4325 Width = TypeSize::getFixed(0);
4326 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4327 // of 63 (not 64!).
4328 MinOffset = -63;
4329 MaxOffset = 63;
4330 break;
4331 case AArch64::LDG:
4332 case AArch64::STGi:
4333 case AArch64::STGPreIndex:
4334 case AArch64::STGPostIndex:
4335 case AArch64::STZGi:
4336 case AArch64::STZGPreIndex:
4337 case AArch64::STZGPostIndex:
4338 Scale = TypeSize::getFixed(16);
4339 Width = TypeSize::getFixed(16);
4340 MinOffset = -256;
4341 MaxOffset = 255;
4342 break;
4343 // SVE
4344 case AArch64::STR_ZZZZXI:
4345 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4346 case AArch64::LDR_ZZZZXI:
4347 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4348 Scale = TypeSize::getScalable(16);
4349 Width = TypeSize::getScalable(16 * 4);
4350 MinOffset = -256;
4351 MaxOffset = 252;
4352 break;
4353 case AArch64::STR_ZZZXI:
4354 case AArch64::LDR_ZZZXI:
4355 Scale = TypeSize::getScalable(16);
4356 Width = TypeSize::getScalable(16 * 3);
4357 MinOffset = -256;
4358 MaxOffset = 253;
4359 break;
4360 case AArch64::STR_ZZXI:
4361 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4362 case AArch64::LDR_ZZXI:
4363 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4364 Scale = TypeSize::getScalable(16);
4365 Width = TypeSize::getScalable(16 * 2);
4366 MinOffset = -256;
4367 MaxOffset = 254;
4368 break;
4369 case AArch64::LDR_PXI:
4370 case AArch64::STR_PXI:
4371 Scale = TypeSize::getScalable(2);
4372 Width = TypeSize::getScalable(2);
4373 MinOffset = -256;
4374 MaxOffset = 255;
4375 break;
4376 case AArch64::LDR_PPXI:
4377 case AArch64::STR_PPXI:
4378 Scale = TypeSize::getScalable(2);
4379 Width = TypeSize::getScalable(2 * 2);
4380 MinOffset = -256;
4381 MaxOffset = 254;
4382 break;
4383 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4384 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4385 case AArch64::LDR_ZXI:
4386 case AArch64::STR_ZXI:
4387 Scale = TypeSize::getScalable(16);
4388 Width = TypeSize::getScalable(16);
4389 MinOffset = -256;
4390 MaxOffset = 255;
4391 break;
4392 case AArch64::LD1B_IMM:
4393 case AArch64::LD1H_IMM:
4394 case AArch64::LD1W_IMM:
4395 case AArch64::LD1D_IMM:
4396 case AArch64::LDNT1B_ZRI:
4397 case AArch64::LDNT1H_ZRI:
4398 case AArch64::LDNT1W_ZRI:
4399 case AArch64::LDNT1D_ZRI:
4400 case AArch64::ST1B_IMM:
4401 case AArch64::ST1H_IMM:
4402 case AArch64::ST1W_IMM:
4403 case AArch64::ST1D_IMM:
4404 case AArch64::STNT1B_ZRI:
4405 case AArch64::STNT1H_ZRI:
4406 case AArch64::STNT1W_ZRI:
4407 case AArch64::STNT1D_ZRI:
4408 case AArch64::LDNF1B_IMM:
4409 case AArch64::LDNF1H_IMM:
4410 case AArch64::LDNF1W_IMM:
4411 case AArch64::LDNF1D_IMM:
4412     // A full vector's worth of data
4413 // Width = mbytes * elements
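    // E.g. `ld1b { z0.b }, p0/z, [x0, #1, mul vl]` addresses x0 plus one full
    // vector of bytes, so both Scale and Width are the scalable 16-byte VL unit.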
4414 Scale = TypeSize::getScalable(16);
4415 Width = TypeSize::getScalable(16);
4416 MinOffset = -8;
4417 MaxOffset = 7;
4418 break;
4419 case AArch64::LD2B_IMM:
4420 case AArch64::LD2H_IMM:
4421 case AArch64::LD2W_IMM:
4422 case AArch64::LD2D_IMM:
4423 case AArch64::ST2B_IMM:
4424 case AArch64::ST2H_IMM:
4425 case AArch64::ST2W_IMM:
4426 case AArch64::ST2D_IMM:
4427 Scale = TypeSize::getScalable(32);
4428 Width = TypeSize::getScalable(16 * 2);
4429 MinOffset = -8;
4430 MaxOffset = 7;
4431 break;
4432 case AArch64::LD3B_IMM:
4433 case AArch64::LD3H_IMM:
4434 case AArch64::LD3W_IMM:
4435 case AArch64::LD3D_IMM:
4436 case AArch64::ST3B_IMM:
4437 case AArch64::ST3H_IMM:
4438 case AArch64::ST3W_IMM:
4439 case AArch64::ST3D_IMM:
4440 Scale = TypeSize::getScalable(48);
4441 Width = TypeSize::getScalable(16 * 3);
4442 MinOffset = -8;
4443 MaxOffset = 7;
4444 break;
4445 case AArch64::LD4B_IMM:
4446 case AArch64::LD4H_IMM:
4447 case AArch64::LD4W_IMM:
4448 case AArch64::LD4D_IMM:
4449 case AArch64::ST4B_IMM:
4450 case AArch64::ST4H_IMM:
4451 case AArch64::ST4W_IMM:
4452 case AArch64::ST4D_IMM:
4453 Scale = TypeSize::getScalable(64);
4454 Width = TypeSize::getScalable(16 * 4);
4455 MinOffset = -8;
4456 MaxOffset = 7;
4457 break;
4458 case AArch64::LD1B_H_IMM:
4459 case AArch64::LD1SB_H_IMM:
4460 case AArch64::LD1H_S_IMM:
4461 case AArch64::LD1SH_S_IMM:
4462 case AArch64::LD1W_D_IMM:
4463 case AArch64::LD1SW_D_IMM:
4464 case AArch64::ST1B_H_IMM:
4465 case AArch64::ST1H_S_IMM:
4466 case AArch64::ST1W_D_IMM:
4467 case AArch64::LDNF1B_H_IMM:
4468 case AArch64::LDNF1SB_H_IMM:
4469 case AArch64::LDNF1H_S_IMM:
4470 case AArch64::LDNF1SH_S_IMM:
4471 case AArch64::LDNF1W_D_IMM:
4472 case AArch64::LDNF1SW_D_IMM:
4473     // A half vector's worth of data
4474 // Width = mbytes * elements
4475 Scale = TypeSize::getScalable(8);
4476 Width = TypeSize::getScalable(8);
4477 MinOffset = -8;
4478 MaxOffset = 7;
4479 break;
4480 case AArch64::LD1B_S_IMM:
4481 case AArch64::LD1SB_S_IMM:
4482 case AArch64::LD1H_D_IMM:
4483 case AArch64::LD1SH_D_IMM:
4484 case AArch64::ST1B_S_IMM:
4485 case AArch64::ST1H_D_IMM:
4486 case AArch64::LDNF1B_S_IMM:
4487 case AArch64::LDNF1SB_S_IMM:
4488 case AArch64::LDNF1H_D_IMM:
4489 case AArch64::LDNF1SH_D_IMM:
4490     // A quarter vector's worth of data
4491 // Width = mbytes * elements
4492 Scale = TypeSize::getScalable(4);
4493 Width = TypeSize::getScalable(4);
4494 MinOffset = -8;
4495 MaxOffset = 7;
4496 break;
4497 case AArch64::LD1B_D_IMM:
4498 case AArch64::LD1SB_D_IMM:
4499 case AArch64::ST1B_D_IMM:
4500 case AArch64::LDNF1B_D_IMM:
4501 case AArch64::LDNF1SB_D_IMM:
4502     // An eighth of a vector's worth of data
4503 // Width = mbytes * elements
4504 Scale = TypeSize::getScalable(2);
4505 Width = TypeSize::getScalable(2);
4506 MinOffset = -8;
4507 MaxOffset = 7;
4508 break;
4509 case AArch64::ST2Gi:
4510 case AArch64::ST2GPreIndex:
4511 case AArch64::ST2GPostIndex:
4512 case AArch64::STZ2Gi:
4513 case AArch64::STZ2GPreIndex:
4514 case AArch64::STZ2GPostIndex:
4515 Scale = TypeSize::getFixed(16);
4516 Width = TypeSize::getFixed(32);
4517 MinOffset = -256;
4518 MaxOffset = 255;
4519 break;
4520 case AArch64::STGPi:
4521 case AArch64::STGPpost:
4522 case AArch64::STGPpre:
4523 Scale = TypeSize::getFixed(16);
4524 Width = TypeSize::getFixed(16);
4525 MinOffset = -64;
4526 MaxOffset = 63;
4527 break;
4528 case AArch64::LD1RB_IMM:
4529 case AArch64::LD1RB_H_IMM:
4530 case AArch64::LD1RB_S_IMM:
4531 case AArch64::LD1RB_D_IMM:
4532 case AArch64::LD1RSB_H_IMM:
4533 case AArch64::LD1RSB_S_IMM:
4534 case AArch64::LD1RSB_D_IMM:
4535 Scale = TypeSize::getFixed(1);
4536 Width = TypeSize::getFixed(1);
4537 MinOffset = 0;
4538 MaxOffset = 63;
4539 break;
4540 case AArch64::LD1RH_IMM:
4541 case AArch64::LD1RH_S_IMM:
4542 case AArch64::LD1RH_D_IMM:
4543 case AArch64::LD1RSH_S_IMM:
4544 case AArch64::LD1RSH_D_IMM:
4545 Scale = TypeSize::getFixed(2);
4546 Width = TypeSize::getFixed(2);
4547 MinOffset = 0;
4548 MaxOffset = 63;
4549 break;
4550 case AArch64::LD1RW_IMM:
4551 case AArch64::LD1RW_D_IMM:
4552 case AArch64::LD1RSW_IMM:
4553 Scale = TypeSize::getFixed(4);
4554 Width = TypeSize::getFixed(4);
4555 MinOffset = 0;
4556 MaxOffset = 63;
4557 break;
4558 case AArch64::LD1RD_IMM:
4559 Scale = TypeSize::getFixed(8);
4560 Width = TypeSize::getFixed(8);
4561 MinOffset = 0;
4562 MaxOffset = 63;
4563 break;
4564 }
4565
4566 return true;
4567}
4568
4569// Scaling factor for unscaled load or store.
4570 int AArch64InstrInfo::getMemScale(unsigned Opc) {
4571   switch (Opc) {
4572 default:
4573 llvm_unreachable("Opcode has unknown scale!");
4574 case AArch64::LDRBBui:
4575 case AArch64::LDURBBi:
4576 case AArch64::LDRSBWui:
4577 case AArch64::LDURSBWi:
4578 case AArch64::STRBBui:
4579 case AArch64::STURBBi:
4580 return 1;
4581 case AArch64::LDRHHui:
4582 case AArch64::LDURHHi:
4583 case AArch64::LDRSHWui:
4584 case AArch64::LDURSHWi:
4585 case AArch64::STRHHui:
4586 case AArch64::STURHHi:
4587 return 2;
4588 case AArch64::LDRSui:
4589 case AArch64::LDURSi:
4590 case AArch64::LDRSpre:
4591 case AArch64::LDRSWui:
4592 case AArch64::LDURSWi:
4593 case AArch64::LDRSWpre:
4594 case AArch64::LDRWpre:
4595 case AArch64::LDRWui:
4596 case AArch64::LDURWi:
4597 case AArch64::STRSui:
4598 case AArch64::STURSi:
4599 case AArch64::STRSpre:
4600 case AArch64::STRWui:
4601 case AArch64::STURWi:
4602 case AArch64::STRWpre:
4603 case AArch64::LDPSi:
4604 case AArch64::LDPSWi:
4605 case AArch64::LDPWi:
4606 case AArch64::STPSi:
4607 case AArch64::STPWi:
4608 return 4;
4609 case AArch64::LDRDui:
4610 case AArch64::LDURDi:
4611 case AArch64::LDRDpre:
4612 case AArch64::LDRXui:
4613 case AArch64::LDURXi:
4614 case AArch64::LDRXpre:
4615 case AArch64::STRDui:
4616 case AArch64::STURDi:
4617 case AArch64::STRDpre:
4618 case AArch64::STRXui:
4619 case AArch64::STURXi:
4620 case AArch64::STRXpre:
4621 case AArch64::LDPDi:
4622 case AArch64::LDPXi:
4623 case AArch64::STPDi:
4624 case AArch64::STPXi:
4625 return 8;
4626 case AArch64::LDRQui:
4627 case AArch64::LDURQi:
4628 case AArch64::STRQui:
4629 case AArch64::STURQi:
4630 case AArch64::STRQpre:
4631 case AArch64::LDPQi:
4632 case AArch64::LDRQpre:
4633 case AArch64::STPQi:
4634 case AArch64::STGi:
4635 case AArch64::STZGi:
4636 case AArch64::ST2Gi:
4637 case AArch64::STZ2Gi:
4638 case AArch64::STGPi:
4639 return 16;
4640 }
4641}
4642
4643 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4644   switch (MI.getOpcode()) {
4645 default:
4646 return false;
4647 case AArch64::LDRWpre:
4648 case AArch64::LDRXpre:
4649 case AArch64::LDRSWpre:
4650 case AArch64::LDRSpre:
4651 case AArch64::LDRDpre:
4652 case AArch64::LDRQpre:
4653 return true;
4654 }
4655}
4656
4657 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4658   switch (MI.getOpcode()) {
4659 default:
4660 return false;
4661 case AArch64::STRWpre:
4662 case AArch64::STRXpre:
4663 case AArch64::STRSpre:
4664 case AArch64::STRDpre:
4665 case AArch64::STRQpre:
4666 return true;
4667 }
4668}
4669
4670 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4671   return isPreLd(MI) || isPreSt(MI);
4672}
4673
4674 bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4675   switch (MI.getOpcode()) {
4676 default:
4677 return false;
4678 case AArch64::LDPSi:
4679 case AArch64::LDPSWi:
4680 case AArch64::LDPDi:
4681 case AArch64::LDPQi:
4682 case AArch64::LDPWi:
4683 case AArch64::LDPXi:
4684 case AArch64::STPSi:
4685 case AArch64::STPDi:
4686 case AArch64::STPQi:
4687 case AArch64::STPWi:
4688 case AArch64::STPXi:
4689 case AArch64::STGPi:
4690 return true;
4691 }
4692}
4693
4694 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4695   assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4696 unsigned Idx =
4697       AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4698                                                                             : 1;
4699 return MI.getOperand(Idx);
4700}
4701
4702const MachineOperand &
4703 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4704   assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4705 unsigned Idx =
4706       AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4707                                                                             : 2;
4708 return MI.getOperand(Idx);
4709}
4710
4711const MachineOperand &
4712 AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4713   switch (MI.getOpcode()) {
4714 default:
4715 llvm_unreachable("Unexpected opcode");
4716 case AArch64::LDRBroX:
4717 case AArch64::LDRBBroX:
4718 case AArch64::LDRSBXroX:
4719 case AArch64::LDRSBWroX:
4720 case AArch64::LDRHroX:
4721 case AArch64::LDRHHroX:
4722 case AArch64::LDRSHXroX:
4723 case AArch64::LDRSHWroX:
4724 case AArch64::LDRWroX:
4725 case AArch64::LDRSroX:
4726 case AArch64::LDRSWroX:
4727 case AArch64::LDRDroX:
4728 case AArch64::LDRXroX:
4729 case AArch64::LDRQroX:
4730 return MI.getOperand(4);
4731 }
4732}
4733
4734 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4735                                               Register Reg) {
4736 if (MI.getParent() == nullptr)
4737 return nullptr;
4738 const MachineFunction *MF = MI.getParent()->getParent();
4739 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4740}
4741
4742 bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4743   auto IsHFPR = [&](const MachineOperand &Op) {
4744 if (!Op.isReg())
4745 return false;
4746 auto Reg = Op.getReg();
4747 if (Reg.isPhysical())
4748 return AArch64::FPR16RegClass.contains(Reg);
4749 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4750 return TRC == &AArch64::FPR16RegClass ||
4751 TRC == &AArch64::FPR16_loRegClass;
4752 };
4753 return llvm::any_of(MI.operands(), IsHFPR);
4754}
4755
4756 bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4757   auto IsQFPR = [&](const MachineOperand &Op) {
4758 if (!Op.isReg())
4759 return false;
4760 auto Reg = Op.getReg();
4761 if (Reg.isPhysical())
4762 return AArch64::FPR128RegClass.contains(Reg);
4763 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4764 return TRC == &AArch64::FPR128RegClass ||
4765 TRC == &AArch64::FPR128_loRegClass;
4766 };
4767 return llvm::any_of(MI.operands(), IsQFPR);
4768}
4769
4770 bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4771   switch (MI.getOpcode()) {
4772 case AArch64::BRK:
4773 case AArch64::HLT:
4774 case AArch64::PACIASP:
4775 case AArch64::PACIBSP:
4776 // Implicit BTI behavior.
4777 return true;
4778 case AArch64::PAUTH_PROLOGUE:
4779 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4780 return true;
4781 case AArch64::HINT: {
4782 unsigned Imm = MI.getOperand(0).getImm();
4783 // Explicit BTI instruction.
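    // HINT #32, #34, #36 and #38 encode BTI, BTI c, BTI j and BTI jc.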
4784 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4785 return true;
4786 // PACI(A|B)SP instructions.
4787 if (Imm == 25 || Imm == 27)
4788 return true;
4789 return false;
4790 }
4791 default:
4792 return false;
4793 }
4794}
4795
4796 bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4797   if (Reg == 0)
4798 return false;
4799 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4800 return AArch64::FPR128RegClass.contains(Reg) ||
4801 AArch64::FPR64RegClass.contains(Reg) ||
4802 AArch64::FPR32RegClass.contains(Reg) ||
4803 AArch64::FPR16RegClass.contains(Reg) ||
4804 AArch64::FPR8RegClass.contains(Reg);
4805}
4806
4807 bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4808   auto IsFPR = [&](const MachineOperand &Op) {
4809 if (!Op.isReg())
4810 return false;
4811 auto Reg = Op.getReg();
4812 if (Reg.isPhysical())
4813 return isFpOrNEON(Reg);
4814
4815 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4816 return TRC == &AArch64::FPR128RegClass ||
4817 TRC == &AArch64::FPR128_loRegClass ||
4818 TRC == &AArch64::FPR64RegClass ||
4819 TRC == &AArch64::FPR64_loRegClass ||
4820 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4821 TRC == &AArch64::FPR8RegClass;
4822 };
4823 return llvm::any_of(MI.operands(), IsFPR);
4824}
4825
4826// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4827// scaled.
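// E.g. for an 8-byte unscaled access, a byte offset of 24 becomes element
// offset 3, while a byte offset of 20 is rejected (not a multiple of 8).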
4828static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4829   int Scale = AArch64InstrInfo::getMemScale(Opc);
4830
4831 // If the byte-offset isn't a multiple of the stride, we can't scale this
4832 // offset.
4833 if (Offset % Scale != 0)
4834 return false;
4835
4836 // Convert the byte-offset used by unscaled into an "element" offset used
4837 // by the scaled pair load/store instructions.
4838 Offset /= Scale;
4839 return true;
4840}
4841
4842static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4843 if (FirstOpc == SecondOpc)
4844 return true;
4845 // We can also pair sign-ext and zero-ext instructions.
4846 switch (FirstOpc) {
4847 default:
4848 return false;
4849 case AArch64::STRSui:
4850 case AArch64::STURSi:
4851 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4852 case AArch64::STRDui:
4853 case AArch64::STURDi:
4854 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4855 case AArch64::STRQui:
4856 case AArch64::STURQi:
4857 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4858 case AArch64::STRWui:
4859 case AArch64::STURWi:
4860 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4861 case AArch64::STRXui:
4862 case AArch64::STURXi:
4863 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4864 case AArch64::LDRSui:
4865 case AArch64::LDURSi:
4866 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4867 case AArch64::LDRDui:
4868 case AArch64::LDURDi:
4869 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4870 case AArch64::LDRQui:
4871 case AArch64::LDURQi:
4872 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4873 case AArch64::LDRWui:
4874 case AArch64::LDURWi:
4875 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4876 case AArch64::LDRSWui:
4877 case AArch64::LDURSWi:
4878 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4879 case AArch64::LDRXui:
4880 case AArch64::LDURXi:
4881 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4882 }
4883 // These instructions can't be paired based on their opcodes.
4884 return false;
4885}
4886
4887static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4888 int64_t Offset1, unsigned Opcode1, int FI2,
4889 int64_t Offset2, unsigned Opcode2) {
4890 // Accesses through fixed stack object frame indices may access a different
4891 // fixed stack slot. Check that the object offsets + offsets match.
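  // E.g. two 8-byte slots whose scaled object offsets work out to N and N + 1
  // are adjacent and may be clustered even though their frame indices differ.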
4892 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4893 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4894 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4895 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4896 // Convert to scaled object offsets.
4897 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4898 if (ObjectOffset1 % Scale1 != 0)
4899 return false;
4900 ObjectOffset1 /= Scale1;
4901 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4902 if (ObjectOffset2 % Scale2 != 0)
4903 return false;
4904 ObjectOffset2 /= Scale2;
4905 ObjectOffset1 += Offset1;
4906 ObjectOffset2 += Offset2;
4907 return ObjectOffset1 + 1 == ObjectOffset2;
4908 }
4909
4910 return FI1 == FI2;
4911}
4912
4913/// Detect opportunities for ldp/stp formation.
4914///
4915/// Only called for LdSt for which getMemOperandWithOffset returns true.
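/// E.g. `ldr x0, [sp, #8]` followed by `ldr x1, [sp, #16]` has the same base
/// and consecutive scaled offsets (1 and 2), so the pair can later be rewritten
/// as `ldp x0, x1, [sp, #8]`.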
4916 bool AArch64InstrInfo::shouldClusterMemOps(
4917     ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4918 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4919 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4920 unsigned NumBytes) const {
4921 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4922 const MachineOperand &BaseOp1 = *BaseOps1.front();
4923 const MachineOperand &BaseOp2 = *BaseOps2.front();
4924 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4925 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4926 if (BaseOp1.getType() != BaseOp2.getType())
4927 return false;
4928
4929 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4930 "Only base registers and frame indices are supported.");
4931
4932 // Check for both base regs and base FI.
4933 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4934 return false;
4935
4936 // Only cluster up to a single pair.
4937 if (ClusterSize > 2)
4938 return false;
4939
4940 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4941 return false;
4942
4943 // Can we pair these instructions based on their opcodes?
4944 unsigned FirstOpc = FirstLdSt.getOpcode();
4945 unsigned SecondOpc = SecondLdSt.getOpcode();
4946 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4947 return false;
4948
4949 // Can't merge volatiles or load/stores that have a hint to avoid pair
4950 // formation, for example.
4951 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4952 !isCandidateToMergeOrPair(SecondLdSt))
4953 return false;
4954
4955 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4956 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4957 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4958 return false;
4959
4960 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4961 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4962 return false;
4963
4964 // Pairwise instructions have a 7-bit signed offset field.
4965 if (Offset1 > 63 || Offset1 < -64)
4966 return false;
4967
4968   // The caller should already have ordered FirstLdSt/SecondLdSt by offset;
4969   // the one exception is bases that are different frame indices.
4970 if (BaseOp1.isFI()) {
4971 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4972 "Caller should have ordered offsets.");
4973
4974 const MachineFrameInfo &MFI =
4975 FirstLdSt.getParent()->getParent()->getFrameInfo();
4976 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4977 BaseOp2.getIndex(), Offset2, SecondOpc);
4978 }
4979
4980 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4981
4982 return Offset1 + 1 == Offset2;
4983}
4984
4985 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4986                                             MCRegister Reg, unsigned SubIdx,
4987 unsigned State,
4988 const TargetRegisterInfo *TRI) {
4989 if (!SubIdx)
4990 return MIB.addReg(Reg, State);
4991
4992 if (Reg.isPhysical())
4993 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4994 return MIB.addReg(Reg, State, SubIdx);
4995}
4996
4997static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4998 unsigned NumRegs) {
4999   // We really want the positive remainder mod 32 here, which happens to be
5000   // easily obtainable with a mask.
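  // E.g. for a 3-register tuple copy from a source starting at encoding 1 to a
  // destination starting at encoding 2, (2 - 1) & 0x1f == 1 < 3, so a forward
  // sub-register copy would clobber source registers before reading them and
  // the copy must instead be emitted in reverse order.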
5001 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5002}
5003
5004 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5005                                         MachineBasicBlock::iterator I,
5006                                         const DebugLoc &DL, MCRegister DestReg,
5007 MCRegister SrcReg, bool KillSrc,
5008 unsigned Opcode,
5009 ArrayRef<unsigned> Indices) const {
5010 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5011   const TargetRegisterInfo *TRI = &getRegisterInfo();
5012   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5013 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5014 unsigned NumRegs = Indices.size();
5015
5016 int SubReg = 0, End = NumRegs, Incr = 1;
5017 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5018 SubReg = NumRegs - 1;
5019 End = -1;
5020 Incr = -1;
5021 }
5022
5023 for (; SubReg != End; SubReg += Incr) {
5024 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5025 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5026 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5027 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5028 }
5029}
5030
5031 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5032                                        MachineBasicBlock::iterator I,
5033                                        const DebugLoc &DL, MCRegister DestReg,
5034 MCRegister SrcReg, bool KillSrc,
5035 unsigned Opcode, unsigned ZeroReg,
5036 llvm::ArrayRef<unsigned> Indices) const {
5037   const TargetRegisterInfo *TRI = &getRegisterInfo();
5038   unsigned NumRegs = Indices.size();
5039
5040#ifndef NDEBUG
5041 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5042 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5043 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5044 "GPR reg sequences should not be able to overlap");
5045#endif
5046
5047 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5048 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5049 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5050 MIB.addReg(ZeroReg);
5051 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5052 MIB.addImm(0);
5053 }
5054}
5055
5056 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5057                                    MachineBasicBlock::iterator I,
5058                                    const DebugLoc &DL, Register DestReg,
5059 Register SrcReg, bool KillSrc,
5060 bool RenamableDest,
5061 bool RenamableSrc) const {
5062 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5063 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
5064     const TargetRegisterInfo *TRI = &getRegisterInfo();
5065
5066 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5067 // If either operand is WSP, expand to ADD #0.
5068 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5069 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5070 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5071 MCRegister DestRegX = TRI->getMatchingSuperReg(
5072 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5073 MCRegister SrcRegX = TRI->getMatchingSuperReg(
5074 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5075 // This instruction is reading and writing X registers. This may upset
5076 // the register scavenger and machine verifier, so we need to indicate
5077 // that we are reading an undefined value from SrcRegX, but a proper
5078 // value from SrcReg.
5079 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5080 .addReg(SrcRegX, RegState::Undef)
5081 .addImm(0)
5082               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5083               .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5084 } else {
5085 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5086 .addReg(SrcReg, getKillRegState(KillSrc))
5087 .addImm(0)
5088             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5089       }
5090 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
5091 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5092 .addImm(0)
5093           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5094     } else {
5095 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5096 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5097 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5098 MCRegister DestRegX = TRI->getMatchingSuperReg(
5099 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5100 assert(DestRegX.isValid() && "Destination super-reg not valid");
5101 MCRegister SrcRegX =
5102 SrcReg == AArch64::WZR
5103 ? AArch64::XZR
5104 : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
5105 &AArch64::GPR64spRegClass);
5106 assert(SrcRegX.isValid() && "Source super-reg not valid");
5107 // This instruction is reading and writing X registers. This may upset
5108 // the register scavenger and machine verifier, so we need to indicate
5109 // that we are reading an undefined value from SrcRegX, but a proper
5110 // value from SrcReg.
5111 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5112 .addReg(AArch64::XZR)
5113 .addReg(SrcRegX, RegState::Undef)
5114 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5115 } else {
5116 // Otherwise, expand to ORR WZR.
5117 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5118 .addReg(AArch64::WZR)
5119 .addReg(SrcReg, getKillRegState(KillSrc));
5120 }
5121 }
5122 return;
5123 }
5124
5125 // Copy a Predicate register by ORRing with itself.
5126 if (AArch64::PPRRegClass.contains(DestReg) &&
5127 AArch64::PPRRegClass.contains(SrcReg)) {
5128 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5129 "Unexpected SVE register.");
5130 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5131 .addReg(SrcReg) // Pg
5132 .addReg(SrcReg)
5133 .addReg(SrcReg, getKillRegState(KillSrc));
5134 return;
5135 }
5136
5137 // Copy a predicate-as-counter register by ORRing with itself as if it
5138 // were a regular predicate (mask) register.
5139 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5140 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5141 if (DestIsPNR || SrcIsPNR) {
5142 auto ToPPR = [](MCRegister R) -> MCRegister {
5143 return (R - AArch64::PN0) + AArch64::P0;
5144 };
5145 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5146 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5147
5148 if (PPRSrcReg != PPRDestReg) {
5149 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5150 .addReg(PPRSrcReg) // Pg
5151 .addReg(PPRSrcReg)
5152 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5153 if (DestIsPNR)
5154 NewMI.addDef(DestReg, RegState::Implicit);
5155 }
5156 return;
5157 }
5158
5159 // Copy a Z register by ORRing with itself.
5160 if (AArch64::ZPRRegClass.contains(DestReg) &&
5161 AArch64::ZPRRegClass.contains(SrcReg)) {
5162 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5163 "Unexpected SVE register.");
5164 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5165 .addReg(SrcReg)
5166 .addReg(SrcReg, getKillRegState(KillSrc));
5167 return;
5168 }
5169
5170 // Copy a Z register pair by copying the individual sub-registers.
5171 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5172 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5173 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5174 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5175 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5176 "Unexpected SVE register.");
5177 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5178 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5179 Indices);
5180 return;
5181 }
5182
5183 // Copy a Z register triple by copying the individual sub-registers.
5184 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5185 AArch64::ZPR3RegClass.contains(SrcReg)) {
5186 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5187 "Unexpected SVE register.");
5188 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5189 AArch64::zsub2};
5190 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5191 Indices);
5192 return;
5193 }
5194
5195 // Copy a Z register quad by copying the individual sub-registers.
5196 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5197 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5198 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5199 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5200 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5201 "Unexpected SVE register.");
5202 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5203 AArch64::zsub2, AArch64::zsub3};
5204 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5205 Indices);
5206 return;
5207 }
5208
5209 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5210 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5211 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5212 // If either operand is SP, expand to ADD #0.
5213 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5214 .addReg(SrcReg, getKillRegState(KillSrc))
5215 .addImm(0)
5217 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
5218 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5219 .addImm(0)
5221 } else {
5222 // Otherwise, expand to ORR XZR.
5223 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5224 .addReg(AArch64::XZR)
5225 .addReg(SrcReg, getKillRegState(KillSrc));
5226 }
5227 return;
5228 }
5229
5230 // Copy a DDDD register quad by copying the individual sub-registers.
5231 if (AArch64::DDDDRegClass.contains(DestReg) &&
5232 AArch64::DDDDRegClass.contains(SrcReg)) {
5233 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5234 AArch64::dsub2, AArch64::dsub3};
5235 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5236 Indices);
5237 return;
5238 }
5239
5240 // Copy a DDD register triple by copying the individual sub-registers.
5241 if (AArch64::DDDRegClass.contains(DestReg) &&
5242 AArch64::DDDRegClass.contains(SrcReg)) {
5243 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5244 AArch64::dsub2};
5245 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5246 Indices);
5247 return;
5248 }
5249
5250 // Copy a DD register pair by copying the individual sub-registers.
5251 if (AArch64::DDRegClass.contains(DestReg) &&
5252 AArch64::DDRegClass.contains(SrcReg)) {
5253 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5254 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5255 Indices);
5256 return;
5257 }
5258
5259 // Copy a QQQQ register quad by copying the individual sub-registers.
5260 if (AArch64::QQQQRegClass.contains(DestReg) &&
5261 AArch64::QQQQRegClass.contains(SrcReg)) {
5262 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5263 AArch64::qsub2, AArch64::qsub3};
5264 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5265 Indices);
5266 return;
5267 }
5268
5269 // Copy a QQQ register triple by copying the individual sub-registers.
5270 if (AArch64::QQQRegClass.contains(DestReg) &&
5271 AArch64::QQQRegClass.contains(SrcReg)) {
5272 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5273 AArch64::qsub2};
5274 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5275 Indices);
5276 return;
5277 }
5278
5279 // Copy a QQ register pair by copying the individual sub-registers.
5280 if (AArch64::QQRegClass.contains(DestReg) &&
5281 AArch64::QQRegClass.contains(SrcReg)) {
5282 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5283 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5284 Indices);
5285 return;
5286 }
5287
5288 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5289 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5290 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5291 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5292 AArch64::XZR, Indices);
5293 return;
5294 }
5295
5296 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5297 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5298 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5299 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5300 AArch64::WZR, Indices);
5301 return;
5302 }
5303
5304 if (AArch64::FPR128RegClass.contains(DestReg) &&
5305 AArch64::FPR128RegClass.contains(SrcReg)) {
5306 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5307 !Subtarget.isNeonAvailable())
5308 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5309 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5310 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5311 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5312 else if (Subtarget.isNeonAvailable())
5313 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5314 .addReg(SrcReg)
5315 .addReg(SrcReg, getKillRegState(KillSrc));
5316 else {
5317 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5318 .addReg(AArch64::SP, RegState::Define)
5319 .addReg(SrcReg, getKillRegState(KillSrc))
5320 .addReg(AArch64::SP)
5321 .addImm(-16);
5322 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5323 .addReg(AArch64::SP, RegState::Define)
5324 .addReg(DestReg, RegState::Define)
5325 .addReg(AArch64::SP)
5326 .addImm(16);
5327 }
5328 return;
5329 }
5330
5331 if (AArch64::FPR64RegClass.contains(DestReg) &&
5332 AArch64::FPR64RegClass.contains(SrcReg)) {
5333 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5334 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5335 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5336       const TargetRegisterInfo *TRI = &getRegisterInfo();
5337       MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub,
5338 &AArch64::FPR128RegClass);
5339 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub,
5340 &AArch64::FPR128RegClass);
5341 // This instruction is reading and writing Q registers. This may upset
5342 // the register scavenger and machine verifier, so we need to indicate
5343 // that we are reading an undefined value from SrcRegQ, but a proper
5344 // value from SrcReg.
5345 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5346 .addReg(SrcRegQ, RegState::Undef)
5347 .addReg(SrcRegQ, RegState::Undef)
5348 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5349 } else {
5350 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5351 .addReg(SrcReg, getKillRegState(KillSrc));
5352 }
5353 return;
5354 }
5355
5356 if (AArch64::FPR32RegClass.contains(DestReg) &&
5357 AArch64::FPR32RegClass.contains(SrcReg)) {
5358 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5359 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5360 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5361       const TargetRegisterInfo *TRI = &getRegisterInfo();
5362       MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5363 &AArch64::FPR128RegClass);
5364 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5365 &AArch64::FPR128RegClass);
5366 // This instruction is reading and writing Q registers. This may upset
5367 // the register scavenger and machine verifier, so we need to indicate
5368 // that we are reading an undefined value from SrcRegQ, but a proper
5369 // value from SrcReg.
5370 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5371 .addReg(SrcRegQ, RegState::Undef)
5372 .addReg(SrcRegQ, RegState::Undef)
5373 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5374 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5375 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5376       const TargetRegisterInfo *TRI = &getRegisterInfo();
5377       MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5378 &AArch64::FPR64RegClass);
5379 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5380 &AArch64::FPR64RegClass);
5381 // This instruction is reading and writing D registers. This may upset
5382 // the register scavenger and machine verifier, so we need to indicate
5383 // that we are reading an undefined value from SrcRegD, but a proper
5384 // value from SrcReg.
5385 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5386 .addReg(SrcRegD, RegState::Undef)
5387 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5388 } else {
5389 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5390 .addReg(SrcReg, getKillRegState(KillSrc));
5391 }
5392 return;
5393 }
5394
5395 if (AArch64::FPR16RegClass.contains(DestReg) &&
5396 AArch64::FPR16RegClass.contains(SrcReg)) {
5397 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5398 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5399 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5400       const TargetRegisterInfo *TRI = &getRegisterInfo();
5401       MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5402 &AArch64::FPR128RegClass);
5403 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5404 &AArch64::FPR128RegClass);
5405 // This instruction is reading and writing Q registers. This may upset
5406 // the register scavenger and machine verifier, so we need to indicate
5407 // that we are reading an undefined value from SrcRegQ, but a proper
5408 // value from SrcReg.
5409 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5410 .addReg(SrcRegQ, RegState::Undef)
5411 .addReg(SrcRegQ, RegState::Undef)
5412 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5413 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5414 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5415       const TargetRegisterInfo *TRI = &getRegisterInfo();
5416       MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5417 &AArch64::FPR64RegClass);
5418 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5419 &AArch64::FPR64RegClass);
5420 // This instruction is reading and writing D registers. This may upset
5421 // the register scavenger and machine verifier, so we need to indicate
5422 // that we are reading an undefined value from SrcRegD, but a proper
5423 // value from SrcReg.
5424 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5425 .addReg(SrcRegD, RegState::Undef)
5426 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5427 } else {
5428 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5429 &AArch64::FPR32RegClass);
5430 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5431 &AArch64::FPR32RegClass);
5432 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5433 .addReg(SrcReg, getKillRegState(KillSrc));
5434 }
5435 return;
5436 }
5437
5438 if (AArch64::FPR8RegClass.contains(DestReg) &&
5439 AArch64::FPR8RegClass.contains(SrcReg)) {
5440 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5441 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5442 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5444 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5445 &AArch64::FPR128RegClass);
5446 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5447 &AArch64::FPR128RegClass);
5448 // This instruction is reading and writing Q registers. This may upset
5449 // the register scavenger and machine verifier, so we need to indicate
5450 // that we are reading an undefined value from SrcRegQ, but a proper
5451 // value from SrcReg.
5452 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5453 .addReg(SrcRegQ, RegState::Undef)
5454 .addReg(SrcRegQ, RegState::Undef)
5455 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5456 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5457 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5459 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5460 &AArch64::FPR64RegClass);
5461 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5462 &AArch64::FPR64RegClass);
5463 // This instruction is reading and writing D registers. This may upset
5464 // the register scavenger and machine verifier, so we need to indicate
5465 // that we are reading an undefined value from SrcRegD, but a proper
5466 // value from SrcReg.
5467 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5468 .addReg(SrcRegD, RegState::Undef)
5469 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5470 } else {
5471 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5472 &AArch64::FPR32RegClass);
5473 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5474 &AArch64::FPR32RegClass);
5475 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5476 .addReg(SrcReg, getKillRegState(KillSrc));
5477 }
5478 return;
5479 }
5480
5481 // Copies between GPR64 and FPR64.
5482 if (AArch64::FPR64RegClass.contains(DestReg) &&
5483 AArch64::GPR64RegClass.contains(SrcReg)) {
5484 if (AArch64::XZR == SrcReg) {
5485 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5486 } else {
5487 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5488 .addReg(SrcReg, getKillRegState(KillSrc));
5489 }
5490 return;
5491 }
5492 if (AArch64::GPR64RegClass.contains(DestReg) &&
5493 AArch64::FPR64RegClass.contains(SrcReg)) {
5494 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5495 .addReg(SrcReg, getKillRegState(KillSrc));
5496 return;
5497 }
5498 // Copies between GPR32 and FPR32.
5499 if (AArch64::FPR32RegClass.contains(DestReg) &&
5500 AArch64::GPR32RegClass.contains(SrcReg)) {
5501 if (AArch64::WZR == SrcReg) {
5502 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5503 } else {
5504 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5505 .addReg(SrcReg, getKillRegState(KillSrc));
5506 }
5507 return;
5508 }
5509 if (AArch64::GPR32RegClass.contains(DestReg) &&
5510 AArch64::FPR32RegClass.contains(SrcReg)) {
5511 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5512 .addReg(SrcReg, getKillRegState(KillSrc));
5513 return;
5514 }
5515
5516 if (DestReg == AArch64::NZCV) {
5517 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5518 BuildMI(MBB, I, DL, get(AArch64::MSR))
5519 .addImm(AArch64SysReg::NZCV)
5520 .addReg(SrcReg, getKillRegState(KillSrc))
5521 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5522 return;
5523 }
5524
5525 if (SrcReg == AArch64::NZCV) {
5526 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5527 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5528 .addImm(AArch64SysReg::NZCV)
5529 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5530 return;
5531 }
5532
5533#ifndef NDEBUG
5535 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5536 << TRI.getRegAsmName(SrcReg) << "\n";
5537#endif
5538 llvm_unreachable("unimplemented reg-to-reg copy");
5539}
5540
5543 MachineBasicBlock::iterator InsertBefore,
5544 const MCInstrDesc &MCID,
5545 Register SrcReg, bool IsKill,
5546 unsigned SubIdx0, unsigned SubIdx1, int FI,
5547 MachineMemOperand *MMO) {
5548 Register SrcReg0 = SrcReg;
5549 Register SrcReg1 = SrcReg;
5550 if (SrcReg.isPhysical()) {
5551 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5552 SubIdx0 = 0;
5553 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5554 SubIdx1 = 0;
5555 }
5556 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5557 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5558 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5559 .addFrameIndex(FI)
5560 .addImm(0)
5561 .addMemOperand(MMO);
5562}
5563
5566 Register SrcReg, bool isKill, int FI,
5567 const TargetRegisterClass *RC,
5568 const TargetRegisterInfo *TRI,
5569 Register VReg,
5570 MachineInstr::MIFlag Flags) const {
5571 MachineFunction &MF = *MBB.getParent();
5572 MachineFrameInfo &MFI = MF.getFrameInfo();
5573
5575 MachineMemOperand *MMO =
5577 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5578 unsigned Opc = 0;
5579 bool Offset = true;
5581 unsigned StackID = TargetStackID::Default;
5582 switch (TRI->getSpillSize(*RC)) {
5583 case 1:
5584 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5585 Opc = AArch64::STRBui;
5586 break;
5587 case 2: {
5588 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5589 Opc = AArch64::STRHui;
5590 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5591 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5592 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5593 "Unexpected register store without SVE store instructions");
5594 Opc = AArch64::STR_PXI;
5596 }
5597 break;
5598 }
5599 case 4:
5600 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5601 Opc = AArch64::STRWui;
5602 if (SrcReg.isVirtual())
5603 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5604 else
5605 assert(SrcReg != AArch64::WSP);
5606 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5607 Opc = AArch64::STRSui;
5608 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5609 Opc = AArch64::STR_PPXI;
5611 }
5612 break;
5613 case 8:
5614 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5615 Opc = AArch64::STRXui;
5616 if (SrcReg.isVirtual())
5617 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5618 else
5619 assert(SrcReg != AArch64::SP);
5620 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5621 Opc = AArch64::STRDui;
5622 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5624 get(AArch64::STPWi), SrcReg, isKill,
5625 AArch64::sube32, AArch64::subo32, FI, MMO);
5626 return;
5627 }
5628 break;
5629 case 16:
5630 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5631 Opc = AArch64::STRQui;
5632 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5633 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5634 Opc = AArch64::ST1Twov1d;
5635 Offset = false;
5636 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5638 get(AArch64::STPXi), SrcReg, isKill,
5639 AArch64::sube64, AArch64::subo64, FI, MMO);
5640 return;
5641 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5642 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5643 "Unexpected register store without SVE store instructions");
5644 Opc = AArch64::STR_ZXI;
5646 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5647 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5648 "Unexpected predicate store without SVE store instructions");
5649 Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
5651 }
5652 break;
5653 case 24:
5654 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5655 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5656 Opc = AArch64::ST1Threev1d;
5657 Offset = false;
5658 }
5659 break;
5660 case 32:
5661 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5662 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5663 Opc = AArch64::ST1Fourv1d;
5664 Offset = false;
5665 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5666 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5667 Opc = AArch64::ST1Twov2d;
5668 Offset = false;
5669 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5670 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5671 "Unexpected register store without SVE store instructions");
5672 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5674 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5675 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5676 "Unexpected register store without SVE store instructions");
5677 Opc = AArch64::STR_ZZXI;
5679 }
5680 break;
5681 case 48:
5682 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5683 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5684 Opc = AArch64::ST1Threev2d;
5685 Offset = false;
5686 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5687 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5688 "Unexpected register store without SVE store instructions");
5689 Opc = AArch64::STR_ZZZXI;
5691 }
5692 break;
5693 case 64:
5694 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5695 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5696 Opc = AArch64::ST1Fourv2d;
5697 Offset = false;
5698 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5699 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5700 "Unexpected register store without SVE store instructions");
5701 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5703 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5704 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5705 "Unexpected register store without SVE store instructions");
5706 Opc = AArch64::STR_ZZZZXI;
5708 }
5709 break;
5710 }
5711 assert(Opc && "Unknown register class");
5712 MFI.setStackID(FI, StackID);
5713
5715 .addReg(SrcReg, getKillRegState(isKill))
5716 .addFrameIndex(FI);
5717
5718 if (Offset)
5719 MI.addImm(0);
5720 if (PNRReg.isValid())
5721 MI.addDef(PNRReg, RegState::Implicit);
5722 MI.addMemOperand(MMO);
5723}
5724
5727 MachineBasicBlock::iterator InsertBefore,
5728 const MCInstrDesc &MCID,
5729 Register DestReg, unsigned SubIdx0,
5730 unsigned SubIdx1, int FI,
5731 MachineMemOperand *MMO) {
5732 Register DestReg0 = DestReg;
5733 Register DestReg1 = DestReg;
5734 bool IsUndef = true;
5735 if (DestReg.isPhysical()) {
5736 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5737 SubIdx0 = 0;
5738 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5739 SubIdx1 = 0;
5740 IsUndef = false;
5741 }
5742 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5743 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5744 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5745 .addFrameIndex(FI)
5746 .addImm(0)
5747 .addMemOperand(MMO);
5748}
5749
5752 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5753 Register VReg, MachineInstr::MIFlag Flags) const {
5754 MachineFunction &MF = *MBB.getParent();
5755 MachineFrameInfo &MFI = MF.getFrameInfo();
5757 MachineMemOperand *MMO =
5759 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5760
5761 unsigned Opc = 0;
5762 bool Offset = true;
5763 unsigned StackID = TargetStackID::Default;
5765 switch (TRI->getSpillSize(*RC)) {
5766 case 1:
5767 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5768 Opc = AArch64::LDRBui;
5769 break;
5770 case 2: {
5771 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5772 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5773 Opc = AArch64::LDRHui;
5774 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5775 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5776 "Unexpected register load without SVE load instructions");
5777 if (IsPNR)
5778 PNRReg = DestReg;
5779 Opc = AArch64::LDR_PXI;
5781 }
5782 break;
5783 }
5784 case 4:
5785 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5786 Opc = AArch64::LDRWui;
5787 if (DestReg.isVirtual())
5788 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5789 else
5790 assert(DestReg != AArch64::WSP);
5791 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5792 Opc = AArch64::LDRSui;
5793 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5794 Opc = AArch64::LDR_PPXI;
5796 }
5797 break;
5798 case 8:
5799 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5800 Opc = AArch64::LDRXui;
5801 if (DestReg.isVirtual())
5802 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5803 else
5804 assert(DestReg != AArch64::SP);
5805 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5806 Opc = AArch64::LDRDui;
5807 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5809 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5810 AArch64::subo32, FI, MMO);
5811 return;
5812 }
5813 break;
5814 case 16:
5815 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5816 Opc = AArch64::LDRQui;
5817 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5818 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5819 Opc = AArch64::LD1Twov1d;
5820 Offset = false;
5821 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5823 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5824 AArch64::subo64, FI, MMO);
5825 return;
5826 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5827 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5828 "Unexpected register load without SVE load instructions");
5829 Opc = AArch64::LDR_ZXI;
5831 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5832 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5833 "Unexpected predicate load without SVE load instructions");
5834 Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
5836 }
5837 break;
5838 case 24:
5839 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5840 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5841 Opc = AArch64::LD1Threev1d;
5842 Offset = false;
5843 }
5844 break;
5845 case 32:
5846 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5847 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5848 Opc = AArch64::LD1Fourv1d;
5849 Offset = false;
5850 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5851 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5852 Opc = AArch64::LD1Twov2d;
5853 Offset = false;
5854 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5855 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5856 "Unexpected register load without SVE load instructions");
5857 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5859 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5860 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5861 "Unexpected register load without SVE load instructions");
5862 Opc = AArch64::LDR_ZZXI;
5864 }
5865 break;
5866 case 48:
5867 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5868 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5869 Opc = AArch64::LD1Threev2d;
5870 Offset = false;
5871 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5872 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5873 "Unexpected register load without SVE load instructions");
5874 Opc = AArch64::LDR_ZZZXI;
5876 }
5877 break;
5878 case 64:
5879 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5880 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5881 Opc = AArch64::LD1Fourv2d;
5882 Offset = false;
5883 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5884 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5885 "Unexpected register load without SVE load instructions");
5886 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5888 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5889 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5890 "Unexpected register load without SVE load instructions");
5891 Opc = AArch64::LDR_ZZZZXI;
5893 }
5894 break;
5895 }
5896
5897 assert(Opc && "Unknown register class");
5898 MFI.setStackID(FI, StackID);
5899
5901 .addReg(DestReg, getDefRegState(true))
5902 .addFrameIndex(FI);
5903 if (Offset)
5904 MI.addImm(0);
5905 if (PNRReg.isValid() && !PNRReg.isVirtual())
5906 MI.addDef(PNRReg, RegState::Implicit);
5907 MI.addMemOperand(MMO);
5908}
5909
5911 const MachineInstr &UseMI,
5912 const TargetRegisterInfo *TRI) {
5913 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5914 UseMI.getIterator()),
5915 [TRI](const MachineInstr &I) {
5916 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5917 I.readsRegister(AArch64::NZCV, TRI);
5918 });
5919}
5920
5921void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5922 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5923 // The smallest scalable element supported by scaled SVE addressing
5924 // modes is a predicate, which is 2 scalable bytes in size. So the scalable
5925 // byte offset must always be a multiple of 2.
5926 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5927
5928 // VGSized offsets are divided by '2', because the VG register is the
5929 // number of 64-bit granules as opposed to 128-bit vector chunks,
5930 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5931 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5932 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
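// Editor's note -- an illustrative worked example, not part of the original
// source: for StackOffset::get(/*Fixed=*/16, /*Scalable=*/24), ByteSized
// becomes 16 and VGSized becomes 24 / 2 = 12, so the resulting DWARF
// expression describes the location as 16 + VG * 12 bytes.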
5933 ByteSized = Offset.getFixed();
5934 VGSized = Offset.getScalable() / 2;
5935}
5936
5937/// Returns the offset in parts to which this frame offset can be
5938/// decomposed for the purpose of describing a frame offset.
5939/// For non-scalable offsets this is simply its byte size.
5940void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5941 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5942 int64_t &NumDataVectors) {
5943 // The smallest scalable element supported by scaled SVE addressing
5944 // modes is a predicate, which is 2 scalable bytes in size. So the scalable
5945 // byte offset must always be a multiple of 2.
5946 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5947
5948 NumBytes = Offset.getFixed();
5949 NumDataVectors = 0;
5950 NumPredicateVectors = Offset.getScalable() / 2;
5951 // This method computes the parts used to adjust the frame offset.
5952 // If the offset requires ADDPL and would need more than two ADDPL
5953 // instructions, part of the offset is folded into NumDataVectors so that
5954 // ADDVL can cover that part, reducing the number of ADDPL instructions.
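// Editor's note -- an illustrative worked example, not part of the original
// source: a scalable offset of 140 bytes gives NumPredicateVectors = 70, which
// is above the 62 threshold. 70 / 8 = 8 is folded into NumDataVectors and
// 70 - 64 = 6 remains in NumPredicateVectors, so the adjustment needs one
// ADDVL #8 plus one ADDPL #6 instead of three ADDPL instructions.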
5955 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5956 NumPredicateVectors > 62) {
5957 NumDataVectors = NumPredicateVectors / 8;
5958 NumPredicateVectors -= NumDataVectors * 8;
5959 }
5960}
5961
5962// Convenience function to create a DWARF expression for: Constant `Operation`.
5963 // This helper emits compact sequences for common cases. For example, for `-15
5964 // DW_OP_plus`, this helper would create `DW_OP_lit15 DW_OP_minus`.
5967 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
5968 // -Constant (1 to 31)
5969 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
5970 Operation = dwarf::DW_OP_minus;
5971 } else if (Constant >= 0 && Constant <= 31) {
5972 // Literal value 0 to 31
5973 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
5974 } else {
5975 // Signed constant
5976 Expr.push_back(dwarf::DW_OP_consts);
5978 }
5979 return Expr.push_back(Operation);
5980}
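// Editor's note -- illustrative examples of the helper above, not part of the
// original source:
//   appendConstantExpr(Expr, -15, DW_OP_plus) emits DW_OP_lit15, DW_OP_minus;
//   appendConstantExpr(Expr, 8, DW_OP_mul) emits DW_OP_lit8, DW_OP_mul;
//   appendConstantExpr(Expr, 40, DW_OP_plus) emits DW_OP_consts 40, DW_OP_plus.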
5981
5982// Convenience function to create a DWARF expression for a register.
5983static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
5984 Expr.push_back((char)dwarf::DW_OP_bregx);
5986 Expr.push_back(0);
5987}
5988
5989// Convenience function to create a DWARF expression for loading a register from
5990// a CFA offset.
5992 int64_t OffsetFromDefCFA) {
5993 // This assumes the top of the DWARF stack contains the CFA.
5994 Expr.push_back(dwarf::DW_OP_dup);
5995 // Add the offset to the register.
5996 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
5997 // Dereference the address (loads a 64-bit value).
5998 Expr.push_back(dwarf::DW_OP_deref);
5999}
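// Editor's note -- an illustrative example, not part of the original source:
// appendLoadRegExpr(Expr, -16) emits DW_OP_dup, DW_OP_lit16, DW_OP_minus,
// DW_OP_deref, i.e. it loads the 64-bit value stored 16 bytes below the CFA
// that is already on top of the DWARF expression stack.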
6000
6001// Convenience function to create a comment for
6002// (+/-) NumBytes (* RegScale)?
6003static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6004 StringRef RegScale = {}) {
6005 if (NumBytes) {
6006 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6007 if (!RegScale.empty())
6008 Comment << ' ' << RegScale;
6009 }
6010}
6011
6012// Creates an MCCFIInstruction:
6013// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6015 unsigned Reg,
6016 const StackOffset &Offset) {
6017 int64_t NumBytes, NumVGScaledBytes;
6018 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6019 NumVGScaledBytes);
6020 std::string CommentBuffer;
6021 llvm::raw_string_ostream Comment(CommentBuffer);
6022
6023 if (Reg == AArch64::SP)
6024 Comment << "sp";
6025 else if (Reg == AArch64::FP)
6026 Comment << "fp";
6027 else
6028 Comment << printReg(Reg, &TRI);
6029
6030 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6031 SmallString<64> Expr;
6032 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6033 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6034 // Reg + NumBytes
6035 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6036 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6037 appendOffsetComment(NumBytes, Comment);
6038 if (NumVGScaledBytes) {
6039 // + VG * NumVGScaledBytes
6040 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6041 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6042 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6043 Expr.push_back(dwarf::DW_OP_plus);
6044 }
6045
6046 // Wrap this into DW_CFA_def_cfa.
6047 SmallString<64> DefCfaExpr;
6048 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6049 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6050 DefCfaExpr.append(Expr.str());
6051 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6052 Comment.str());
6053}
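// Editor's note -- an illustrative example, not part of the original source,
// assuming the usual AArch64 DWARF register numbering (sp = 31, VG = 46): for
// Reg = SP and StackOffset::get(/*Fixed=*/16, /*Scalable=*/16) the escape
// wraps the expression DW_OP_breg31 +16, DW_OP_bregx 46 0, DW_OP_lit8,
// DW_OP_mul, DW_OP_plus in DW_CFA_def_cfa_expression, with the comment
// "sp + 16 + 8 * VG".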
6054
6056 unsigned FrameReg, unsigned Reg,
6057 const StackOffset &Offset,
6058 bool LastAdjustmentWasScalable) {
6059 if (Offset.getScalable())
6060 return createDefCFAExpression(TRI, Reg, Offset);
6061
6062 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6063 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6064
6065 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6066 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6067}
6068
6071 const StackOffset &OffsetFromDefCFA,
6072 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6073 int64_t NumBytes, NumVGScaledBytes;
6074 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6075 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6076
6077 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6078
6079 // Non-scalable offsets can use DW_CFA_offset directly.
6080 if (!NumVGScaledBytes)
6081 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6082
6083 std::string CommentBuffer;
6084 llvm::raw_string_ostream Comment(CommentBuffer);
6085 Comment << printReg(Reg, &TRI) << " @ cfa";
6086
6087 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6088 assert(NumVGScaledBytes && "Expected scalable offset");
6089 SmallString<64> OffsetExpr;
6090 // + VG * NumVGScaledBytes
6091 StringRef VGRegScale;
6092 if (IncomingVGOffsetFromDefCFA) {
6093 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6094 VGRegScale = "* IncomingVG";
6095 } else {
6096 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6097 VGRegScale = "* VG";
6098 }
6099 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6100 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6101 OffsetExpr.push_back(dwarf::DW_OP_plus);
6102 if (NumBytes) {
6103 // + NumBytes
6104 appendOffsetComment(NumBytes, Comment);
6105 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6106 }
6107
6108 // Wrap this into DW_CFA_expression
6109 SmallString<64> CfaExpr;
6110 CfaExpr.push_back(dwarf::DW_CFA_expression);
6111 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6112 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6113 CfaExpr.append(OffsetExpr.str());
6114
6115 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6116 Comment.str());
6117}
6118
6119// Helper function to emit a frame offset adjustment from a given
6120// pointer (SrcReg), stored into DestReg. This function is explicit
6121// in that the caller must supply the add/sub opcode to use.
6124 const DebugLoc &DL, unsigned DestReg,
6125 unsigned SrcReg, int64_t Offset, unsigned Opc,
6126 const TargetInstrInfo *TII,
6127 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6128 bool *HasWinCFI, bool EmitCFAOffset,
6129 StackOffset CFAOffset, unsigned FrameReg) {
6130 int Sign = 1;
6131 unsigned MaxEncoding, ShiftSize;
6132 switch (Opc) {
6133 case AArch64::ADDXri:
6134 case AArch64::ADDSXri:
6135 case AArch64::SUBXri:
6136 case AArch64::SUBSXri:
6137 MaxEncoding = 0xfff;
6138 ShiftSize = 12;
6139 break;
6140 case AArch64::ADDVL_XXI:
6141 case AArch64::ADDPL_XXI:
6142 case AArch64::ADDSVL_XXI:
6143 case AArch64::ADDSPL_XXI:
6144 MaxEncoding = 31;
6145 ShiftSize = 0;
6146 if (Offset < 0) {
6147 MaxEncoding = 32;
6148 Sign = -1;
6149 Offset = -Offset;
6150 }
6151 break;
6152 default:
6153 llvm_unreachable("Unsupported opcode");
6154 }
6155
6156 // `Offset` can be in bytes or in "scalable bytes".
6157 int VScale = 1;
6158 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6159 VScale = 16;
6160 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6161 VScale = 2;
6162
6163 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6164 // scratch register. If DestReg is a virtual register, use it as the
6165 // scratch register; otherwise, create a new virtual register (to be
6166 // replaced by the scavenger at the end of PEI). That case can be optimized
6167 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6168 // register can be loaded with offset%8 and the add/sub can use an extending
6169 // instruction with LSL#3.
6170 // Currently the function handles any offsets but generates a poor sequence
6171 // of code.
6172 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
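// Editor's note -- an illustrative worked example of the splitting loop below,
// not part of the original source: for Opc = ADDXri and Offset = 0x12345, the
// first iteration emits ADD DestReg, SrcReg, #0x12, LSL #12 (consuming
// 0x12000) and the second emits ADD DestReg, DestReg, #0x345, leaving no
// remaining offset.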
6173
6174 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6175 Register TmpReg = DestReg;
6176 if (TmpReg == AArch64::XZR)
6177 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6178 &AArch64::GPR64RegClass);
6179 do {
6180 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6181 unsigned LocalShiftSize = 0;
6182 if (ThisVal > MaxEncoding) {
6183 ThisVal = ThisVal >> ShiftSize;
6184 LocalShiftSize = ShiftSize;
6185 }
6186 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6187 "Encoding cannot handle value that big");
6188
6189 Offset -= ThisVal << LocalShiftSize;
6190 if (Offset == 0)
6191 TmpReg = DestReg;
6192 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6193 .addReg(SrcReg)
6194 .addImm(Sign * (int)ThisVal);
6195 if (ShiftSize)
6196 MBI = MBI.addImm(
6198 MBI = MBI.setMIFlag(Flag);
6199
6200 auto Change =
6201 VScale == 1
6202 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6203 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6204 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6205 CFAOffset += Change;
6206 else
6207 CFAOffset -= Change;
6208 if (EmitCFAOffset && DestReg == TmpReg) {
6209 MachineFunction &MF = *MBB.getParent();
6210 const TargetSubtargetInfo &STI = MF.getSubtarget();
6211 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6212
6213 unsigned CFIIndex = MF.addFrameInst(
6214 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6215 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6216 .addCFIIndex(CFIIndex)
6217 .setMIFlags(Flag);
6218 }
6219
6220 if (NeedsWinCFI) {
6221 int Imm = (int)(ThisVal << LocalShiftSize);
6222 if (VScale != 1 && DestReg == AArch64::SP) {
6223 if (HasWinCFI)
6224 *HasWinCFI = true;
6225 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6226 .addImm(ThisVal)
6227 .setMIFlag(Flag);
6228 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6229 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6230 assert(VScale == 1 && "Expected non-scalable operation");
6231 if (HasWinCFI)
6232 *HasWinCFI = true;
6233 if (Imm == 0)
6234 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6235 else
6236 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6237 .addImm(Imm)
6238 .setMIFlag(Flag);
6239 assert(Offset == 0 && "Expected remaining offset to be zero to "
6240 "emit a single SEH directive");
6241 } else if (DestReg == AArch64::SP) {
6242 assert(VScale == 1 && "Expected non-scalable operation");
6243 if (HasWinCFI)
6244 *HasWinCFI = true;
6245 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6246 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6247 .addImm(Imm)
6248 .setMIFlag(Flag);
6249 }
6250 }
6251
6252 SrcReg = TmpReg;
6253 } while (Offset);
6254}
6255
6258 unsigned DestReg, unsigned SrcReg,
6260 MachineInstr::MIFlag Flag, bool SetNZCV,
6261 bool NeedsWinCFI, bool *HasWinCFI,
6262 bool EmitCFAOffset, StackOffset CFAOffset,
6263 unsigned FrameReg) {
6264 // If a function is marked as arm_locally_streaming, then the runtime value of
6265 // vscale in the prologue/epilogue is different from the runtime value of vscale
6266 // in the function's body. To avoid having to consider multiple vscales,
6267 // we can use `addsvl` to allocate any scalable stack-slots, which under
6268 // most circumstances will be only locals, not callee-save slots.
6269 const Function &F = MBB.getParent()->getFunction();
6270 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6271
6272 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6273 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6274 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6275
6276 // Insert ADDSXri for scalable offset at the end.
6277 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6278 if (NeedsFinalDefNZCV)
6279 SetNZCV = false;
6280
6281 // First emit non-scalable frame offsets, or a simple 'mov'.
6282 if (Bytes || (!Offset && SrcReg != DestReg)) {
6283 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6284 "SP increment/decrement not 8-byte aligned");
6285 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6286 if (Bytes < 0) {
6287 Bytes = -Bytes;
6288 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6289 }
6290 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6291 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6292 FrameReg);
6293 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6294 ? StackOffset::getFixed(-Bytes)
6295 : StackOffset::getFixed(Bytes);
6296 SrcReg = DestReg;
6297 FrameReg = DestReg;
6298 }
6299
6300 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6301 "WinCFI can't allocate fractions of an SVE data vector");
6302
6303 if (NumDataVectors) {
6304 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6305 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6306 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6307 FrameReg);
6308 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6309 SrcReg = DestReg;
6310 }
6311
6312 if (NumPredicateVectors) {
6313 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6314 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6315 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6316 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6317 FrameReg);
6318 }
6319
6320 if (NeedsFinalDefNZCV)
6321 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6322 .addReg(DestReg)
6323 .addImm(0)
6324 .addImm(0);
6325}
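// Editor's note -- an illustrative example, not part of the original source:
// emitFrameOffset with DestReg = X8, SrcReg = SP and
// StackOffset::get(/*Fixed=*/32, /*Scalable=*/144) decomposes into Bytes = 32
// and NumDataVectors = 9 (144 / 2 = 72 predicate vectors, folded because
// 72 % 8 == 0), so it emits "add x8, sp, #32" followed by "addvl x8, x8, #9"
// (or "addsvl" in an aarch64_pstate_sm_body function).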
6326
6329 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6330 LiveIntervals *LIS, VirtRegMap *VRM) const {
6331 // This is a bit of a hack. Consider this instruction:
6332 //
6333 // %0 = COPY %sp; GPR64all:%0
6334 //
6335 // We explicitly chose GPR64all for the virtual register so such a copy might
6336 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6337 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6338 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6339 //
6340 // To prevent that, we are going to constrain the %0 register class here.
6341 if (MI.isFullCopy()) {
6342 Register DstReg = MI.getOperand(0).getReg();
6343 Register SrcReg = MI.getOperand(1).getReg();
6344 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6345 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6346 return nullptr;
6347 }
6348 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6349 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6350 return nullptr;
6351 }
6352 // Nothing can be folded with a copy from/to NZCV.
6353 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6354 return nullptr;
6355 }
6356
6357 // Handle the case where a copy is being spilled or filled but the source
6358 // and destination register classes don't match. For example:
6359 //
6360 // %0 = COPY %xzr; GPR64common:%0
6361 //
6362 // In this case we can still safely fold away the COPY and generate the
6363 // following spill code:
6364 //
6365 // STRXui %xzr, %stack.0
6366 //
6367 // This also eliminates spilled cross register class COPYs (e.g. between x and
6368 // d regs) of the same size. For example:
6369 //
6370 // %0 = COPY %1; GPR64:%0, FPR64:%1
6371 //
6372 // will be filled as
6373 //
6374 // LDRDui %0, fi<#0>
6375 //
6376 // instead of
6377 //
6378 // LDRXui %Temp, fi<#0>
6379 // %0 = FMOV %Temp
6380 //
6381 if (MI.isCopy() && Ops.size() == 1 &&
6382 // Make sure we're only folding the explicit COPY defs/uses.
6383 (Ops[0] == 0 || Ops[0] == 1)) {
6384 bool IsSpill = Ops[0] == 0;
6385 bool IsFill = !IsSpill;
6387 const MachineRegisterInfo &MRI = MF.getRegInfo();
6388 MachineBasicBlock &MBB = *MI.getParent();
6389 const MachineOperand &DstMO = MI.getOperand(0);
6390 const MachineOperand &SrcMO = MI.getOperand(1);
6391 Register DstReg = DstMO.getReg();
6392 Register SrcReg = SrcMO.getReg();
6393 // This is slightly expensive to compute for physical regs since
6394 // getMinimalPhysRegClass is slow.
6395 auto getRegClass = [&](unsigned Reg) {
6396 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6397 : TRI.getMinimalPhysRegClass(Reg);
6398 };
6399
6400 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6401 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6402 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6403 "Mismatched register size in non subreg COPY");
6404 if (IsSpill)
6405 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6406 getRegClass(SrcReg), &TRI, Register());
6407 else
6408 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6409 getRegClass(DstReg), &TRI, Register());
6410 return &*--InsertPt;
6411 }
6412
6413 // Handle cases like spilling def of:
6414 //
6415 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6416 //
6417 // where the physical register source can be widened and stored to the full
6418 // virtual reg destination stack slot, in this case producing:
6419 //
6420 // STRXui %xzr, %stack.0
6421 //
6422 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6423 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6424 assert(SrcMO.getSubReg() == 0 &&
6425 "Unexpected subreg on physical register");
6426 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6427 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6428 Register());
6429 return &*--InsertPt;
6430 }
6431
6432 // Handle cases like filling use of:
6433 //
6434 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6435 //
6436 // where we can load the full virtual reg source stack slot, into the subreg
6437 // destination, in this case producing:
6438 //
6439 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6440 //
6441 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6442 const TargetRegisterClass *FillRC = nullptr;
6443 switch (DstMO.getSubReg()) {
6444 default:
6445 break;
6446 case AArch64::sub_32:
6447 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6448 FillRC = &AArch64::GPR32RegClass;
6449 break;
6450 case AArch64::ssub:
6451 FillRC = &AArch64::FPR32RegClass;
6452 break;
6453 case AArch64::dsub:
6454 FillRC = &AArch64::FPR64RegClass;
6455 break;
6456 }
6457
6458 if (FillRC) {
6459 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6460 TRI.getRegSizeInBits(*FillRC) &&
6461 "Mismatched regclass size on folded subreg COPY");
6462 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6463 Register());
6464 MachineInstr &LoadMI = *--InsertPt;
6465 MachineOperand &LoadDst = LoadMI.getOperand(0);
6466 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6467 LoadDst.setSubReg(DstMO.getSubReg());
6468 LoadDst.setIsUndef();
6469 return &LoadMI;
6470 }
6471 }
6472 }
6473
6474 // Cannot fold.
6475 return nullptr;
6476}
6477
6479 StackOffset &SOffset,
6480 bool *OutUseUnscaledOp,
6481 unsigned *OutUnscaledOp,
6482 int64_t *EmittableOffset) {
6483 // Set output values in case of early exit.
6484 if (EmittableOffset)
6485 *EmittableOffset = 0;
6486 if (OutUseUnscaledOp)
6487 *OutUseUnscaledOp = false;
6488 if (OutUnscaledOp)
6489 *OutUnscaledOp = 0;
6490
6491 // Exit early for structured vector spills/fills as they can't take an
6492 // immediate offset.
6493 switch (MI.getOpcode()) {
6494 default:
6495 break;
6496 case AArch64::LD1Rv1d:
6497 case AArch64::LD1Rv2s:
6498 case AArch64::LD1Rv2d:
6499 case AArch64::LD1Rv4h:
6500 case AArch64::LD1Rv4s:
6501 case AArch64::LD1Rv8b:
6502 case AArch64::LD1Rv8h:
6503 case AArch64::LD1Rv16b:
6504 case AArch64::LD1Twov2d:
6505 case AArch64::LD1Threev2d:
6506 case AArch64::LD1Fourv2d:
6507 case AArch64::LD1Twov1d:
6508 case AArch64::LD1Threev1d:
6509 case AArch64::LD1Fourv1d:
6510 case AArch64::ST1Twov2d:
6511 case AArch64::ST1Threev2d:
6512 case AArch64::ST1Fourv2d:
6513 case AArch64::ST1Twov1d:
6514 case AArch64::ST1Threev1d:
6515 case AArch64::ST1Fourv1d:
6516 case AArch64::ST1i8:
6517 case AArch64::ST1i16:
6518 case AArch64::ST1i32:
6519 case AArch64::ST1i64:
6520 case AArch64::IRG:
6521 case AArch64::IRGstack:
6522 case AArch64::STGloop:
6523 case AArch64::STZGloop:
6525 }
6526
6527 // Get the min/max offset and the scale.
6528 TypeSize ScaleValue(0U, false), Width(0U, false);
6529 int64_t MinOff, MaxOff;
6530 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6531 MaxOff))
6532 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6533
6534 // Construct the complete offset.
6535 bool IsMulVL = ScaleValue.isScalable();
6536 unsigned Scale = ScaleValue.getKnownMinValue();
6537 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6538
6539 const MachineOperand &ImmOpnd =
6540 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6541 Offset += ImmOpnd.getImm() * Scale;
6542
6543 // If the offset doesn't match the scale, we rewrite the instruction to
6544 // use the unscaled instruction instead. Likewise, if we have a negative
6545 // offset and there is an unscaled op to use.
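// Editor's note -- an illustrative example, not part of the original source:
// for a scaled 64-bit load such as LDRXui (Scale = 8), a total byte offset of
// 32 keeps the scaled form with immediate 32 / 8 = 4, while an offset of 12 is
// not a multiple of 8 and is rewritten to the unscaled form (LDURXi) with
// immediate 12.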
6546 std::optional<unsigned> UnscaledOp =
6548 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6549 if (useUnscaledOp &&
6550 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6551 MaxOff))
6552 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6553
6554 Scale = ScaleValue.getKnownMinValue();
6555 assert(IsMulVL == ScaleValue.isScalable() &&
6556 "Unscaled opcode has different value for scalable");
6557
6558 int64_t Remainder = Offset % Scale;
6559 assert(!(Remainder && useUnscaledOp) &&
6560 "Cannot have remainder when using unscaled op");
6561
6562 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6563 int64_t NewOffset = Offset / Scale;
6564 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6565 Offset = Remainder;
6566 else {
6567 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6568 Offset = Offset - (NewOffset * Scale);
6569 }
6570
6571 if (EmittableOffset)
6572 *EmittableOffset = NewOffset;
6573 if (OutUseUnscaledOp)
6574 *OutUseUnscaledOp = useUnscaledOp;
6575 if (OutUnscaledOp && UnscaledOp)
6576 *OutUnscaledOp = *UnscaledOp;
6577
6578 if (IsMulVL)
6579 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6580 else
6581 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6583 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6584}
6585
6587 unsigned FrameReg, StackOffset &Offset,
6588 const AArch64InstrInfo *TII) {
6589 unsigned Opcode = MI.getOpcode();
6590 unsigned ImmIdx = FrameRegIdx + 1;
6591
6592 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6593 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6594 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6595 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6596 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6597 MI.eraseFromParent();
6598 Offset = StackOffset();
6599 return true;
6600 }
6601
6602 int64_t NewOffset;
6603 unsigned UnscaledOp;
6604 bool UseUnscaledOp;
6605 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6606 &UnscaledOp, &NewOffset);
6609 // Replace the FrameIndex with FrameReg.
6610 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6611 if (UseUnscaledOp)
6612 MI.setDesc(TII->get(UnscaledOp));
6613
6614 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6615 return !Offset;
6616 }
6617
6618 return false;
6619}
6620
6626
6628 return MCInstBuilder(AArch64::HINT).addImm(0);
6629}
6630
6631// AArch64 supports MachineCombiner.
6632bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6633
6634// True when Opc sets flag
6635static bool isCombineInstrSettingFlag(unsigned Opc) {
6636 switch (Opc) {
6637 case AArch64::ADDSWrr:
6638 case AArch64::ADDSWri:
6639 case AArch64::ADDSXrr:
6640 case AArch64::ADDSXri:
6641 case AArch64::SUBSWrr:
6642 case AArch64::SUBSXrr:
6643 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6644 case AArch64::SUBSWri:
6645 case AArch64::SUBSXri:
6646 return true;
6647 default:
6648 break;
6649 }
6650 return false;
6651}
6652
6653// 32b Opcodes that can be combined with a MUL
6654static bool isCombineInstrCandidate32(unsigned Opc) {
6655 switch (Opc) {
6656 case AArch64::ADDWrr:
6657 case AArch64::ADDWri:
6658 case AArch64::SUBWrr:
6659 case AArch64::ADDSWrr:
6660 case AArch64::ADDSWri:
6661 case AArch64::SUBSWrr:
6662 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6663 case AArch64::SUBWri:
6664 case AArch64::SUBSWri:
6665 return true;
6666 default:
6667 break;
6668 }
6669 return false;
6670}
6671
6672// 64b Opcodes that can be combined with a MUL
6673static bool isCombineInstrCandidate64(unsigned Opc) {
6674 switch (Opc) {
6675 case AArch64::ADDXrr:
6676 case AArch64::ADDXri:
6677 case AArch64::SUBXrr:
6678 case AArch64::ADDSXrr:
6679 case AArch64::ADDSXri:
6680 case AArch64::SUBSXrr:
6681 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6682 case AArch64::SUBXri:
6683 case AArch64::SUBSXri:
6684 case AArch64::ADDv8i8:
6685 case AArch64::ADDv16i8:
6686 case AArch64::ADDv4i16:
6687 case AArch64::ADDv8i16:
6688 case AArch64::ADDv2i32:
6689 case AArch64::ADDv4i32:
6690 case AArch64::SUBv8i8:
6691 case AArch64::SUBv16i8:
6692 case AArch64::SUBv4i16:
6693 case AArch64::SUBv8i16:
6694 case AArch64::SUBv2i32:
6695 case AArch64::SUBv4i32:
6696 return true;
6697 default:
6698 break;
6699 }
6700 return false;
6701}
6702
6703// FP Opcodes that can be combined with a FMUL.
6704static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6705 switch (Inst.getOpcode()) {
6706 default:
6707 break;
6708 case AArch64::FADDHrr:
6709 case AArch64::FADDSrr:
6710 case AArch64::FADDDrr:
6711 case AArch64::FADDv4f16:
6712 case AArch64::FADDv8f16:
6713 case AArch64::FADDv2f32:
6714 case AArch64::FADDv2f64:
6715 case AArch64::FADDv4f32:
6716 case AArch64::FSUBHrr:
6717 case AArch64::FSUBSrr:
6718 case AArch64::FSUBDrr:
6719 case AArch64::FSUBv4f16:
6720 case AArch64::FSUBv8f16:
6721 case AArch64::FSUBv2f32:
6722 case AArch64::FSUBv2f64:
6723 case AArch64::FSUBv4f32:
6725 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6726 // the target options or if FADD/FSUB has the contract fast-math flag.
6727 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
6729 }
6730 return false;
6731}
6732
6733// Opcodes that can be combined with a MUL
6737
6738//
6739// Utility routine that checks if \param MO is defined by an
6740// \param CombineOpc instruction in the basic block \param MBB
6742 unsigned CombineOpc, unsigned ZeroReg = 0,
6743 bool CheckZeroReg = false) {
6744 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6745 MachineInstr *MI = nullptr;
6746
6747 if (MO.isReg() && MO.getReg().isVirtual())
6748 MI = MRI.getUniqueVRegDef(MO.getReg());
6749 // And it needs to be in the trace (otherwise, it won't have a depth).
6750 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
6751 return false;
6752 // Must only be used by the user we combine with.
6753 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6754 return false;
6755
6756 if (CheckZeroReg) {
6757 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6758 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6759 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6760 // The third input reg must be zero.
6761 if (MI->getOperand(3).getReg() != ZeroReg)
6762 return false;
6763 }
6764
6765 if (isCombineInstrSettingFlag(CombineOpc) &&
6766 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6767 return false;
6768
6769 return true;
6770}
6771
6772//
6773// Is \param MO defined by an integer multiply and can be combined?
6775 unsigned MulOpc, unsigned ZeroReg) {
6776 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6777}
6778
6779//
6780// Is \param MO defined by a floating-point multiply and can be combined?
6782 unsigned MulOpc) {
6783 return canCombine(MBB, MO, MulOpc);
6784}
6785
6786// TODO: There are many more machine instruction opcodes to match:
6787// 1. Other data types (integer, vectors)
6788// 2. Other math / logic operations (xor, or)
6789// 3. Other forms of the same operation (intrinsics and other variants)
6790bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6791 bool Invert) const {
6792 if (Invert)
6793 return false;
6794 switch (Inst.getOpcode()) {
6795 // == Floating-point types ==
6796 // -- Floating-point instructions --
6797 case AArch64::FADDHrr:
6798 case AArch64::FADDSrr:
6799 case AArch64::FADDDrr:
6800 case AArch64::FMULHrr:
6801 case AArch64::FMULSrr:
6802 case AArch64::FMULDrr:
6803 case AArch64::FMULX16:
6804 case AArch64::FMULX32:
6805 case AArch64::FMULX64:
6806 // -- Advanced SIMD instructions --
6807 case AArch64::FADDv4f16:
6808 case AArch64::FADDv8f16:
6809 case AArch64::FADDv2f32:
6810 case AArch64::FADDv4f32:
6811 case AArch64::FADDv2f64:
6812 case AArch64::FMULv4f16:
6813 case AArch64::FMULv8f16:
6814 case AArch64::FMULv2f32:
6815 case AArch64::FMULv4f32:
6816 case AArch64::FMULv2f64:
6817 case AArch64::FMULXv4f16:
6818 case AArch64::FMULXv8f16:
6819 case AArch64::FMULXv2f32:
6820 case AArch64::FMULXv4f32:
6821 case AArch64::FMULXv2f64:
6822 // -- SVE instructions --
6823 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6824 // in the SVE instruction set (though there are predicated ones).
6825 case AArch64::FADD_ZZZ_H:
6826 case AArch64::FADD_ZZZ_S:
6827 case AArch64::FADD_ZZZ_D:
6828 case AArch64::FMUL_ZZZ_H:
6829 case AArch64::FMUL_ZZZ_S:
6830 case AArch64::FMUL_ZZZ_D:
6833
6834 // == Integer types ==
6835 // -- Base instructions --
6836 // Opcodes MULWrr and MULXrr don't exist because
6837 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6838 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6839 // The machine-combiner does not support three-source-operand machine
6840 // instructions, so we cannot reassociate MULs.
6841 case AArch64::ADDWrr:
6842 case AArch64::ADDXrr:
6843 case AArch64::ANDWrr:
6844 case AArch64::ANDXrr:
6845 case AArch64::ORRWrr:
6846 case AArch64::ORRXrr:
6847 case AArch64::EORWrr:
6848 case AArch64::EORXrr:
6849 case AArch64::EONWrr:
6850 case AArch64::EONXrr:
6851 // -- Advanced SIMD instructions --
6852 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6853 // in the Advanced SIMD instruction set.
6854 case AArch64::ADDv8i8:
6855 case AArch64::ADDv16i8:
6856 case AArch64::ADDv4i16:
6857 case AArch64::ADDv8i16:
6858 case AArch64::ADDv2i32:
6859 case AArch64::ADDv4i32:
6860 case AArch64::ADDv1i64:
6861 case AArch64::ADDv2i64:
6862 case AArch64::MULv8i8:
6863 case AArch64::MULv16i8:
6864 case AArch64::MULv4i16:
6865 case AArch64::MULv8i16:
6866 case AArch64::MULv2i32:
6867 case AArch64::MULv4i32:
6868 case AArch64::ANDv8i8:
6869 case AArch64::ANDv16i8:
6870 case AArch64::ORRv8i8:
6871 case AArch64::ORRv16i8:
6872 case AArch64::EORv8i8:
6873 case AArch64::EORv16i8:
6874 // -- SVE instructions --
6875 case AArch64::ADD_ZZZ_B:
6876 case AArch64::ADD_ZZZ_H:
6877 case AArch64::ADD_ZZZ_S:
6878 case AArch64::ADD_ZZZ_D:
6879 case AArch64::MUL_ZZZ_B:
6880 case AArch64::MUL_ZZZ_H:
6881 case AArch64::MUL_ZZZ_S:
6882 case AArch64::MUL_ZZZ_D:
6883 case AArch64::AND_ZZZ:
6884 case AArch64::ORR_ZZZ:
6885 case AArch64::EOR_ZZZ:
6886 return true;
6887
6888 default:
6889 return false;
6890 }
6891}
6892
6893/// Find instructions that can be turned into madd.
6895 SmallVectorImpl<unsigned> &Patterns) {
6896 unsigned Opc = Root.getOpcode();
6897 MachineBasicBlock &MBB = *Root.getParent();
6898 bool Found = false;
6899
6901 return false;
6903 int Cmp_NZCV =
6904 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6905 // When NZCV is live, bail out.
6906 if (Cmp_NZCV == -1)
6907 return false;
6908 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6909 // When the opcode can't change, bail out.
6910 // CHECKME: do we miss any cases for opcode conversion?
6911 if (NewOpc == Opc)
6912 return false;
6913 Opc = NewOpc;
6914 }
6915
6916 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6917 unsigned Pattern) {
6918 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6919 Patterns.push_back(Pattern);
6920 Found = true;
6921 }
6922 };
6923
6924 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6925 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6926 Patterns.push_back(Pattern);
6927 Found = true;
6928 }
6929 };
6930
6932
6933 switch (Opc) {
6934 default:
6935 break;
6936 case AArch64::ADDWrr:
6937 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6938 "ADDWrr does not have register operands");
6939 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6940 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6941 break;
6942 case AArch64::ADDXrr:
6943 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6944 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6945 break;
6946 case AArch64::SUBWrr:
6947 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6948 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6949 break;
6950 case AArch64::SUBXrr:
6951 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6952 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6953 break;
6954 case AArch64::ADDWri:
6955 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6956 break;
6957 case AArch64::ADDXri:
6958 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6959 break;
6960 case AArch64::SUBWri:
6961 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6962 break;
6963 case AArch64::SUBXri:
6964 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6965 break;
6966 case AArch64::ADDv8i8:
6967 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6968 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6969 break;
6970 case AArch64::ADDv16i8:
6971 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6972 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6973 break;
6974 case AArch64::ADDv4i16:
6975 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6976 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6977 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6978 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6979 break;
6980 case AArch64::ADDv8i16:
6981 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6982 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6983 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6984 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6985 break;
6986 case AArch64::ADDv2i32:
6987 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6988 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6989 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6990 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6991 break;
6992 case AArch64::ADDv4i32:
6993 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6994 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6995 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6996 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6997 break;
6998 case AArch64::SUBv8i8:
6999 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7000 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7001 break;
7002 case AArch64::SUBv16i8:
7003 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7004 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7005 break;
7006 case AArch64::SUBv4i16:
7007 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7008 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7009 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7010 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7011 break;
7012 case AArch64::SUBv8i16:
7013 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7014 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7015 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7016 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7017 break;
7018 case AArch64::SUBv2i32:
7019 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7020 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7021 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7022 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7023 break;
7024 case AArch64::SUBv4i32:
7025 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7026 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7027 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7028 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7029 break;
7030 }
7031 return Found;
7032}
7033
7034bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7035 switch (Opcode) {
7036 default:
7037 break;
7038 case AArch64::UABALB_ZZZ_D:
7039 case AArch64::UABALB_ZZZ_H:
7040 case AArch64::UABALB_ZZZ_S:
7041 case AArch64::UABALT_ZZZ_D:
7042 case AArch64::UABALT_ZZZ_H:
7043 case AArch64::UABALT_ZZZ_S:
7044 case AArch64::SABALB_ZZZ_D:
7045 case AArch64::SABALB_ZZZ_S:
7046 case AArch64::SABALB_ZZZ_H:
7047 case AArch64::SABALT_ZZZ_D:
7048 case AArch64::SABALT_ZZZ_S:
7049 case AArch64::SABALT_ZZZ_H:
7050 case AArch64::UABALv16i8_v8i16:
7051 case AArch64::UABALv2i32_v2i64:
7052 case AArch64::UABALv4i16_v4i32:
7053 case AArch64::UABALv4i32_v2i64:
7054 case AArch64::UABALv8i16_v4i32:
7055 case AArch64::UABALv8i8_v8i16:
7056 case AArch64::UABAv16i8:
7057 case AArch64::UABAv2i32:
7058 case AArch64::UABAv4i16:
7059 case AArch64::UABAv4i32:
7060 case AArch64::UABAv8i16:
7061 case AArch64::UABAv8i8:
7062 case AArch64::SABALv16i8_v8i16:
7063 case AArch64::SABALv2i32_v2i64:
7064 case AArch64::SABALv4i16_v4i32:
7065 case AArch64::SABALv4i32_v2i64:
7066 case AArch64::SABALv8i16_v4i32:
7067 case AArch64::SABALv8i8_v8i16:
7068 case AArch64::SABAv16i8:
7069 case AArch64::SABAv2i32:
7070 case AArch64::SABAv4i16:
7071 case AArch64::SABAv4i32:
7072 case AArch64::SABAv8i16:
7073 case AArch64::SABAv8i8:
7074 return true;
7075 }
7076
7077 return false;
7078}
7079
7080unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7081 unsigned AccumulationOpcode) const {
7082 switch (AccumulationOpcode) {
7083 default:
7084 llvm_unreachable("Unsupported accumulation Opcode!");
7085 case AArch64::UABALB_ZZZ_D:
7086 return AArch64::UABDLB_ZZZ_D;
7087 case AArch64::UABALB_ZZZ_H:
7088 return AArch64::UABDLB_ZZZ_H;
7089 case AArch64::UABALB_ZZZ_S:
7090 return AArch64::UABDLB_ZZZ_S;
7091 case AArch64::UABALT_ZZZ_D:
7092 return AArch64::UABDLT_ZZZ_D;
7093 case AArch64::UABALT_ZZZ_H:
7094 return AArch64::UABDLT_ZZZ_H;
7095 case AArch64::UABALT_ZZZ_S:
7096 return AArch64::UABDLT_ZZZ_S;
7097 case AArch64::UABALv16i8_v8i16:
7098 return AArch64::UABDLv16i8_v8i16;
7099 case AArch64::UABALv2i32_v2i64:
7100 return AArch64::UABDLv2i32_v2i64;
7101 case AArch64::UABALv4i16_v4i32:
7102 return AArch64::UABDLv4i16_v4i32;
7103 case AArch64::UABALv4i32_v2i64:
7104 return AArch64::UABDLv4i32_v2i64;
7105 case AArch64::UABALv8i16_v4i32:
7106 return AArch64::UABDLv8i16_v4i32;
7107 case AArch64::UABALv8i8_v8i16:
7108 return AArch64::UABDLv8i8_v8i16;
7109 case AArch64::UABAv16i8:
7110 return AArch64::UABDv16i8;
7111 case AArch64::UABAv2i32:
7112 return AArch64::UABDv2i32;
7113 case AArch64::UABAv4i16:
7114 return AArch64::UABDv4i16;
7115 case AArch64::UABAv4i32:
7116 return AArch64::UABDv4i32;
7117 case AArch64::UABAv8i16:
7118 return AArch64::UABDv8i16;
7119 case AArch64::UABAv8i8:
7120 return AArch64::UABDv8i8;
7121 case AArch64::SABALB_ZZZ_D:
7122 return AArch64::SABDLB_ZZZ_D;
7123 case AArch64::SABALB_ZZZ_S:
7124 return AArch64::SABDLB_ZZZ_S;
7125 case AArch64::SABALB_ZZZ_H:
7126 return AArch64::SABDLB_ZZZ_H;
7127 case AArch64::SABALT_ZZZ_D:
7128 return AArch64::SABDLT_ZZZ_D;
7129 case AArch64::SABALT_ZZZ_S:
7130 return AArch64::SABDLT_ZZZ_S;
7131 case AArch64::SABALT_ZZZ_H:
7132 return AArch64::SABDLT_ZZZ_H;
7133 case AArch64::SABALv16i8_v8i16:
7134 return AArch64::SABDLv16i8_v8i16;
7135 case AArch64::SABALv2i32_v2i64:
7136 return AArch64::SABDLv2i32_v2i64;
7137 case AArch64::SABALv4i16_v4i32:
7138 return AArch64::SABDLv4i16_v4i32;
7139 case AArch64::SABALv4i32_v2i64:
7140 return AArch64::SABDLv4i32_v2i64;
7141 case AArch64::SABALv8i16_v4i32:
7142 return AArch64::SABDLv8i16_v4i32;
7143 case AArch64::SABALv8i8_v8i16:
7144 return AArch64::SABDLv8i8_v8i16;
7145 case AArch64::SABAv16i8:
7146 return AArch64::SABDv16i8;
7147 case AArch64::SABAv2i32:
7148 return AArch64::SABDv2i32;
7149 case AArch64::SABAv4i16:
7150 return AArch64::SABDv4i16;
7151 case AArch64::SABAv4i32:
7152 return AArch64::SABDv4i32;
7153 case AArch64::SABAv8i16:
7154 return AArch64::SABDv8i16;
7155 case AArch64::SABAv8i8:
7156 return AArch64::SABDv8i8;
7157 }
7158}
7159
7160/// Floating-Point Support
7161
7162/// Find instructions that can be turned into madd.
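/// For illustration only, a minimal sketch of the kind of rewrite these
/// patterns enable (virtual register numbers are hypothetical):
///   %3:fpr32 = FMULSrr %1, %2
///   %4:fpr32 = FADDSrr %0, %3
///   ==> %4:fpr32 = FMADDSrrr %1, %2, %0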
7163static bool getFMAPatterns(MachineInstr &Root,
7164 SmallVectorImpl<unsigned> &Patterns) {
7165
7166 if (!isCombineInstrCandidateFP(Root))
7167 return false;
7168
7169 MachineBasicBlock &MBB = *Root.getParent();
7170 bool Found = false;
7171
7172 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7173 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7174 Patterns.push_back(Pattern);
7175 return true;
7176 }
7177 return false;
7178 };
7179
7180 typedef AArch64MachineCombinerPattern MCP;
7181
7182 switch (Root.getOpcode()) {
7183 default:
7184 assert(false && "Unsupported FP instruction in combiner\n");
7185 break;
7186 case AArch64::FADDHrr:
7187 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7188 "FADDHrr does not have register operands");
7189
7190 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7191 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7192 break;
7193 case AArch64::FADDSrr:
7194 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7195 "FADDSrr does not have register operands");
7196
7197 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7198 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7199
7200 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7201 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7202 break;
7203 case AArch64::FADDDrr:
7204 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7205 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7206
7207 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7208 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7209 break;
7210 case AArch64::FADDv4f16:
7211 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7212 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7213
7214 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7215 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7216 break;
7217 case AArch64::FADDv8f16:
7218 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7219 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7220
7221 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7222 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7223 break;
7224 case AArch64::FADDv2f32:
7225 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7226 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7227
7228 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7229 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7230 break;
7231 case AArch64::FADDv2f64:
7232 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7233 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7234
7235 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7236 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7237 break;
7238 case AArch64::FADDv4f32:
7239 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7240 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7241
7242 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7243 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7244 break;
7245 case AArch64::FSUBHrr:
7246 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7247 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7248 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7249 break;
7250 case AArch64::FSUBSrr:
7251 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7252
7253 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7254 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7255
7256 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7257 break;
7258 case AArch64::FSUBDrr:
7259 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7260
7261 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7262 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7263
7264 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7265 break;
7266 case AArch64::FSUBv4f16:
7267 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7268 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7269
7270 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7271 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7272 break;
7273 case AArch64::FSUBv8f16:
7274 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7275 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7276
7277 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7278 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7279 break;
7280 case AArch64::FSUBv2f32:
7281 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7282 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7283
7284 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7285 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7286 break;
7287 case AArch64::FSUBv2f64:
7288 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7289 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7290
7291 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7292 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7293 break;
7294 case AArch64::FSUBv4f32:
7295 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7296 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7297
7298 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7299 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7300 break;
7301 }
7302 return Found;
7303}
7304
7305static bool getFMULPatterns(MachineInstr &Root,
7306 SmallVectorImpl<unsigned> &Patterns) {
7307 MachineBasicBlock &MBB = *Root.getParent();
7308 bool Found = false;
7309
7310 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7311 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7312 MachineOperand &MO = Root.getOperand(Operand);
7313 MachineInstr *MI = nullptr;
7314 if (MO.isReg() && MO.getReg().isVirtual())
7315 MI = MRI.getUniqueVRegDef(MO.getReg());
7316 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7317 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7318 MI->getOperand(1).getReg().isVirtual())
7319 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7320 if (MI && MI->getOpcode() == Opcode) {
7321 Patterns.push_back(Pattern);
7322 return true;
7323 }
7324 return false;
7325 };
7326
7327 typedef AArch64MachineCombinerPattern MCP;
7328
7329 switch (Root.getOpcode()) {
7330 default:
7331 return false;
7332 case AArch64::FMULv2f32:
7333 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7334 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7335 break;
7336 case AArch64::FMULv2f64:
7337 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7338 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7339 break;
7340 case AArch64::FMULv4f16:
7341 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7342 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7343 break;
7344 case AArch64::FMULv4f32:
7345 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7346 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7347 break;
7348 case AArch64::FMULv8f16:
7349 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7350 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7351 break;
7352 }
7353
7354 return Found;
7355}
7356
7357static bool getFNEGPatterns(MachineInstr &Root,
7358 SmallVectorImpl<unsigned> &Patterns) {
7359 unsigned Opc = Root.getOpcode();
7360 MachineBasicBlock &MBB = *Root.getParent();
7361 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7362
7363 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7364 MachineOperand &MO = Root.getOperand(1);
7365 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7366 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7367 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7368 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
7369 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
7370 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
7371 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7372 Patterns.push_back(Pattern);
7373 return true;
7374 }
7375 return false;
7376 };
7377
7378 switch (Opc) {
7379 default:
7380 break;
7381 case AArch64::FNEGDr:
7382 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7383 case AArch64::FNEGSr:
7384 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7385 }
7386
7387 return false;
7388}
7389
7390/// Return true when a code sequence can improve throughput. It
7391/// should be called only for instructions in loops.
7392/// \param Pattern - combiner pattern
7393bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7394 switch (Pattern) {
7395 default:
7396 break;
7502 return true;
7503 } // end switch (Pattern)
7504 return false;
7505}
7506
7507/// Find other MI combine patterns.
7508static bool getMiscPatterns(MachineInstr &Root,
7509 SmallVectorImpl<unsigned> &Patterns) {
7510 // A - (B + C) ==> (A - B) - C or (A - C) - B
7511 unsigned Opc = Root.getOpcode();
7512 MachineBasicBlock &MBB = *Root.getParent();
7513
7514 switch (Opc) {
7515 case AArch64::SUBWrr:
7516 case AArch64::SUBSWrr:
7517 case AArch64::SUBXrr:
7518 case AArch64::SUBSXrr:
7519 // Found candidate root.
7520 break;
7521 default:
7522 return false;
7523 }
7524
7526 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7527 -1)
7528 return false;
7529
7530 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7531 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7532 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7533 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7534 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
7535 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
7536 return true;
7537 }
7538
7539 return false;
7540}
7541
7542/// Check if the given instruction forms a gather load pattern that can be
7543/// optimized for better Memory-Level Parallelism (MLP). This function
7544/// identifies chains of NEON lane load instructions that load data from
7545/// different memory addresses into individual lanes of a 128-bit vector
7546/// register, then attempts to split the pattern into parallel loads to break
7547/// the serial dependency between instructions.
7548///
7549/// Pattern Matched:
7550/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7551/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7552///
7553/// Transformed Into:
7554/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7555/// to combine the results, enabling better memory-level parallelism.
7556///
7557/// Supported Element Types:
7558/// - 32-bit elements (LD1i32, 4 lanes total)
7559/// - 16-bit elements (LD1i16, 8 lanes total)
7560/// - 8-bit elements (LD1i8, 16 lanes total)
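/// For illustration only, a rough sketch of the 32-bit case (virtual register
/// numbers and pointer operands %p0-%p3 are hypothetical):
///   %v0 = SUBREG_TO_REG 0, (LDRSui %p0, 0), ssub   ; lane 0
///   %v1 = LD1i32 %v0, 1, %p1                       ; lane 1
///   %v2 = LD1i32 %v1, 2, %p2                       ; lane 2
///   %v3 = LD1i32 %v2, 3, %p3                       ; lane 3 (Root)
/// is rewritten into two independent two-lane chains that are merged at the
/// end:
///   %a0 = SUBREG_TO_REG 0, (LDRSui %p0, 0), ssub
///   %a1 = LD1i32 %a0, 1, %p1
///   %b0 = SUBREG_TO_REG 0, (LDRSui %p2, 0), ssub
///   %b1 = LD1i32 %b0, 1, %p3
///   %v3 = ZIP1v2i64 %a1, %b1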
7561static bool getGatherLanePattern(MachineInstr &Root,
7562 SmallVectorImpl<unsigned> &Patterns,
7563 unsigned LoadLaneOpCode, unsigned NumLanes) {
7564 const MachineFunction *MF = Root.getMF();
7565
7566 // Early exit if optimizing for size.
7567 if (MF->getFunction().hasMinSize())
7568 return false;
7569
7570 const MachineRegisterInfo &MRI = MF->getRegInfo();
7571 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7572
7573 // The root of the pattern must load into the last lane of the vector.
7574 if (Root.getOperand(2).getImm() != NumLanes - 1)
7575 return false;
7576
7577 // Check that we have loads into all lanes except lane 0.
7578 // For each load we also want to check that:
7579 // 1. It has a single non-debug use (since we will be replacing the virtual
7580 // register)
7581 // 2. That the addressing mode only uses a single pointer operand
7582 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7583 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7584 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7585 SmallVector<const MachineInstr *, 16> LoadInstrs;
7586 while (!RemainingLanes.empty() && CurrInstr &&
7587 CurrInstr->getOpcode() == LoadLaneOpCode &&
7588 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7589 CurrInstr->getNumOperands() == 4) {
7590 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7591 LoadInstrs.push_back(CurrInstr);
7592 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7593 }
7594
7595 // Check that we have found a match for lanes N-1.. 1.
7596 if (!RemainingLanes.empty())
7597 return false;
7598
7599 // Match the SUBREG_TO_REG sequence.
7600 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7601 return false;
7602
7603 // Verify that the subreg to reg loads an integer into the first lane.
7604 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7605 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7606 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7607 return false;
7608
7609 // Verify that it also has a single non debug use.
7610 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7611 return false;
7612
7613 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7614
7615 // If there is any chance of aliasing, do not apply the pattern.
7616 // Walk backward through the MBB starting from Root.
7617 // Exit early if we've encountered all load instructions or hit the search
7618 // limit.
7619 auto MBBItr = Root.getIterator();
7620 unsigned RemainingSteps = GatherOptSearchLimit;
7621 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7622 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7623 const MachineBasicBlock *MBB = Root.getParent();
7624
7625 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7626 !RemainingLoadInstrs.empty();
7627 --MBBItr, --RemainingSteps) {
7628 const MachineInstr &CurrInstr = *MBBItr;
7629
7630 // Remove this instruction from remaining loads if it's one we're tracking.
7631 RemainingLoadInstrs.erase(&CurrInstr);
7632
7633 // Check for potential aliasing with any of the load instructions to
7634 // optimize.
7635 if (CurrInstr.isLoadFoldBarrier())
7636 return false;
7637 }
7638
7639 // If we hit the search limit without finding all load instructions,
7640 // don't match the pattern.
7641 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7642 return false;
7643
7644 switch (NumLanes) {
7645 case 4:
7646 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
7647 break;
7648 case 8:
7649 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
7650 break;
7651 case 16:
7652 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
7653 break;
7654 default:
7655 llvm_unreachable("Got bad number of lanes for gather pattern.");
7656 }
7657
7658 return true;
7659}
7660
7661/// Search for patterns of LD instructions we can optimize.
7662static bool getLoadPatterns(MachineInstr &Root,
7663 SmallVectorImpl<unsigned> &Patterns) {
7664
7665 // The pattern searches for loads into single lanes.
7666 switch (Root.getOpcode()) {
7667 case AArch64::LD1i32:
7668 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7669 case AArch64::LD1i16:
7670 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7671 case AArch64::LD1i8:
7672 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7673 default:
7674 return false;
7675 }
7676}
7677
7678/// Generate optimized instruction sequence for gather load patterns to improve
7679/// Memory-Level Parallelism (MLP). This function transforms a chain of
7680/// sequential NEON lane loads into parallel vector loads that can execute
7681/// concurrently.
7682static void
7686 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7687 unsigned Pattern, unsigned NumLanes) {
7688 MachineFunction &MF = *Root.getParent()->getParent();
7689 MachineRegisterInfo &MRI = MF.getRegInfo();
7690 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7691
7692 // Gather the initial load instructions to build the pattern.
7693 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7694 MachineInstr *CurrInstr = &Root;
7695 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7696 LoadToLaneInstrs.push_back(CurrInstr);
7697 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7698 }
7699
7700 // Sort the load instructions according to the lane.
7701 llvm::sort(LoadToLaneInstrs,
7702 [](const MachineInstr *A, const MachineInstr *B) {
7703 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7704 });
7705
7706 MachineInstr *SubregToReg = CurrInstr;
7707 LoadToLaneInstrs.push_back(
7708 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7709 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7710
7711 const TargetRegisterClass *FPR128RegClass =
7712 MRI.getRegClass(Root.getOperand(0).getReg());
7713
7714 // Helper lambda to create a LD1 instruction.
7715 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7716 Register SrcRegister, unsigned Lane,
7717 Register OffsetRegister,
7718 bool OffsetRegisterKillState) {
7719 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7720 MachineInstrBuilder LoadIndexIntoRegister =
7721 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7722 NewRegister)
7723 .addReg(SrcRegister)
7724 .addImm(Lane)
7725 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
7726 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7727 InsInstrs.push_back(LoadIndexIntoRegister);
7728 return NewRegister;
7729 };
7730
7731 // Helper to create load instruction based on the NumLanes in the NEON
7732 // register we are rewriting.
7733 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
7734 Register OffsetReg,
7735 bool KillState) -> MachineInstrBuilder {
7736 unsigned Opcode;
7737 switch (NumLanes) {
7738 case 4:
7739 Opcode = AArch64::LDRSui;
7740 break;
7741 case 8:
7742 Opcode = AArch64::LDRHui;
7743 break;
7744 case 16:
7745 Opcode = AArch64::LDRBui;
7746 break;
7747 default:
7749 "Got unsupported number of lanes in machine-combiner gather pattern");
7750 }
7751 // Immediate offset load
7752 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7753 .addReg(OffsetReg)
7754 .addImm(0);
7755 };
7756
7757 // Load the remaining lanes into register 0.
7758 auto LanesToLoadToReg0 =
7759 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7760 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7761 Register PrevReg = SubregToReg->getOperand(0).getReg();
7762 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7763 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7764 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7765 OffsetRegOperand.getReg(),
7766 OffsetRegOperand.isKill());
7767 DelInstrs.push_back(LoadInstr);
7768 }
7769 Register LastLoadReg0 = PrevReg;
7770
7771 // First load into register 1. Perform an integer load to zero out the upper
7772 // lanes in a single instruction.
7773 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7774 MachineInstr *OriginalSplitLoad =
7775 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7776 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
7777 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7778
7779 const MachineOperand &OriginalSplitToLoadOffsetOperand =
7780 OriginalSplitLoad->getOperand(3);
7781 MachineInstrBuilder MiddleIndexLoadInstr =
7782 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
7783 OriginalSplitToLoadOffsetOperand.getReg(),
7784 OriginalSplitToLoadOffsetOperand.isKill());
7785
7786 InstrIdxForVirtReg.insert(
7787 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7788 InsInstrs.push_back(MiddleIndexLoadInstr);
7789 DelInstrs.push_back(OriginalSplitLoad);
7790
7791 // Subreg To Reg instruction for register 1.
7792 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7793 unsigned SubregType;
7794 switch (NumLanes) {
7795 case 4:
7796 SubregType = AArch64::ssub;
7797 break;
7798 case 8:
7799 SubregType = AArch64::hsub;
7800 break;
7801 case 16:
7802 SubregType = AArch64::bsub;
7803 break;
7804 default:
7806 "Got invalid NumLanes for machine-combiner gather pattern");
7807 }
7808
7809 auto SubRegToRegInstr =
7810 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7811 DestRegForSubregToReg)
7812 .addImm(0)
7813 .addReg(DestRegForMiddleIndex, getKillRegState(true))
7814 .addImm(SubregType);
7815 InstrIdxForVirtReg.insert(
7816 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7817 InsInstrs.push_back(SubRegToRegInstr);
7818
7819 // Load remaining lanes into register 1.
7820 auto LanesToLoadToReg1 =
7821 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7822 LoadToLaneInstrsAscending.end());
7823 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7824 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7825 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7826 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7827 OffsetRegOperand.getReg(),
7828 OffsetRegOperand.isKill());
7829
7830 // Do not add the last reg to DelInstrs - it will be removed later.
7831 if (Index == NumLanes / 2 - 2) {
7832 break;
7833 }
7834 DelInstrs.push_back(LoadInstr);
7835 }
7836 Register LastLoadReg1 = PrevReg;
7837
7838 // Create the final zip instruction to combine the results.
7839 MachineInstrBuilder ZipInstr =
7840 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7841 Root.getOperand(0).getReg())
7842 .addReg(LastLoadReg0)
7843 .addReg(LastLoadReg1);
7844 InsInstrs.push_back(ZipInstr);
7845}
7846
7860
7861/// Return true when there is potentially a faster code sequence for an
7862/// instruction chain ending in \p Root. All potential patterns are listed in
7863/// the \p Patterns vector. Patterns should be sorted in priority order since the
7864/// pattern evaluator stops checking as soon as it finds a faster sequence.
7865
7866bool AArch64InstrInfo::getMachineCombinerPatterns(
7867 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7868 bool DoRegPressureReduce) const {
7869 // Integer patterns
7870 if (getMaddPatterns(Root, Patterns))
7871 return true;
7872 // Floating point patterns
7873 if (getFMULPatterns(Root, Patterns))
7874 return true;
7875 if (getFMAPatterns(Root, Patterns))
7876 return true;
7877 if (getFNEGPatterns(Root, Patterns))
7878 return true;
7879
7880 // Other patterns
7881 if (getMiscPatterns(Root, Patterns))
7882 return true;
7883
7884 // Load patterns
7885 if (getLoadPatterns(Root, Patterns))
7886 return true;
7887
7888 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7889 DoRegPressureReduce);
7890}
7891
7893/// genFusedMultiply - Generate fused multiply instructions.
7894/// This function supports both integer and floating point instructions.
7895/// A typical example:
7896/// F|MUL I=A,B,0
7897/// F|ADD R,I,C
7898/// ==> F|MADD R,A,B,C
7899/// \param MF Containing MachineFunction
7900/// \param MRI Register information
7901/// \param TII Target information
7902/// \param Root is the F|ADD instruction
7903/// \param [out] InsInstrs is a vector of machine instructions and will
7904/// contain the generated madd instruction
7905/// \param IdxMulOpd is index of operand in Root that is the result of
7906/// the F|MUL. In the example above IdxMulOpd is 1.
7907/// \param MaddOpc the opcode of the f|madd instruction
7908/// \param RC Register class of operands
7909/// \param kind of fma instruction (addressing mode) to be generated
7910/// \param ReplacedAddend is the result register from the instruction
7911/// replacing the non-combined operand, if any.
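/// As a sketch of how \p kind changes the operand order of the generated
/// instruction (register names are hypothetical):
///   Default:     MADD/FMADD   dst, mulOp0, mulOp1, addend
///   Indexed:     FMLA*_indexed dst, addend, mulOp0, mulOp1, lane
///   Accumulator: MLA*/FMLA*    dst, addend, mulOp0, mulOp1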
7912static MachineInstr *
7913genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
7914 const TargetInstrInfo *TII, MachineInstr &Root,
7915 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7916 unsigned MaddOpc, const TargetRegisterClass *RC,
7917 FMAInstKind kind = FMAInstKind::Default,
7918 const Register *ReplacedAddend = nullptr) {
7919 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7920
7921 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7922 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7923 Register ResultReg = Root.getOperand(0).getReg();
7924 Register SrcReg0 = MUL->getOperand(1).getReg();
7925 bool Src0IsKill = MUL->getOperand(1).isKill();
7926 Register SrcReg1 = MUL->getOperand(2).getReg();
7927 bool Src1IsKill = MUL->getOperand(2).isKill();
7928
7929 Register SrcReg2;
7930 bool Src2IsKill;
7931 if (ReplacedAddend) {
7932 // If we just generated a new addend, we must be its only use.
7933 SrcReg2 = *ReplacedAddend;
7934 Src2IsKill = true;
7935 } else {
7936 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7937 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7938 }
7939
7940 if (ResultReg.isVirtual())
7941 MRI.constrainRegClass(ResultReg, RC);
7942 if (SrcReg0.isVirtual())
7943 MRI.constrainRegClass(SrcReg0, RC);
7944 if (SrcReg1.isVirtual())
7945 MRI.constrainRegClass(SrcReg1, RC);
7946 if (SrcReg2.isVirtual())
7947 MRI.constrainRegClass(SrcReg2, RC);
7948
7950 if (kind == FMAInstKind::Default)
7951 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7952 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7953 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7954 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7955 else if (kind == FMAInstKind::Indexed)
7956 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7957 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7958 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7959 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7960 .addImm(MUL->getOperand(3).getImm());
7961 else if (kind == FMAInstKind::Accumulator)
7962 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7963 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7964 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7965 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7966 else
7967 assert(false && "Invalid FMA instruction kind \n");
7968 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
7969 InsInstrs.push_back(MIB);
7970 return MUL;
7971}
7972
7973static MachineInstr *
7974genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
7975 const TargetInstrInfo *TII, MachineInstr &Root,
7976 SmallVectorImpl<MachineInstr *> &InsInstrs) {
7977 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7978
7979 unsigned Opc = 0;
7980 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
7981 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7982 Opc = AArch64::FNMADDSrrr;
7983 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7984 Opc = AArch64::FNMADDDrrr;
7985 else
7986 return nullptr;
7987
7988 Register ResultReg = Root.getOperand(0).getReg();
7989 Register SrcReg0 = MAD->getOperand(1).getReg();
7990 Register SrcReg1 = MAD->getOperand(2).getReg();
7991 Register SrcReg2 = MAD->getOperand(3).getReg();
7992 bool Src0IsKill = MAD->getOperand(1).isKill();
7993 bool Src1IsKill = MAD->getOperand(2).isKill();
7994 bool Src2IsKill = MAD->getOperand(3).isKill();
7995 if (ResultReg.isVirtual())
7996 MRI.constrainRegClass(ResultReg, RC);
7997 if (SrcReg0.isVirtual())
7998 MRI.constrainRegClass(SrcReg0, RC);
7999 if (SrcReg1.isVirtual())
8000 MRI.constrainRegClass(SrcReg1, RC);
8001 if (SrcReg2.isVirtual())
8002 MRI.constrainRegClass(SrcReg2, RC);
8003
8005 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8006 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8007 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8008 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8009 InsInstrs.push_back(MIB);
8010
8011 return MAD;
8012}
8013
8014/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
8015static MachineInstr *
8018 unsigned IdxDupOp, unsigned MulOpc,
8020 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8021 "Invalid index of FMUL operand");
8022
8023 MachineFunction &MF = *Root.getMF();
8025
8026 MachineInstr *Dup =
8027 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8028
8029 if (Dup->getOpcode() == TargetOpcode::COPY)
8030 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8031
8032 Register DupSrcReg = Dup->getOperand(1).getReg();
8033 MRI.clearKillFlags(DupSrcReg);
8034 MRI.constrainRegClass(DupSrcReg, RC);
8035
8036 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8037
8038 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8039 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8040
8041 Register ResultReg = Root.getOperand(0).getReg();
8042
8044 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8045 .add(MulOp)
8046 .addReg(DupSrcReg)
8047 .addImm(DupSrcLane);
8048
8049 InsInstrs.push_back(MIB);
8050 return &Root;
8051}
8052
8053/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8054/// instructions.
8055///
8056/// \see genFusedMultiply
8057static MachineInstr *genFusedMultiplyAcc(
8058 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8059 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8060 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8061 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8062 FMAInstKind::Accumulator);
8063}
8064
8065/// genNeg - Helper to generate an intermediate negation of the second operand
8066/// of Root
8067static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
8068 const TargetInstrInfo *TII, MachineInstr &Root,
8069 SmallVectorImpl<MachineInstr *> &InsInstrs,
8070 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8071 unsigned MnegOpc, const TargetRegisterClass *RC) {
8072 Register NewVR = MRI.createVirtualRegister(RC);
8073 MachineInstrBuilder MIB =
8074 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8075 .add(Root.getOperand(2));
8076 InsInstrs.push_back(MIB);
8077
8078 assert(InstrIdxForVirtReg.empty());
8079 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8080
8081 return NewVR;
8082}
8083
8084/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8085/// instructions with an additional negation of the accumulator
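/// For illustration only (hypothetical virtual registers), for the OP1 form
/// where the accumulator is subtracted from the product:
///   %3 = MULv8i8 %1, %2
///   %4 = SUBv8i8 %3, %0
///   ==> %n = NEGv8i8 %0
///       %4 = MLAv8i8 %n, %1, %2   ; %n + %1*%2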
8086static MachineInstr *genFusedMultiplyAccNeg(
8087 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8088 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8089 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8090 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8091 assert(IdxMulOpd == 1);
8092
8093 Register NewVR =
8094 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8095 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8096 FMAInstKind::Accumulator, &NewVR);
8097}
8098
8099/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8100/// instructions.
8101///
8102/// \see genFusedMultiply
8103static MachineInstr *genFusedMultiplyIdx(
8104 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8105 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8106 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8107 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8108 FMAInstKind::Indexed);
8109}
8110
8111/// genFusedMultiplyIdxNeg - Helper to generate indexed fused multiply accumulate
8112/// instructions with an additional negation of the accumulator
8113static MachineInstr *genFusedMultiplyIdxNeg(
8114 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8115 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8116 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8117 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8118 assert(IdxMulOpd == 1);
8119
8120 Register NewVR =
8121 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8122
8123 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8124 FMAInstKind::Indexed, &NewVR);
8125}
8126
8127/// genMaddR - Generate madd instruction and combine mul and add using
8128/// an extra virtual register
8129/// Example - an ADD intermediate needs to be stored in a register:
8130/// MUL I=A,B,0
8131/// ADD R,I,Imm
8132/// ==> ORR V, ZR, Imm
8133/// ==> MADD R,A,B,V
8134/// \param MF Containing MachineFunction
8135/// \param MRI Register information
8136/// \param TII Target information
8137/// \param Root is the ADD instruction
8138/// \param [out] InsInstrs is a vector of machine instructions and will
8139/// contain the generated madd instruction
8140/// \param IdxMulOpd is index of operand in Root that is the result of
8141/// the MUL. In the example above IdxMulOpd is 1.
8142/// \param MaddOpc the opcode of the madd instruction
8143/// \param VR is a virtual register that holds the value of an ADD operand
8144/// (V in the example above).
8145/// \param RC Register class of operands
8146static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
8147 const TargetInstrInfo *TII, MachineInstr &Root,
8149 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8150 const TargetRegisterClass *RC) {
8151 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8152
8153 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8154 Register ResultReg = Root.getOperand(0).getReg();
8155 Register SrcReg0 = MUL->getOperand(1).getReg();
8156 bool Src0IsKill = MUL->getOperand(1).isKill();
8157 Register SrcReg1 = MUL->getOperand(2).getReg();
8158 bool Src1IsKill = MUL->getOperand(2).isKill();
8159
8160 if (ResultReg.isVirtual())
8161 MRI.constrainRegClass(ResultReg, RC);
8162 if (SrcReg0.isVirtual())
8163 MRI.constrainRegClass(SrcReg0, RC);
8164 if (SrcReg1.isVirtual())
8165 MRI.constrainRegClass(SrcReg1, RC);
8167 MRI.constrainRegClass(VR, RC);
8168
8170 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8171 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8172 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8173 .addReg(VR);
8174 // Insert the MADD
8175 InsInstrs.push_back(MIB);
8176 return MUL;
8177}
8178
8179/// Do the following transformation
8180/// A - (B + C) ==> (A - B) - C
8181/// A - (B + C) ==> (A - C) - B
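/// For illustration only (hypothetical virtual registers):
///   %3:gpr32 = ADDWrr %1, %2
///   %4:gpr32 = SUBWrr %0, %3        ; A - (B + C)
///   ==> %t:gpr32 = SUBWrr %0, %1    ; A - B
///       %4:gpr32 = SUBWrr %t, %2    ; (A - B) - C
/// The nsw/nuw flags are dropped on the rewritten instructions because the
/// intermediate result may wrap even when the original expression did not.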
8182static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
8183 const TargetInstrInfo *TII, MachineInstr &Root,
8184 SmallVectorImpl<MachineInstr *> &InsInstrs,
8185 SmallVectorImpl<MachineInstr *> &DelInstrs,
8186 unsigned IdxOpd1,
8187 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8188 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8189 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8190 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8191
8192 Register ResultReg = Root.getOperand(0).getReg();
8193 Register RegA = Root.getOperand(1).getReg();
8194 bool RegAIsKill = Root.getOperand(1).isKill();
8195 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8196 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8197 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8198 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8199 Register NewVR =
8200 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8201
8202 unsigned Opcode = Root.getOpcode();
8203 if (Opcode == AArch64::SUBSWrr)
8204 Opcode = AArch64::SUBWrr;
8205 else if (Opcode == AArch64::SUBSXrr)
8206 Opcode = AArch64::SUBXrr;
8207 else
8208 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8209 "Unexpected instruction opcode.");
8210
8211 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8212 Flags &= ~MachineInstr::NoSWrap;
8213 Flags &= ~MachineInstr::NoUWrap;
8214
8215 MachineInstrBuilder MIB1 =
8216 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8217 .addReg(RegA, getKillRegState(RegAIsKill))
8218 .addReg(RegB, getKillRegState(RegBIsKill))
8219 .setMIFlags(Flags);
8220 MachineInstrBuilder MIB2 =
8221 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8222 .addReg(NewVR, getKillRegState(true))
8223 .addReg(RegC, getKillRegState(RegCIsKill))
8224 .setMIFlags(Flags);
8225
8226 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8227 InsInstrs.push_back(MIB1);
8228 InsInstrs.push_back(MIB2);
8229 DelInstrs.push_back(AddMI);
8230 DelInstrs.push_back(&Root);
8231}
8232
8233unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8234 unsigned int AccumulatorOpCode) const {
8235 switch (AccumulatorOpCode) {
8236 case AArch64::UABALB_ZZZ_D:
8237 case AArch64::SABALB_ZZZ_D:
8238 case AArch64::UABALT_ZZZ_D:
8239 case AArch64::SABALT_ZZZ_D:
8240 return AArch64::ADD_ZZZ_D;
8241 case AArch64::UABALB_ZZZ_H:
8242 case AArch64::SABALB_ZZZ_H:
8243 case AArch64::UABALT_ZZZ_H:
8244 case AArch64::SABALT_ZZZ_H:
8245 return AArch64::ADD_ZZZ_H;
8246 case AArch64::UABALB_ZZZ_S:
8247 case AArch64::SABALB_ZZZ_S:
8248 case AArch64::UABALT_ZZZ_S:
8249 case AArch64::SABALT_ZZZ_S:
8250 return AArch64::ADD_ZZZ_S;
8251 case AArch64::UABALv16i8_v8i16:
8252 case AArch64::SABALv8i8_v8i16:
8253 case AArch64::SABAv8i16:
8254 case AArch64::UABAv8i16:
8255 return AArch64::ADDv8i16;
8256 case AArch64::SABALv2i32_v2i64:
8257 case AArch64::UABALv2i32_v2i64:
8258 case AArch64::SABALv4i32_v2i64:
8259 return AArch64::ADDv2i64;
8260 case AArch64::UABALv4i16_v4i32:
8261 case AArch64::SABALv4i16_v4i32:
8262 case AArch64::SABALv8i16_v4i32:
8263 case AArch64::SABAv4i32:
8264 case AArch64::UABAv4i32:
8265 return AArch64::ADDv4i32;
8266 case AArch64::UABALv4i32_v2i64:
8267 return AArch64::ADDv2i64;
8268 case AArch64::UABALv8i16_v4i32:
8269 return AArch64::ADDv4i32;
8270 case AArch64::UABALv8i8_v8i16:
8271 case AArch64::SABALv16i8_v8i16:
8272 return AArch64::ADDv8i16;
8273 case AArch64::UABAv16i8:
8274 case AArch64::SABAv16i8:
8275 return AArch64::ADDv16i8;
8276 case AArch64::UABAv4i16:
8277 case AArch64::SABAv4i16:
8278 return AArch64::ADDv4i16;
8279 case AArch64::UABAv2i32:
8280 case AArch64::SABAv2i32:
8281 return AArch64::ADDv2i32;
8282 case AArch64::UABAv8i8:
8283 case AArch64::SABAv8i8:
8284 return AArch64::ADDv8i8;
8285 default:
8286 llvm_unreachable("Unknown accumulator opcode");
8287 }
8288}
8289
8290/// When getMachineCombinerPatterns() finds potential patterns,
8291/// this function generates the instructions that could replace the
8292/// original code sequence
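/// The MachineCombiner applies a selected alternative by inserting every
/// instruction collected in InsInstrs and erasing every instruction in
/// DelInstrs; InstrIdxForVirtReg maps each newly created virtual register to
/// the index of its defining instruction in InsInstrs so the combiner can
/// compute depths for the new sequence.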
8293void AArch64InstrInfo::genAlternativeCodeSequence(
8294 MachineInstr &Root, unsigned Pattern,
8297 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8298 MachineBasicBlock &MBB = *Root.getParent();
8299 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8300 MachineFunction &MF = *MBB.getParent();
8301 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8302
8303 MachineInstr *MUL = nullptr;
8304 const TargetRegisterClass *RC;
8305 unsigned Opc;
8306 switch (Pattern) {
8307 default:
8308 // Reassociate instructions.
8309 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8310 DelInstrs, InstrIdxForVirtReg);
8311 return;
8312 case AArch64MachineCombinerPattern::SUBADD_OP1:
8313 // A - (B + C)
8314 // ==> (A - B) - C
8315 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8316 InstrIdxForVirtReg);
8317 return;
8318 case AArch64MachineCombinerPattern::SUBADD_OP2:
8319 // A - (B + C)
8320 // ==> (A - C) - B
8321 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8322 InstrIdxForVirtReg);
8323 return;
8326 // MUL I=A,B,0
8327 // ADD R,I,C
8328 // ==> MADD R,A,B,C
8329 // --- Create(MADD);
8331 Opc = AArch64::MADDWrrr;
8332 RC = &AArch64::GPR32RegClass;
8333 } else {
8334 Opc = AArch64::MADDXrrr;
8335 RC = &AArch64::GPR64RegClass;
8336 }
8337 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8338 break;
8341 // MUL I=A,B,0
8342 // ADD R,C,I
8343 // ==> MADD R,A,B,C
8344 // --- Create(MADD);
8346 Opc = AArch64::MADDWrrr;
8347 RC = &AArch64::GPR32RegClass;
8348 } else {
8349 Opc = AArch64::MADDXrrr;
8350 RC = &AArch64::GPR64RegClass;
8351 }
8352 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8353 break;
8358 // MUL I=A,B,0
8359 // ADD/SUB R,I,Imm
8360 // ==> MOV V, Imm/-Imm
8361 // ==> MADD R,A,B,V
8362 // --- Create(MADD);
8363 const TargetRegisterClass *RC;
8364 unsigned BitSize, MovImm;
8367 MovImm = AArch64::MOVi32imm;
8368 RC = &AArch64::GPR32spRegClass;
8369 BitSize = 32;
8370 Opc = AArch64::MADDWrrr;
8371 RC = &AArch64::GPR32RegClass;
8372 } else {
8373 MovImm = AArch64::MOVi64imm;
8374 RC = &AArch64::GPR64spRegClass;
8375 BitSize = 64;
8376 Opc = AArch64::MADDXrrr;
8377 RC = &AArch64::GPR64RegClass;
8378 }
8379 Register NewVR = MRI.createVirtualRegister(RC);
8380 uint64_t Imm = Root.getOperand(2).getImm();
8381
8382 if (Root.getOperand(3).isImm()) {
8383 unsigned Val = Root.getOperand(3).getImm();
8384 Imm = Imm << Val;
8385 }
8386 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8387 Pattern == AArch64MachineCombinerPattern::MULSUBXI_OP1;
8388 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8389 // Check that the immediate can be composed via a single instruction.
8390 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8391 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8392 if (Insn.size() != 1)
8393 return;
8394 MachineInstrBuilder MIB1 =
8395 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8396 .addImm(IsSub ? -Imm : Imm);
8397 InsInstrs.push_back(MIB1);
8398 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8399 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8400 break;
8401 }
8404 // MUL I=A,B,0
8405 // SUB R,I, C
8406 // ==> SUB V, 0, C
8407 // ==> MADD R,A,B,V // = -C + A*B
8408 // --- Create(MADD);
8409 const TargetRegisterClass *SubRC;
8410 unsigned SubOpc, ZeroReg;
8412 SubOpc = AArch64::SUBWrr;
8413 SubRC = &AArch64::GPR32spRegClass;
8414 ZeroReg = AArch64::WZR;
8415 Opc = AArch64::MADDWrrr;
8416 RC = &AArch64::GPR32RegClass;
8417 } else {
8418 SubOpc = AArch64::SUBXrr;
8419 SubRC = &AArch64::GPR64spRegClass;
8420 ZeroReg = AArch64::XZR;
8421 Opc = AArch64::MADDXrrr;
8422 RC = &AArch64::GPR64RegClass;
8423 }
8424 Register NewVR = MRI.createVirtualRegister(SubRC);
8425 // SUB NewVR, 0, C
8426 MachineInstrBuilder MIB1 =
8427 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8428 .addReg(ZeroReg)
8429 .add(Root.getOperand(2));
8430 InsInstrs.push_back(MIB1);
8431 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8432 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8433 break;
8434 }
8437 // MUL I=A,B,0
8438 // SUB R,C,I
8439 // ==> MSUB R,A,B,C (computes C - A*B)
8440 // --- Create(MSUB);
8442 Opc = AArch64::MSUBWrrr;
8443 RC = &AArch64::GPR32RegClass;
8444 } else {
8445 Opc = AArch64::MSUBXrrr;
8446 RC = &AArch64::GPR64RegClass;
8447 }
8448 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8449 break;
8451 Opc = AArch64::MLAv8i8;
8452 RC = &AArch64::FPR64RegClass;
8453 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8454 break;
8456 Opc = AArch64::MLAv8i8;
8457 RC = &AArch64::FPR64RegClass;
8458 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8459 break;
8461 Opc = AArch64::MLAv16i8;
8462 RC = &AArch64::FPR128RegClass;
8463 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8464 break;
8466 Opc = AArch64::MLAv16i8;
8467 RC = &AArch64::FPR128RegClass;
8468 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8469 break;
8471 Opc = AArch64::MLAv4i16;
8472 RC = &AArch64::FPR64RegClass;
8473 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8474 break;
8476 Opc = AArch64::MLAv4i16;
8477 RC = &AArch64::FPR64RegClass;
8478 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8479 break;
8481 Opc = AArch64::MLAv8i16;
8482 RC = &AArch64::FPR128RegClass;
8483 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8484 break;
8486 Opc = AArch64::MLAv8i16;
8487 RC = &AArch64::FPR128RegClass;
8488 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8489 break;
8491 Opc = AArch64::MLAv2i32;
8492 RC = &AArch64::FPR64RegClass;
8493 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8494 break;
8496 Opc = AArch64::MLAv2i32;
8497 RC = &AArch64::FPR64RegClass;
8498 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8499 break;
8501 Opc = AArch64::MLAv4i32;
8502 RC = &AArch64::FPR128RegClass;
8503 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8504 break;
8506 Opc = AArch64::MLAv4i32;
8507 RC = &AArch64::FPR128RegClass;
8508 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8509 break;
8510
8512 Opc = AArch64::MLAv8i8;
8513 RC = &AArch64::FPR64RegClass;
8514 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8515 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8516 RC);
8517 break;
8519 Opc = AArch64::MLSv8i8;
8520 RC = &AArch64::FPR64RegClass;
8521 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8522 break;
8524 Opc = AArch64::MLAv16i8;
8525 RC = &AArch64::FPR128RegClass;
8526 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8527 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8528 RC);
8529 break;
8531 Opc = AArch64::MLSv16i8;
8532 RC = &AArch64::FPR128RegClass;
8533 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8534 break;
8536 Opc = AArch64::MLAv4i16;
8537 RC = &AArch64::FPR64RegClass;
8538 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8539 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8540 RC);
8541 break;
8543 Opc = AArch64::MLSv4i16;
8544 RC = &AArch64::FPR64RegClass;
8545 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8546 break;
8548 Opc = AArch64::MLAv8i16;
8549 RC = &AArch64::FPR128RegClass;
8550 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8551 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8552 RC);
8553 break;
8555 Opc = AArch64::MLSv8i16;
8556 RC = &AArch64::FPR128RegClass;
8557 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8558 break;
8560 Opc = AArch64::MLAv2i32;
8561 RC = &AArch64::FPR64RegClass;
8562 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8563 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8564 RC);
8565 break;
8567 Opc = AArch64::MLSv2i32;
8568 RC = &AArch64::FPR64RegClass;
8569 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8570 break;
8572 Opc = AArch64::MLAv4i32;
8573 RC = &AArch64::FPR128RegClass;
8574 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8575 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8576 RC);
8577 break;
8579 Opc = AArch64::MLSv4i32;
8580 RC = &AArch64::FPR128RegClass;
8581 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8582 break;
8583
8585 Opc = AArch64::MLAv4i16_indexed;
8586 RC = &AArch64::FPR64RegClass;
8587 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8588 break;
8590 Opc = AArch64::MLAv4i16_indexed;
8591 RC = &AArch64::FPR64RegClass;
8592 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8593 break;
8595 Opc = AArch64::MLAv8i16_indexed;
8596 RC = &AArch64::FPR128RegClass;
8597 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8598 break;
8600 Opc = AArch64::MLAv8i16_indexed;
8601 RC = &AArch64::FPR128RegClass;
8602 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8603 break;
8605 Opc = AArch64::MLAv2i32_indexed;
8606 RC = &AArch64::FPR64RegClass;
8607 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8608 break;
8610 Opc = AArch64::MLAv2i32_indexed;
8611 RC = &AArch64::FPR64RegClass;
8612 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8613 break;
8615 Opc = AArch64::MLAv4i32_indexed;
8616 RC = &AArch64::FPR128RegClass;
8617 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8618 break;
8620 Opc = AArch64::MLAv4i32_indexed;
8621 RC = &AArch64::FPR128RegClass;
8622 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8623 break;
8624
8626 Opc = AArch64::MLAv4i16_indexed;
8627 RC = &AArch64::FPR64RegClass;
8628 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8629 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8630 RC);
8631 break;
8633 Opc = AArch64::MLSv4i16_indexed;
8634 RC = &AArch64::FPR64RegClass;
8635 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8636 break;
8638 Opc = AArch64::MLAv8i16_indexed;
8639 RC = &AArch64::FPR128RegClass;
8640 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8641 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8642 RC);
8643 break;
8645 Opc = AArch64::MLSv8i16_indexed;
8646 RC = &AArch64::FPR128RegClass;
8647 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8648 break;
8650 Opc = AArch64::MLAv2i32_indexed;
8651 RC = &AArch64::FPR64RegClass;
8652 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8653 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8654 RC);
8655 break;
8657 Opc = AArch64::MLSv2i32_indexed;
8658 RC = &AArch64::FPR64RegClass;
8659 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8660 break;
8662 Opc = AArch64::MLAv4i32_indexed;
8663 RC = &AArch64::FPR128RegClass;
8664 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8665 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8666 RC);
8667 break;
8669 Opc = AArch64::MLSv4i32_indexed;
8670 RC = &AArch64::FPR128RegClass;
8671 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8672 break;
8673
8674 // Floating Point Support
8676 Opc = AArch64::FMADDHrrr;
8677 RC = &AArch64::FPR16RegClass;
8678 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8679 break;
8681 Opc = AArch64::FMADDSrrr;
8682 RC = &AArch64::FPR32RegClass;
8683 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8684 break;
8686 Opc = AArch64::FMADDDrrr;
8687 RC = &AArch64::FPR64RegClass;
8688 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8689 break;
8690
8692 Opc = AArch64::FMADDHrrr;
8693 RC = &AArch64::FPR16RegClass;
8694 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8695 break;
8697 Opc = AArch64::FMADDSrrr;
8698 RC = &AArch64::FPR32RegClass;
8699 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8700 break;
8702 Opc = AArch64::FMADDDrrr;
8703 RC = &AArch64::FPR64RegClass;
8704 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8705 break;
8706
8708 Opc = AArch64::FMLAv1i32_indexed;
8709 RC = &AArch64::FPR32RegClass;
8710 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8712 break;
8714 Opc = AArch64::FMLAv1i32_indexed;
8715 RC = &AArch64::FPR32RegClass;
8716 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8718 break;
8719
8721 Opc = AArch64::FMLAv1i64_indexed;
8722 RC = &AArch64::FPR64RegClass;
8723 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8725 break;
8727 Opc = AArch64::FMLAv1i64_indexed;
8728 RC = &AArch64::FPR64RegClass;
8729 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8731 break;
8732
8734 RC = &AArch64::FPR64RegClass;
8735 Opc = AArch64::FMLAv4i16_indexed;
8736 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8738 break;
8740 RC = &AArch64::FPR64RegClass;
8741 Opc = AArch64::FMLAv4f16;
8742 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8744 break;
8746 RC = &AArch64::FPR64RegClass;
8747 Opc = AArch64::FMLAv4i16_indexed;
8748 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8750 break;
8752 RC = &AArch64::FPR64RegClass;
8753 Opc = AArch64::FMLAv4f16;
8754 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8756 break;
8757
8760 RC = &AArch64::FPR64RegClass;
8762 Opc = AArch64::FMLAv2i32_indexed;
8763 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8765 } else {
8766 Opc = AArch64::FMLAv2f32;
8767 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8769 }
8770 break;
8773 RC = &AArch64::FPR64RegClass;
8775 Opc = AArch64::FMLAv2i32_indexed;
8776 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8778 } else {
8779 Opc = AArch64::FMLAv2f32;
8780 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8782 }
8783 break;
8784
8786 RC = &AArch64::FPR128RegClass;
8787 Opc = AArch64::FMLAv8i16_indexed;
8788 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8790 break;
8792 RC = &AArch64::FPR128RegClass;
8793 Opc = AArch64::FMLAv8f16;
8794 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8796 break;
8798 RC = &AArch64::FPR128RegClass;
8799 Opc = AArch64::FMLAv8i16_indexed;
8800 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8802 break;
8804 RC = &AArch64::FPR128RegClass;
8805 Opc = AArch64::FMLAv8f16;
8806 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8808 break;
8809
8812 RC = &AArch64::FPR128RegClass;
8814 Opc = AArch64::FMLAv2i64_indexed;
8815 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8817 } else {
8818 Opc = AArch64::FMLAv2f64;
8819 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8821 }
8822 break;
8825 RC = &AArch64::FPR128RegClass;
8827 Opc = AArch64::FMLAv2i64_indexed;
8828 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8830 } else {
8831 Opc = AArch64::FMLAv2f64;
8832 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8834 }
8835 break;
8836
8839 RC = &AArch64::FPR128RegClass;
8841 Opc = AArch64::FMLAv4i32_indexed;
8842 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8844 } else {
8845 Opc = AArch64::FMLAv4f32;
8846 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8848 }
8849 break;
8850
8853 RC = &AArch64::FPR128RegClass;
8855 Opc = AArch64::FMLAv4i32_indexed;
8856 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8858 } else {
8859 Opc = AArch64::FMLAv4f32;
8860 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8862 }
8863 break;
8864
8866 Opc = AArch64::FNMSUBHrrr;
8867 RC = &AArch64::FPR16RegClass;
8868 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8869 break;
8871 Opc = AArch64::FNMSUBSrrr;
8872 RC = &AArch64::FPR32RegClass;
8873 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8874 break;
8876 Opc = AArch64::FNMSUBDrrr;
8877 RC = &AArch64::FPR64RegClass;
8878 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8879 break;
8880
8882 Opc = AArch64::FNMADDHrrr;
8883 RC = &AArch64::FPR16RegClass;
8884 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8885 break;
8887 Opc = AArch64::FNMADDSrrr;
8888 RC = &AArch64::FPR32RegClass;
8889 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8890 break;
8892 Opc = AArch64::FNMADDDrrr;
8893 RC = &AArch64::FPR64RegClass;
8894 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8895 break;
8896
8898 Opc = AArch64::FMSUBHrrr;
8899 RC = &AArch64::FPR16RegClass;
8900 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8901 break;
8903 Opc = AArch64::FMSUBSrrr;
8904 RC = &AArch64::FPR32RegClass;
8905 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8906 break;
8908 Opc = AArch64::FMSUBDrrr;
8909 RC = &AArch64::FPR64RegClass;
8910 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8911 break;
8912
8914 Opc = AArch64::FMLSv1i32_indexed;
8915 RC = &AArch64::FPR32RegClass;
8916 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8918 break;
8919
8921 Opc = AArch64::FMLSv1i64_indexed;
8922 RC = &AArch64::FPR64RegClass;
8923 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8925 break;
8926
8929 RC = &AArch64::FPR64RegClass;
8930 Register NewVR = MRI.createVirtualRegister(RC);
8931 MachineInstrBuilder MIB1 =
8932 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8933 .add(Root.getOperand(2));
8934 InsInstrs.push_back(MIB1);
8935 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8937 Opc = AArch64::FMLAv4f16;
8938 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8939 FMAInstKind::Accumulator, &NewVR);
8940 } else {
8941 Opc = AArch64::FMLAv4i16_indexed;
8942 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8943 FMAInstKind::Indexed, &NewVR);
8944 }
8945 break;
8946 }
8948 RC = &AArch64::FPR64RegClass;
8949 Opc = AArch64::FMLSv4f16;
8950 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8952 break;
8954 RC = &AArch64::FPR64RegClass;
8955 Opc = AArch64::FMLSv4i16_indexed;
8956 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8958 break;
8959
8962 RC = &AArch64::FPR64RegClass;
8964 Opc = AArch64::FMLSv2i32_indexed;
8965 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8967 } else {
8968 Opc = AArch64::FMLSv2f32;
8969 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8971 }
8972 break;
8973
8976 RC = &AArch64::FPR128RegClass;
8977 Register NewVR = MRI.createVirtualRegister(RC);
8978 MachineInstrBuilder MIB1 =
8979 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
8980 .add(Root.getOperand(2));
8981 InsInstrs.push_back(MIB1);
8982 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8984 Opc = AArch64::FMLAv8f16;
8985 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8986 FMAInstKind::Accumulator, &NewVR);
8987 } else {
8988 Opc = AArch64::FMLAv8i16_indexed;
8989 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8990 FMAInstKind::Indexed, &NewVR);
8991 }
8992 break;
8993 }
8995 RC = &AArch64::FPR128RegClass;
8996 Opc = AArch64::FMLSv8f16;
8997 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8999 break;
9001 RC = &AArch64::FPR128RegClass;
9002 Opc = AArch64::FMLSv8i16_indexed;
9003 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9005 break;
9006
9009 RC = &AArch64::FPR128RegClass;
9011 Opc = AArch64::FMLSv2i64_indexed;
9012 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9014 } else {
9015 Opc = AArch64::FMLSv2f64;
9016 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9018 }
9019 break;
9020
9023 RC = &AArch64::FPR128RegClass;
9025 Opc = AArch64::FMLSv4i32_indexed;
9026 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9028 } else {
9029 Opc = AArch64::FMLSv4f32;
9030 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9032 }
9033 break;
9036 RC = &AArch64::FPR64RegClass;
9037 Register NewVR = MRI.createVirtualRegister(RC);
9038 MachineInstrBuilder MIB1 =
9039 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9040 .add(Root.getOperand(2));
9041 InsInstrs.push_back(MIB1);
9042 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9044 Opc = AArch64::FMLAv2i32_indexed;
9045 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9046 FMAInstKind::Indexed, &NewVR);
9047 } else {
9048 Opc = AArch64::FMLAv2f32;
9049 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9050 FMAInstKind::Accumulator, &NewVR);
9051 }
9052 break;
9053 }
9056 RC = &AArch64::FPR128RegClass;
9057 Register NewVR = MRI.createVirtualRegister(RC);
9058 MachineInstrBuilder MIB1 =
9059 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9060 .add(Root.getOperand(2));
9061 InsInstrs.push_back(MIB1);
9062 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9064 Opc = AArch64::FMLAv4i32_indexed;
9065 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9066 FMAInstKind::Indexed, &NewVR);
9067 } else {
9068 Opc = AArch64::FMLAv4f32;
9069 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9070 FMAInstKind::Accumulator, &NewVR);
9071 }
9072 break;
9073 }
9076 RC = &AArch64::FPR128RegClass;
9077 Register NewVR = MRI.createVirtualRegister(RC);
9078 MachineInstrBuilder MIB1 =
9079 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9080 .add(Root.getOperand(2));
9081 InsInstrs.push_back(MIB1);
9082 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9084 Opc = AArch64::FMLAv2i64_indexed;
9085 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9086 FMAInstKind::Indexed, &NewVR);
9087 } else {
9088 Opc = AArch64::FMLAv2f64;
9089 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9090 FMAInstKind::Accumulator, &NewVR);
9091 }
9092 break;
9093 }
9096 unsigned IdxDupOp =
9098 : 2;
9099 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9100 &AArch64::FPR128RegClass, MRI);
9101 break;
9102 }
9105 unsigned IdxDupOp =
9107 : 2;
9108 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9109 &AArch64::FPR128RegClass, MRI);
9110 break;
9111 }
9114 unsigned IdxDupOp =
9116 : 2;
9117 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9118 &AArch64::FPR128_loRegClass, MRI);
9119 break;
9120 }
9123 unsigned IdxDupOp =
9125 : 2;
9126 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9127 &AArch64::FPR128RegClass, MRI);
9128 break;
9129 }
9132 unsigned IdxDupOp =
9134 : 2;
9135 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9136 &AArch64::FPR128_loRegClass, MRI);
9137 break;
9138 }
9140 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9141 break;
9142 }
9144 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9145 Pattern, 4);
9146 break;
9147 }
9149 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9150 Pattern, 8);
9151 break;
9152 }
9154 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9155 Pattern, 16);
9156 break;
9157 }
9158
9159 } // end switch (Pattern)
9160 // Record MUL and ADD/SUB for deletion
9161 if (MUL)
9162 DelInstrs.push_back(MUL);
9163 DelInstrs.push_back(&Root);
9164
9165 // Set the flags on the inserted instructions to be the merged flags of the
9166 // instructions that we have combined.
9167 uint32_t Flags = Root.getFlags();
9168 if (MUL)
9169 Flags = Root.mergeFlagsWith(*MUL);
9170 for (auto *MI : InsInstrs)
9171 MI->setFlags(Flags);
9172}
9173
9174/// Replace csincr-branch sequence by simple conditional branch
9175///
9176/// Examples:
9177/// 1. \code
9178/// csinc w9, wzr, wzr, <condition code>
9179/// tbnz w9, #0, 0x44
9180/// \endcode
9181/// to
9182/// \code
9183/// b.<inverted condition code>
9184/// \endcode
9185///
9186/// 2. \code
9187/// csinc w9, wzr, wzr, <condition code>
9188/// tbz w9, #0, 0x44
9189/// \endcode
9190/// to
9191/// \code
9192/// b.<condition code>
9193/// \endcode
9194///
9195/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9196/// compare's constant operand is power of 2.
9197///
9198/// Examples:
9199/// \code
9200/// and w8, w8, #0x400
9201/// cbnz w8, L1
9202/// \endcode
9203/// to
9204/// \code
9205/// tbnz w8, #10, L1
9206/// \endcode
9207///
9208/// \param MI Conditional Branch
9209/// \return True when the simple conditional branch is generated
9210///
9211bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
9212 bool IsNegativeBranch = false;
9213 bool IsTestAndBranch = false;
9214 unsigned TargetBBInMI = 0;
9215 switch (MI.getOpcode()) {
9216 default:
9217 llvm_unreachable("Unknown branch instruction?");
9218 case AArch64::Bcc:
9219 case AArch64::CBWPri:
9220 case AArch64::CBXPri:
9221 case AArch64::CBWPrr:
9222 case AArch64::CBXPrr:
9223 return false;
9224 case AArch64::CBZW:
9225 case AArch64::CBZX:
9226 TargetBBInMI = 1;
9227 break;
9228 case AArch64::CBNZW:
9229 case AArch64::CBNZX:
9230 TargetBBInMI = 1;
9231 IsNegativeBranch = true;
9232 break;
9233 case AArch64::TBZW:
9234 case AArch64::TBZX:
9235 TargetBBInMI = 2;
9236 IsTestAndBranch = true;
9237 break;
9238 case AArch64::TBNZW:
9239 case AArch64::TBNZX:
9240 TargetBBInMI = 2;
9241 IsNegativeBranch = true;
9242 IsTestAndBranch = true;
9243 break;
9244 }
9245 // So we increment a zero register and test for bits other
9246 // than bit 0? Conservatively bail out in case the verifier
9247 // missed this case.
9248 if (IsTestAndBranch && MI.getOperand(1).getImm())
9249 return false;
9250
9251 // Find Definition.
9252 assert(MI.getParent() && "Incomplete machine instruction\n");
9253 MachineBasicBlock *MBB = MI.getParent();
9254 MachineFunction *MF = MBB->getParent();
9255 MachineRegisterInfo *MRI = &MF->getRegInfo();
9256 Register VReg = MI.getOperand(0).getReg();
9257 if (!VReg.isVirtual())
9258 return false;
9259
9260 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9261
9262 // Look through COPY instructions to find definition.
9263 while (DefMI->isCopy()) {
9264 Register CopyVReg = DefMI->getOperand(1).getReg();
9265 if (!MRI->hasOneNonDBGUse(CopyVReg))
9266 return false;
9267 if (!MRI->hasOneDef(CopyVReg))
9268 return false;
9269 DefMI = MRI->getVRegDef(CopyVReg);
9270 }
9271
9272 switch (DefMI->getOpcode()) {
9273 default:
9274 return false;
9275 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9276 case AArch64::ANDWri:
9277 case AArch64::ANDXri: {
9278 if (IsTestAndBranch)
9279 return false;
9280 if (DefMI->getParent() != MBB)
9281 return false;
9282 if (!MRI->hasOneNonDBGUse(VReg))
9283 return false;
9284
9285 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9286 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
9287 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9288 if (!isPowerOf2_64(Mask))
9289 return false;
9290
9291 MachineOperand &MO = DefMI->getOperand(1);
9292 Register NewReg = MO.getReg();
9293 if (!NewReg.isVirtual())
9294 return false;
9295
9296 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9297
9298 MachineBasicBlock &RefToMBB = *MBB;
9299 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9300 DebugLoc DL = MI.getDebugLoc();
9301 unsigned Imm = Log2_64(Mask);
9302 unsigned Opc = (Imm < 32)
9303 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9304 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9305 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9306 .addReg(NewReg)
9307 .addImm(Imm)
9308 .addMBB(TBB);
9309 // The register now lives on to the new TB(N)Z, so it is no longer killed here.
9310 MO.setIsKill(false);
9311
9312 // Bit positions smaller than 32 must use the 32-bit (W) variant in
9313 // all cases, because the 64-bit variant cannot encode them.
9314 //
9315 // Therefore, if the input register is 64-bit, we need to take its
9316 // 32-bit sub-register.
9317 if (!Is32Bit && Imm < 32)
9318 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9319 MI.eraseFromParent();
9320 return true;
9321 }
9322 // Look for CSINC
9323 case AArch64::CSINCWr:
9324 case AArch64::CSINCXr: {
9325 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9326 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9327 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9328 DefMI->getOperand(2).getReg() == AArch64::XZR))
9329 return false;
9330
9331 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9332 true) != -1)
9333 return false;
9334
9335 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9336 // Convert only when the condition code is not modified between
9337 // the CSINC and the branch. The CC may be used by other
9338 // instructions in between.
9339 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
9340 return false;
9341 MachineBasicBlock &RefToMBB = *MBB;
9342 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9343 DebugLoc DL = MI.getDebugLoc();
9344 if (IsNegativeBranch)
9345 CC = AArch64CC::getInvertedCondCode(CC);
9346 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9347 MI.eraseFromParent();
9348 return true;
9349 }
9350 }
9351}
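// A minimal standalone sketch (hypothetical helper, not referenced by the code
// above) of the bit-index computation in the ANDWri/ANDXri case: once the AND
// mask is known to be a power of two, the folded TB(N)Z tests exactly that
// bit, which is what Log2_64(Mask) produces.
static unsigned tbzBitIndexForMaskSketch(uint64_t Mask) {
  // Precondition mirrored from the code above: the mask has exactly one set
  // bit, i.e. isPowerOf2_64(Mask) holds.
  assert(Mask != 0 && (Mask & (Mask - 1)) == 0 && "Mask must be a power of 2");
  unsigned Bit = 0;
  while ((Mask >>= 1) != 0)
    ++Bit;
  return Bit; // E.g. Mask == 0x400 yields bit 10, matching "tbnz w8, #10, L1".
}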
9352
9353std::pair<unsigned, unsigned>
9354AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9355 const unsigned Mask = AArch64II::MO_FRAGMENT;
9356 return std::make_pair(TF & Mask, TF & ~Mask);
9357}
9358
9359ArrayRef<std::pair<unsigned, const char *>>
9360AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9361 using namespace AArch64II;
9362
9363 static const std::pair<unsigned, const char *> TargetFlags[] = {
9364 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9365 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9366 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9367 {MO_HI12, "aarch64-hi12"}};
9368 return ArrayRef(TargetFlags);
9369}
9370
9371ArrayRef<std::pair<unsigned, const char *>>
9372AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9373 using namespace AArch64II;
9374
9375 static const std::pair<unsigned, const char *> TargetFlags[] = {
9376 {MO_COFFSTUB, "aarch64-coffstub"},
9377 {MO_GOT, "aarch64-got"},
9378 {MO_NC, "aarch64-nc"},
9379 {MO_S, "aarch64-s"},
9380 {MO_TLS, "aarch64-tls"},
9381 {MO_DLLIMPORT, "aarch64-dllimport"},
9382 {MO_PREL, "aarch64-prel"},
9383 {MO_TAGGED, "aarch64-tagged"},
9384 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9385 };
9386 return ArrayRef(TargetFlags);
9387}
9388
9389ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
9390AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9391 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9392 {{MOSuppressPair, "aarch64-suppress-pair"},
9393 {MOStridedAccess, "aarch64-strided-access"}};
9394 return ArrayRef(TargetFlags);
9395}
9396
9397/// Constants defining how certain sequences should be outlined.
9398/// This encompasses how an outlined function should be called, and what kind of
9399/// frame should be emitted for that outlined function.
9400///
9401/// \p MachineOutlinerDefault implies that the function should be called with
9402/// a save and restore of LR to the stack.
9403///
9404/// That is,
9405///
9406/// I1 Save LR OUTLINED_FUNCTION:
9407/// I2 --> BL OUTLINED_FUNCTION I1
9408/// I3 Restore LR I2
9409/// I3
9410/// RET
9411///
9412/// * Call construction overhead: 3 (save + BL + restore)
9413/// * Frame construction overhead: 1 (ret)
9414/// * Requires stack fixups? Yes
9415///
9416/// \p MachineOutlinerTailCall implies that the function is being created from
9417/// a sequence of instructions ending in a return.
9418///
9419/// That is,
9420///
9421/// I1 OUTLINED_FUNCTION:
9422/// I2 --> B OUTLINED_FUNCTION I1
9423/// RET I2
9424/// RET
9425///
9426/// * Call construction overhead: 1 (B)
9427/// * Frame construction overhead: 0 (Return included in sequence)
9428/// * Requires stack fixups? No
9429///
9430/// \p MachineOutlinerNoLRSave implies that the function should be called using
9431/// a BL instruction, but doesn't require LR to be saved and restored. This
9432/// happens when LR is known to be dead.
9433///
9434/// That is,
9435///
9436/// I1 OUTLINED_FUNCTION:
9437/// I2 --> BL OUTLINED_FUNCTION I1
9438/// I3 I2
9439/// I3
9440/// RET
9441///
9442/// * Call construction overhead: 1 (BL)
9443/// * Frame construction overhead: 1 (RET)
9444/// * Requires stack fixups? No
9445///
9446/// \p MachineOutlinerThunk implies that the function is being created from
9447/// a sequence of instructions ending in a call. The outlined function is
9448/// called with a BL instruction, and the outlined function tail-calls the
9449/// original call destination.
9450///
9451/// That is,
9452///
9453/// I1 OUTLINED_FUNCTION:
9454/// I2 --> BL OUTLINED_FUNCTION I1
9455/// BL f I2
9456/// B f
9457/// * Call construction overhead: 1 (BL)
9458/// * Frame construction overhead: 0
9459/// * Requires stack fixups? No
9460///
9461/// \p MachineOutlinerRegSave implies that the function should be called with a
9462/// save and restore of LR to an available register. This allows us to avoid
9463/// stack fixups. Note that this outlining variant is compatible with the
9464/// NoLRSave case.
9465///
9466/// That is,
9467///
9468/// I1 Save LR OUTLINED_FUNCTION:
9469/// I2 --> BL OUTLINED_FUNCTION I1
9470/// I3 Restore LR I2
9471/// I3
9472/// RET
9473///
9474/// * Call construction overhead: 3 (save + BL + restore)
9475/// * Frame construction overhead: 1 (ret)
9476/// * Requires stack fixups? No
9477enum MachineOutlinerClass {
9478 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9479 MachineOutlinerTailCall, /// Only emit a branch.
9480 MachineOutlinerNoLRSave, /// Emit a call and return.
9481 MachineOutlinerThunk, /// Emit a call and tail-call.
9482 MachineOutlinerRegSave /// Same as default, but save to a register.
9483};
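// A minimal sketch (hypothetical helper, not used elsewhere in this file) of
// the per-variant call-construction overhead documented in the comment above.
// The byte counts match the values passed to setCallInfo() further down,
// ignoring the extra bytes added for PAuth LR checks on tail calls.
static unsigned outlinerCallOverheadInBytesSketch(MachineOutlinerClass C) {
  switch (C) {
  case MachineOutlinerDefault:
  case MachineOutlinerRegSave:
    return 12; // Save LR + BL + restore LR, 4 bytes each.
  case MachineOutlinerTailCall:
  case MachineOutlinerNoLRSave:
  case MachineOutlinerThunk:
    return 4; // A single B or BL.
  }
  llvm_unreachable("Unknown MachineOutlinerClass");
}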
9484
9485enum MachineOutlinerMBBFlags {
9486 LRUnavailableSomewhere = 0x2,
9487 HasCalls = 0x4,
9488 UnsafeRegsDead = 0x8
9489};
9490
9491Register
9492AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9493 MachineFunction *MF = C.getMF();
9494 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9495 const AArch64RegisterInfo *ARI =
9496 static_cast<const AArch64RegisterInfo *>(&TRI);
9497 // Check if there is an available register across the sequence that we can
9498 // use.
9499 for (unsigned Reg : AArch64::GPR64RegClass) {
9500 if (!ARI->isReservedReg(*MF, Reg) &&
9501 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9502 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9503 Reg != AArch64::X17 && // Ditto for X17.
9504 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9505 C.isAvailableInsideSeq(Reg, TRI))
9506 return Reg;
9507 }
9508 return Register();
9509}
9510
9511static bool
9512outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
9513 const outliner::Candidate &b) {
9514 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9515 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9516
9517 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
9518 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
9519}
9520
9521static bool
9522outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
9523 const outliner::Candidate &b) {
9524 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9525 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9526
9527 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9528}
9529
9530static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
9531 const outliner::Candidate &b) {
9532 const AArch64Subtarget &SubtargetA =
9533 a.getMF()->getSubtarget<AArch64Subtarget>();
9534 const AArch64Subtarget &SubtargetB =
9535 b.getMF()->getSubtarget<AArch64Subtarget>();
9536 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9537}
9538
9539std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9540AArch64InstrInfo::getOutliningCandidateInfo(
9541 const MachineModuleInfo &MMI,
9542 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9543 unsigned MinRepeats) const {
9544 unsigned SequenceSize = 0;
9545 for (auto &MI : RepeatedSequenceLocs[0])
9546 SequenceSize += getInstSizeInBytes(MI);
9547
9548 unsigned NumBytesToCreateFrame = 0;
9549
9550 // We only allow outlining for functions having exactly matching return
9551 // address signing attributes, i.e., all share the same value for the
9552 // attribute "sign-return-address" and all share the same type of key they
9553 // are signed with.
9554 // Additionally we require all functions to simultaneously either support
9555 // v8.3a features or not. Otherwise an outlined function could get signed
9556 // using dedicated v8.3 instructions and a call from a function that doesn't
9557 // support v8.3 instructions would therefore be invalid.
9558 if (std::adjacent_find(
9559 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9560 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9561 // Return true if a and b are non-equal w.r.t. return address
9562 // signing or support of v8.3a features
9563 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9564 outliningCandidatesSigningKeyConsensus(a, b) &&
9565 outliningCandidatesV8_3OpsConsensus(a, b)) {
9566 return false;
9567 }
9568 return true;
9569 }) != RepeatedSequenceLocs.end()) {
9570 return std::nullopt;
9571 }
9572
9573 // Since at this point all candidates agree on their return address signing
9574 // picking just one is fine. If the candidate functions potentially sign their
9575 // return addresses, the outlined function should do the same. Note that in
9576 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9577 // not certainly true that the outlined function will have to sign its return
9578 // address but this decision is made later, when the decision to outline
9579 // has already been made.
9580 // The same holds for the number of additional instructions we need: On
9581 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9582 // necessary. However, at this point we don't know if the outlined function
9583 // will have a RET instruction so we assume the worst.
9584 const TargetRegisterInfo &TRI = getRegisterInfo();
9585 // Performing a tail call may require extra checks when PAuth is enabled.
9586 // If PAuth is disabled, set it to zero for uniformity.
9587 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9588 if (RepeatedSequenceLocs[0]
9589 .getMF()
9590 ->getInfo<AArch64FunctionInfo>()
9591 ->shouldSignReturnAddress(true)) {
9592 // One PAC and one AUT instructions
9593 NumBytesToCreateFrame += 8;
9594
9595 // PAuth is enabled - set extra tail call cost, if any.
9596 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9597 *RepeatedSequenceLocs[0].getMF());
9598 NumBytesToCheckLRInTCEpilogue =
9599 AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
9600 // Checking the authenticated LR value may significantly impact
9601 // SequenceSize, so account for it for more precise results.
9602 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9603 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9604
9605 // We have to check if sp-modifying instructions would get outlined.
9606 // If so, we only allow outlining if sp is unchanged overall; matching
9607 // sub and add instructions are okay to outline, but all other sp
9608 // modifications are not.
9609 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9610 int SPValue = 0;
9611 for (auto &MI : C) {
9612 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9613 switch (MI.getOpcode()) {
9614 case AArch64::ADDXri:
9615 case AArch64::ADDWri:
9616 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9617 assert(MI.getOperand(2).isImm() &&
9618 "Expected operand to be immediate");
9619 assert(MI.getOperand(1).isReg() &&
9620 "Expected operand to be a register");
9621 // Check if the add just increments sp. If so, we search for
9622 // matching sub instructions that decrement sp. If not, the
9623 // modification is illegal
9624 if (MI.getOperand(1).getReg() == AArch64::SP)
9625 SPValue += MI.getOperand(2).getImm();
9626 else
9627 return true;
9628 break;
9629 case AArch64::SUBXri:
9630 case AArch64::SUBWri:
9631 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9632 assert(MI.getOperand(2).isImm() &&
9633 "Expected operand to be immediate");
9634 assert(MI.getOperand(1).isReg() &&
9635 "Expected operand to be a register");
9636 // Check if the sub just decrements sp. If so, we search for
9637 // matching add instructions that increment sp. If not, the
9638 // modification is illegal
9639 if (MI.getOperand(1).getReg() == AArch64::SP)
9640 SPValue -= MI.getOperand(2).getImm();
9641 else
9642 return true;
9643 break;
9644 default:
9645 return true;
9646 }
9647 }
9648 }
9649 if (SPValue)
9650 return true;
9651 return false;
9652 };
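  // Illustrative example (not from the original comments): a candidate body
  // containing
  //   sub sp, sp, #16
  //   ...
  //   add sp, sp, #16
  // nets SPValue == 0 and is kept, whereas an unmatched "add sp, sp, #32" or
  // any other kind of write to sp makes the lambda above return true and the
  // candidate gets erased below.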
9653 // Remove candidates with illegal stack modifying instructions
9654 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9655
9656 // If the sequence doesn't have enough candidates left, then we're done.
9657 if (RepeatedSequenceLocs.size() < MinRepeats)
9658 return std::nullopt;
9659 }
9660
9661 // Properties about candidate MBBs that hold for all of them.
9662 unsigned FlagsSetInAll = 0xF;
9663
9664 // Compute liveness information for each candidate, and set FlagsSetInAll.
9665 for (outliner::Candidate &C : RepeatedSequenceLocs)
9666 FlagsSetInAll &= C.Flags;
9667
9668 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9669
9670 // Helper lambda which sets call information for every candidate.
9671 auto SetCandidateCallInfo =
9672 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9673 for (outliner::Candidate &C : RepeatedSequenceLocs)
9674 C.setCallInfo(CallID, NumBytesForCall);
9675 };
9676
9677 unsigned FrameID = MachineOutlinerDefault;
9678 NumBytesToCreateFrame += 4;
9679
9680 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9681 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9682 });
9683
9684 // We check to see if CFI Instructions are present, and if they are
9685 // we find the number of CFI Instructions in the candidates.
9686 unsigned CFICount = 0;
9687 for (auto &I : RepeatedSequenceLocs[0]) {
9688 if (I.isCFIInstruction())
9689 CFICount++;
9690 }
9691
9692 // We compare the number of found CFI Instructions to the number of CFI
9693 // instructions in the parent function for each candidate. We must check this
9694 // since if we outline one of the CFI instructions in a function, we have to
9695 // outline them all for correctness. If we do not, the address offsets will be
9696 // incorrect between the two sections of the program.
9697 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9698 std::vector<MCCFIInstruction> CFIInstructions =
9699 C.getMF()->getFrameInstructions();
9700
9701 if (CFICount > 0 && CFICount != CFIInstructions.size())
9702 return std::nullopt;
9703 }
9704
9705 // Returns true if an instruction is safe to fix up, false otherwise.
9706 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9707 if (MI.isCall())
9708 return true;
9709
9710 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
9711 !MI.readsRegister(AArch64::SP, &TRI))
9712 return true;
9713
9714 // Any modification of SP will break our code to save/restore LR.
9715 // FIXME: We could handle some instructions which add a constant
9716 // offset to SP, with a bit more work.
9717 if (MI.modifiesRegister(AArch64::SP, &TRI))
9718 return false;
9719
9720 // At this point, we have a stack instruction that we might need to
9721 // fix up. We'll handle it if it's a load or store.
9722 if (MI.mayLoadOrStore()) {
9723 const MachineOperand *Base; // Filled with the base operand of MI.
9724 int64_t Offset; // Filled with the offset of MI.
9725 bool OffsetIsScalable;
9726
9727 // Does it allow us to offset the base operand and is the base the
9728 // register SP?
9729 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
9730 !Base->isReg() || Base->getReg() != AArch64::SP)
9731 return false;
9732
9733 // The fix-up code below assumes byte offsets.
9734 if (OffsetIsScalable)
9735 return false;
9736
9737 // Find the minimum/maximum offset for this instruction and check
9738 // if fixing it up would be in range.
9739 int64_t MinOffset,
9740 MaxOffset; // Unscaled offsets for the instruction.
9741 // The scale to multiply the offsets by.
9742 TypeSize Scale(0U, false), DummyWidth(0U, false);
9743 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
9744
9745 Offset += 16; // Update the offset to what it would be if we outlined.
9746 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9747 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9748 return false;
9749
9750 // It's in range, so we can outline it.
9751 return true;
9752 }
9753
9754 // FIXME: Add handling for instructions like "add x0, sp, #8".
9755
9756 // We can't fix it up, so don't outline it.
9757 return false;
9758 };
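  // Worked example (illustrative): for "ldr x0, [sp, #8]" the outlined body
  // sits below a 16-byte LR spill, so the effective offset becomes
  // 8 + 16 = 24 bytes. Assuming the usual unsigned scaled-offset encoding for
  // LDRXui (Scale == 8, MaxOffset == 4095), 24 is well within
  // [MinOffset, MaxOffset] * Scale, so the instruction is safe to fix up.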
9759
9760 // True if it's possible to fix up each stack instruction in this sequence.
9761 // Important for frames/call variants that modify the stack.
9762 bool AllStackInstrsSafe =
9763 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
9764
9765 // If the last instruction in any candidate is a terminator, then we should
9766 // tail call all of the candidates.
9767 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9768 FrameID = MachineOutlinerTailCall;
9769 NumBytesToCreateFrame = 0;
9770 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9771 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9772 }
9773
9774 else if (LastInstrOpcode == AArch64::BL ||
9775 ((LastInstrOpcode == AArch64::BLR ||
9776 LastInstrOpcode == AArch64::BLRNoIP) &&
9777 !HasBTI)) {
9778 // FIXME: Do we need to check if the code after this uses the value of LR?
9779 FrameID = MachineOutlinerThunk;
9780 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9781 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9782 }
9783
9784 else {
9785 // We need to decide how to emit calls + frames. We can always emit the same
9786 // frame if we don't need to save to the stack. If we have to save to the
9787 // stack, then we need a different frame.
9788 unsigned NumBytesNoStackCalls = 0;
9789 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9790
9791 // Check if we have to save LR.
9792 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9793 bool LRAvailable =
9794 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
9795 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9796 : true;
9797 // If we have a noreturn caller, then we're going to be conservative and
9798 // say that we have to save LR. If we don't have a ret at the end of the
9799 // block, then we can't reason about liveness accurately.
9800 //
9801 // FIXME: We can probably do better than always disabling this in
9802 // noreturn functions by fixing up the liveness info.
9803 bool IsNoReturn =
9804 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9805
9806 // Is LR available? If so, we don't need a save.
9807 if (LRAvailable && !IsNoReturn) {
9808 NumBytesNoStackCalls += 4;
9809 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9810 CandidatesWithoutStackFixups.push_back(C);
9811 }
9812
9813 // Is an unused register available? If so, we won't modify the stack, so
9814 // we can outline with the same frame type as those that don't save LR.
9815 else if (findRegisterToSaveLRTo(C)) {
9816 NumBytesNoStackCalls += 12;
9817 C.setCallInfo(MachineOutlinerRegSave, 12);
9818 CandidatesWithoutStackFixups.push_back(C);
9819 }
9820
9821 // Is SP used in the sequence at all? If not, we don't have to modify
9822 // the stack, so we are guaranteed to get the same frame.
9823 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9824 NumBytesNoStackCalls += 12;
9825 C.setCallInfo(MachineOutlinerDefault, 12);
9826 CandidatesWithoutStackFixups.push_back(C);
9827 }
9828
9829 // If we outline this, we need to modify the stack. Pretend we don't
9830 // outline this by saving all of its bytes.
9831 else {
9832 NumBytesNoStackCalls += SequenceSize;
9833 }
9834 }
9835
9836 // If there are no places where we have to save LR, then note that we
9837 // don't have to update the stack. Otherwise, give every candidate the
9838 // default call type, as long as it's safe to do so.
9839 if (!AllStackInstrsSafe ||
9840 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9841 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9842 FrameID = MachineOutlinerNoLRSave;
9843 if (RepeatedSequenceLocs.size() < MinRepeats)
9844 return std::nullopt;
9845 } else {
9846 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9847
9848 // Bugzilla ID: 46767
9849 // TODO: Check if fixing up the stack more than once is safe so we can
9850 // outline these.
9851 //
9852 // An outline resulting in a caller that requires stack fixups at the
9853 // callsite to a callee that also requires stack fixups can happen when
9854 // there are no available registers at the candidate callsite for a
9855 // candidate that itself also has calls.
9856 //
9857 // In other words if function_containing_sequence in the following pseudo
9858 // assembly requires that we save LR at the point of the call, but there
9859 // are no available registers: in this case we save using SP and as a
9860 // result the SP offsets require stack fixups by multiples of 16.
9861 //
9862 // function_containing_sequence:
9863 // ...
9864 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9865 // call OUTLINED_FUNCTION_N
9866 // restore LR from SP
9867 // ...
9868 //
9869 // OUTLINED_FUNCTION_N:
9870 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9871 // ...
9872 // bl foo
9873 // restore LR from SP
9874 // ret
9875 //
9876 // Because the code to handle more than one stack fixup does not
9877 // currently have the proper checks for legality, these cases will assert
9878 // in the AArch64 MachineOutliner. This is because the code to do this
9879 // needs more hardening, testing, better checks that generated code is
9880 // legal, etc and because it is only verified to handle a single pass of
9881 // stack fixup.
9882 //
9883 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9884 // these cases until they are known to be handled. Bugzilla 46767 is
9885 // referenced in comments at the assert site.
9886 //
9887 // To avoid asserting (or generating non-legal code on noassert builds)
9888 // we remove all candidates which would need more than one stack fixup by
9889 // pruning the cases where the candidate has calls while also having no
9890 // available LR and having no available general purpose registers to copy
9891 // LR to (ie one extra stack save/restore).
9892 //
9893 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9894 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9895 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9896 return (llvm::any_of(C, IsCall)) &&
9897 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9898 !findRegisterToSaveLRTo(C));
9899 });
9900 }
9901 }
9902
9903 // If we dropped all of the candidates, bail out here.
9904 if (RepeatedSequenceLocs.size() < MinRepeats)
9905 return std::nullopt;
9906 }
9907
9908 // Does every candidate's MBB contain a call? If so, then we might have a call
9909 // in the range.
9910 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9911 // Check if the range contains a call. These require a save + restore of the
9912 // link register.
9913 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9914 bool ModStackToSaveLR = false;
9915 if (any_of(drop_end(FirstCand),
9916 [](const MachineInstr &MI) { return MI.isCall(); }))
9917 ModStackToSaveLR = true;
9918
9919 // Handle the last instruction separately. If this is a tail call, then the
9920 // last instruction is a call. We don't want to save + restore in this case.
9921 // However, it could be possible that the last instruction is a call without
9922 // it being valid to tail call this sequence. We should consider this as
9923 // well.
9924 else if (FrameID != MachineOutlinerThunk &&
9925 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9926 ModStackToSaveLR = true;
9927
9928 if (ModStackToSaveLR) {
9929 // We can't fix up the stack. Bail out.
9930 if (!AllStackInstrsSafe)
9931 return std::nullopt;
9932
9933 // Save + restore LR.
9934 NumBytesToCreateFrame += 8;
9935 }
9936 }
9937
9938 // If we have CFI instructions, we can only outline if the outlined section
9939 // can be a tail call
9940 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9941 return std::nullopt;
9942
9943 return std::make_unique<outliner::OutlinedFunction>(
9944 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9945}
9946
9947void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9948 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9949 // If a bunch of candidates reach this point they must agree on their return
9950 // address signing. It is therefore enough to just consider the signing
9951 // behaviour of one of them
9952 const auto &CFn = Candidates.front().getMF()->getFunction();
9953
9954 if (CFn.hasFnAttribute("ptrauth-returns"))
9955 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9956 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9957 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9958 // Since all candidates belong to the same module, just copy the
9959 // function-level attributes of an arbitrary function.
9960 if (CFn.hasFnAttribute("sign-return-address"))
9961 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9962 if (CFn.hasFnAttribute("sign-return-address-key"))
9963 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9964
9965 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9966}
9967
9968bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
9969 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9970 const Function &F = MF.getFunction();
9971
9972 // Can F be deduplicated by the linker? If it can, don't outline from it.
9973 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9974 return false;
9975
9976 // Don't outline from functions with section markings; the program could
9977 // expect that all the code is in the named section.
9978 // FIXME: Allow outlining from multiple functions with the same section
9979 // marking.
9980 if (F.hasSection())
9981 return false;
9982
9983 // Outlining from functions with redzones is unsafe since the outliner may
9984 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9985 // outline from it.
9986 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
9987 if (!AFI || AFI->hasRedZone().value_or(true))
9988 return false;
9989
9990 // FIXME: Determine whether it is safe to outline from functions which contain
9991 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9992 // outlined together and ensure it is safe to outline with async unwind info,
9993 // required for saving & restoring VG around calls.
9994 if (AFI->hasStreamingModeChanges())
9995 return false;
9996
9997 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9998 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
9999 return false;
10000
10001 // It's safe to outline from MF.
10002 return true;
10003}
10004
10006AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10007 unsigned &Flags) const {
10008 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
10009 "Must track liveness!");
10010 SmallVector<
10011 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10012 Ranges;
10013 // According to the AArch64 Procedure Call Standard, the following are
10014 // undefined on entry/exit from a function call:
10015 //
10016 // * Registers x16, x17, (and thus w16, w17)
10017 // * Condition codes (and thus the NZCV register)
10018 //
10019 // If any of these registers are used inside or live across an outlined
10020 // function, then they may be modified later, either by the compiler or
10021 // some other tool (like the linker).
10022 //
10023 // To avoid outlining in these situations, partition each block into ranges
10024 // where these registers are dead. We will only outline from those ranges.
10025 LiveRegUnits LRU(getRegisterInfo());
10026 auto AreAllUnsafeRegsDead = [&LRU]() {
10027 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10028 LRU.available(AArch64::NZCV);
10029 };
10030
10031 // We need to know if LR is live across an outlining boundary later on in
10032 // order to decide how we'll create the outlined call, frame, etc.
10033 //
10034 // It's pretty expensive to check this for *every candidate* within a block.
10035 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10036 // to compute liveness from the end of the block for O(n) candidates within
10037 // the block.
10038 //
10039 // So, to improve the average case, let's keep track of liveness from the end
10040 // of the block to the beginning of *every outlinable range*. If we know that
10041 // LR is available in every range we could outline from, then we know that
10042 // we don't need to check liveness for any candidate within that range.
10043 bool LRAvailableEverywhere = true;
10044 // Compute liveness bottom-up.
10045 LRU.addLiveOuts(MBB);
10046 // Update flags that require info about the entire MBB.
10047 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10048 if (MI.isCall() && !MI.isTerminator())
10049 Flags |= MachineOutlinerMBBFlags::HasCalls;
10050 };
10051 // Range: [RangeBegin, RangeEnd)
10052 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10053 unsigned RangeLen;
10054 auto CreateNewRangeStartingAt =
10055 [&RangeBegin, &RangeEnd,
10056 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10057 RangeBegin = NewBegin;
10058 RangeEnd = std::next(RangeBegin);
10059 RangeLen = 0;
10060 };
10061 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10062 // At least one unsafe register is not dead. We do not want to outline at
10063 // this point. If it is long enough to outline from and does not cross a
10064 // bundle boundary, save the range [RangeBegin, RangeEnd).
10065 if (RangeLen <= 1)
10066 return;
10067 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10068 return;
10069 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10070 return;
10071 Ranges.emplace_back(RangeBegin, RangeEnd);
10072 };
10073 // Find the first point where all unsafe registers are dead.
10074 // FIND: <safe instr> <-- end of first potential range
10075 // SKIP: <unsafe def>
10076 // SKIP: ... everything between ...
10077 // SKIP: <unsafe use>
10078 auto FirstPossibleEndPt = MBB.instr_rbegin();
10079 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10080 LRU.stepBackward(*FirstPossibleEndPt);
10081 // Update flags that impact how we outline across the entire block,
10082 // regardless of safety.
10083 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10084 if (AreAllUnsafeRegsDead())
10085 break;
10086 }
10087 // If we exhausted the entire block, we have no safe ranges to outline.
10088 if (FirstPossibleEndPt == MBB.instr_rend())
10089 return Ranges;
10090 // Current range.
10091 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10092 // FirstPossibleEndPt points to the first place where all unsafe registers
10093 // are dead (if there is any such point). Begin partitioning the MBB into
10094 // ranges.
10095 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10096 LRU.stepBackward(MI);
10097 UpdateWholeMBBFlags(MI);
10098 if (!AreAllUnsafeRegsDead()) {
10099 SaveRangeIfNonEmpty();
10100 CreateNewRangeStartingAt(MI.getIterator());
10101 continue;
10102 }
10103 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10104 RangeBegin = MI.getIterator();
10105 ++RangeLen;
10106 }
10107 // The loop above misses the last (or only) range. If we are still safe,
10108 // save the range.
10109 if (AreAllUnsafeRegsDead())
10110 SaveRangeIfNonEmpty();
10111 if (Ranges.empty())
10112 return Ranges;
10113 // We found the ranges bottom-up, but the mapping expects them top-down.
10114 // Reverse the order.
10115 std::reverse(Ranges.begin(), Ranges.end());
10116 // If there is at least one outlinable range where LR is unavailable
10117 // somewhere, remember that.
10118 if (!LRAvailableEverywhere)
10119 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
10120 return Ranges;
10121}
10122
10123outliner::InstrType
10124AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10125 MachineBasicBlock::iterator &MIT,
10126 unsigned Flags) const {
10127 MachineInstr &MI = *MIT;
10128
10129 // Don't outline anything used for return address signing. The outlined
10130 // function will get signed later if needed
10131 switch (MI.getOpcode()) {
10132 case AArch64::PACM:
10133 case AArch64::PACIASP:
10134 case AArch64::PACIBSP:
10135 case AArch64::PACIASPPC:
10136 case AArch64::PACIBSPPC:
10137 case AArch64::AUTIASP:
10138 case AArch64::AUTIBSP:
10139 case AArch64::AUTIASPPCi:
10140 case AArch64::AUTIASPPCr:
10141 case AArch64::AUTIBSPPCi:
10142 case AArch64::AUTIBSPPCr:
10143 case AArch64::RETAA:
10144 case AArch64::RETAB:
10145 case AArch64::RETAASPPCi:
10146 case AArch64::RETAASPPCr:
10147 case AArch64::RETABSPPCi:
10148 case AArch64::RETABSPPCr:
10149 case AArch64::EMITBKEY:
10150 case AArch64::PAUTH_PROLOGUE:
10151 case AArch64::PAUTH_EPILOGUE:
10152 return outliner::InstrType::Illegal;
10153 }
10154
10155 // We can only outline these if we will tail call the outlined function, or
10156 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10157 // in a tail call.
10158 //
10159 // FIXME: If the proper fixups for the offset are implemented, this should be
10160 // possible.
10161 if (MI.isCFIInstruction())
10162 return outliner::InstrType::Legal;
10163
10164 // Is this a terminator for a basic block?
10165 if (MI.isTerminator())
10166 // TargetInstrInfo::getOutliningType has already filtered out anything
10167 // that would break this, so we can allow it here.
10168 return outliner::InstrType::Legal;
10169
10170 // Make sure none of the operands are un-outlinable.
10171 for (const MachineOperand &MOP : MI.operands()) {
10172 // A check preventing CFI indices was here before, but only CFI
10173 // instructions should have those.
10174 assert(!MOP.isCFIIndex());
10175
10176 // If it uses LR or W30 explicitly, then don't touch it.
10177 if (MOP.isReg() && !MOP.isImplicit() &&
10178 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10179 return outliner::InstrType::Illegal;
10180 }
10181
10182 // Special cases for instructions that can always be outlined, but will fail
10183 // the later tests. E.g., ADRPs, which are PC-relative, use LR, but can always
10184 // be outlined because they don't require a *specific* value to be in LR.
10185 if (MI.getOpcode() == AArch64::ADRP)
10186 return outliner::InstrType::Legal;
10187
10188 // If MI is a call we might be able to outline it. We don't want to outline
10189 // any calls that rely on the position of items on the stack. When we outline
10190 // something containing a call, we have to emit a save and restore of LR in
10191 // the outlined function. Currently, this always happens by saving LR to the
10192 // stack. Thus, if we outline, say, half the parameters for a function call
10193 // plus the call, then we'll break the callee's expectations for the layout
10194 // of the stack.
10195 //
10196 // FIXME: Allow calls to functions which construct a stack frame, as long
10197 // as they don't access arguments on the stack.
10198 // FIXME: Figure out some way to analyze functions defined in other modules.
10199 // We should be able to compute the memory usage based on the IR calling
10200 // convention, even if we can't see the definition.
10201 if (MI.isCall()) {
10202 // Get the function associated with the call. Look at each operand and find
10203 // the one that represents the callee and get its name.
10204 const Function *Callee = nullptr;
10205 for (const MachineOperand &MOP : MI.operands()) {
10206 if (MOP.isGlobal()) {
10207 Callee = dyn_cast<Function>(MOP.getGlobal());
10208 break;
10209 }
10210 }
10211
10212 // Never outline calls to mcount. There isn't any rule that would require
10213 // this, but the Linux kernel's "ftrace" feature depends on it.
10214 if (Callee && Callee->getName() == "\01_mcount")
10215 return outliner::InstrType::Illegal;
10216
10217 // If we don't know anything about the callee, assume it depends on the
10218 // stack layout of the caller. In that case, it's only legal to outline
10219 // as a tail-call. Explicitly list the call instructions we know about so we
10220 // don't get unexpected results with call pseudo-instructions.
10221 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10222 if (MI.getOpcode() == AArch64::BLR ||
10223 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10224 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10225
10226 if (!Callee)
10227 return UnknownCallOutlineType;
10228
10229 // We have a function we have information about. Check if it's something we
10230 // can safely outline.
10231 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10232
10233 // We don't know what's going on with the callee at all. Don't touch it.
10234 if (!CalleeMF)
10235 return UnknownCallOutlineType;
10236
10237 // Check if we know anything about the callee saves on the function. If we
10238 // don't, then don't touch it, since that implies that we haven't
10239 // computed anything about its stack frame yet.
10240 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10241 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10242 MFI.getNumObjects() > 0)
10243 return UnknownCallOutlineType;
10244
10245 // At this point, we can say that CalleeMF ought to not pass anything on the
10246 // stack. Therefore, we can outline it.
10247 return outliner::InstrType::Legal;
10248 }
10249
10250 // Don't touch the link register or W30.
10251 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10252 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10253 return outliner::InstrType::Illegal;
10254
10255 // Don't outline BTI instructions, because that will prevent the outlining
10256 // site from being indirectly callable.
10257 if (hasBTISemantics(MI))
10258 return outliner::InstrType::Illegal;
10259
10260 return outliner::InstrType::Legal;
10261}
10262
10263void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10264 for (MachineInstr &MI : MBB) {
10265 const MachineOperand *Base;
10266 TypeSize Width(0, false);
10267 int64_t Offset;
10268 bool OffsetIsScalable;
10269
10270 // Is this a load or store with an immediate offset with SP as the base?
10271 if (!MI.mayLoadOrStore() ||
10272 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10273 &RI) ||
10274 (Base->isReg() && Base->getReg() != AArch64::SP))
10275 continue;
10276
10277 // It is, so we have to fix it up.
10278 TypeSize Scale(0U, false);
10279 int64_t Dummy1, Dummy2;
10280
10281 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10282 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10283 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10284 assert(Scale != 0 && "Unexpected opcode!");
10285 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10286
10287 // We've pushed the return address to the stack, so add 16 to the offset.
10288 // This is safe, since we already checked if it would overflow when we
10289 // checked if this instruction was legal to outline.
10290 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10291 StackOffsetOperand.setImm(NewImm);
10292 }
10293}
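// A minimal standalone sketch (hypothetical helper, not used by the code
// above) of the same rewrite: with the 16-byte LR spill added by the outlined
// frame, a byte offset of 8 with a scale of 8 becomes an encoded immediate of
// (8 + 16) / 8 == 3, i.e. "str x19, [sp, #8]" turns into "str x19, [sp, #24]".
static int64_t fixedUpScaledOffsetSketch(int64_t ByteOffset, int64_t Scale) {
  return (ByteOffset + 16) / Scale;
}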
10294
10295static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
10296 const AArch64InstrInfo *TII,
10297 bool ShouldSignReturnAddr) {
10298 if (!ShouldSignReturnAddr)
10299 return;
10300
10301 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10302 .setMIFlag(MachineInstr::FrameSetup);
10303 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10304 TII->get(AArch64::PAUTH_EPILOGUE))
10305 .setMIFlag(MachineInstr::FrameDestroy);
10306}
10307
10308void AArch64InstrInfo::buildOutlinedFrame(
10309 MachineBasicBlock &MBB, MachineFunction &MF,
10310 const outliner::OutlinedFunction &OF) const {
10311
10312 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10313
10314 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10315 FI->setOutliningStyle("Tail Call");
10316 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10317 // For thunk outlining, rewrite the last instruction from a call to a
10318 // tail-call.
10319 MachineInstr *Call = &*--MBB.instr_end();
10320 unsigned TailOpcode;
10321 if (Call->getOpcode() == AArch64::BL) {
10322 TailOpcode = AArch64::TCRETURNdi;
10323 } else {
10324 assert(Call->getOpcode() == AArch64::BLR ||
10325 Call->getOpcode() == AArch64::BLRNoIP);
10326 TailOpcode = AArch64::TCRETURNriALL;
10327 }
10328 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10329 .add(Call->getOperand(0))
10330 .addImm(0);
10331 MBB.insert(MBB.end(), TC);
10332 Call->eraseFromParent();
10333
10334 FI->setOutliningStyle("Thunk");
10335 }
10336
10337 bool IsLeafFunction = true;
10338
10339 // Is there a call in the outlined range?
10340 auto IsNonTailCall = [](const MachineInstr &MI) {
10341 return MI.isCall() && !MI.isReturn();
10342 };
10343
10344 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10345 // Fix up the instructions in the range, since we're going to modify the
10346 // stack.
10347
10348 // Bugzilla ID: 46767
10349 // TODO: Check if fixing up twice is safe so we can outline these.
10350 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10351 "Can only fix up stack references once");
10352 fixupPostOutline(MBB);
10353
10354 IsLeafFunction = false;
10355
10356 // LR has to be a live in so that we can save it.
10357 if (!MBB.isLiveIn(AArch64::LR))
10358 MBB.addLiveIn(AArch64::LR);
10359
10359
10360 MachineBasicBlock::iterator It = MBB.begin();
10361 MachineBasicBlock::iterator Et = MBB.end();
10362
10363 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10364 OF.FrameConstructionID == MachineOutlinerThunk)
10365 Et = std::prev(MBB.end());
10366
10367 // Insert a save before the outlined region
10368 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10369 .addReg(AArch64::SP, RegState::Define)
10370 .addReg(AArch64::LR)
10371 .addReg(AArch64::SP)
10372 .addImm(-16);
10373 It = MBB.insert(It, STRXpre);
10374
10375 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10376 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10377
10378 // Add a CFI saying the stack was moved 16 B down.
10379 CFIBuilder.buildDefCFAOffset(16);
10380
10381 // Add a CFI saying that the LR that we want to find is now 16 B higher
10382 // than before.
10383 CFIBuilder.buildOffset(AArch64::LR, -16);
10384 }
10385
10386 // Insert a restore before the terminator for the function.
10387 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10388 .addReg(AArch64::SP, RegState::Define)
10389 .addReg(AArch64::LR, RegState::Define)
10390 .addReg(AArch64::SP)
10391 .addImm(16);
10392 Et = MBB.insert(Et, LDRXpost);
10393 }
10394
10395 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
10396
10397 // If this is a tail call outlined function, then there's already a return.
10398 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10399 OF.FrameConstructionID == MachineOutlinerThunk) {
10400 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10401 return;
10402 }
10403
10404 // It's not a tail call, so we have to insert the return ourselves.
10405
10406 // LR has to be a live in so that we can return to it.
10407 if (!MBB.isLiveIn(AArch64::LR))
10408 MBB.addLiveIn(AArch64::LR);
10409
10410 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10411 .addReg(AArch64::LR);
10412 MBB.insert(MBB.end(), ret);
10413
10414 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10415
10416 FI->setOutliningStyle("Function");
10417
10418 // Did we have to modify the stack by saving the link register?
10419 if (OF.FrameConstructionID != MachineOutlinerDefault)
10420 return;
10421
10422 // We modified the stack.
10423 // Walk over the basic block and fix up all the stack accesses.
10424 fixupPostOutline(MBB);
10425}
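// For reference (illustrative, not emitted verbatim): the skeleton produced
// for a MachineOutlinerDefault frame is
//   str x30, [sp, #-16]!   // STRXpre: spill LR and drop SP by 16
//   <outlined instructions, with SP-relative offsets already bumped by 16>
//   ldr x30, [sp], #16     // LDRXpost: reload LR and restore SP
//   ret
// with the CFI directives above describing the temporary 16-byte frame when
// DWARF unwind info is required.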
10426
10427MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10428 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10429 MachineFunction &MF, outliner::Candidate &C) const {
10430
10431 // Are we tail calling?
10432 if (C.CallConstructionID == MachineOutlinerTailCall) {
10433 // If yes, then we can just branch to the label.
10434 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10435 .addGlobalAddress(M.getNamedValue(MF.getName()))
10436 .addImm(0));
10437 return It;
10438 }
10439
10440 // Are we saving the link register?
10441 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10442 C.CallConstructionID == MachineOutlinerThunk) {
10443 // No, so just insert the call.
10444 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10445 .addGlobalAddress(M.getNamedValue(MF.getName())));
10446 return It;
10447 }
10448
10449 // We want to return the spot where we inserted the call.
10450 MachineBasicBlock::iterator CallPt;
10451
10452 // Instructions for saving and restoring LR around the call instruction we're
10453 // going to insert.
10454 MachineInstr *Save;
10455 MachineInstr *Restore;
10456 // Can we save to a register?
10457 if (C.CallConstructionID == MachineOutlinerRegSave) {
10458 // FIXME: This logic should be sunk into a target-specific interface so that
10459 // we don't have to recompute the register.
10460 Register Reg = findRegisterToSaveLRTo(C);
10461 assert(Reg && "No callee-saved register available?");
10462
10463 // LR has to be a live in so that we can save it.
10464 if (!MBB.isLiveIn(AArch64::LR))
10465 MBB.addLiveIn(AArch64::LR);
10466
10467 // Save and restore LR from Reg.
10468 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10469 .addReg(AArch64::XZR)
10470 .addReg(AArch64::LR)
10471 .addImm(0);
10472 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10473 .addReg(AArch64::XZR)
10474 .addReg(Reg)
10475 .addImm(0);
10476 } else {
10477 // We have the default case. Save and restore from SP.
10478 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10479 .addReg(AArch64::SP, RegState::Define)
10480 .addReg(AArch64::LR)
10481 .addReg(AArch64::SP)
10482 .addImm(-16);
10483 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10484 .addReg(AArch64::SP, RegState::Define)
10485 .addReg(AArch64::LR, RegState::Define)
10486 .addReg(AArch64::SP)
10487 .addImm(16);
10488 }
10489
10490 It = MBB.insert(It, Save);
10491 It++;
10492
10493 // Insert the call.
10494 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10495 .addGlobalAddress(M.getNamedValue(MF.getName())));
10496 CallPt = It;
10497 It++;
10498
10499 It = MBB.insert(It, Restore);
10500 return CallPt;
10501}
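// For reference (illustrative): with MachineOutlinerRegSave and, say, x20 as
// the register picked by findRegisterToSaveLRTo (a hypothetical choice), the
// call site becomes
//   mov x20, x30           // ORRXrs: save LR to the spare register
//   bl  OUTLINED_FUNCTION_N
//   mov x30, x20           // ORRXrs: restore LR
// while the default variant uses the STRXpre/LDRXpost pair against SP instead.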
10502
10503bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10504 MachineFunction &MF) const {
10505 return MF.getFunction().hasMinSize();
10506}
10507
10508void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10509 MachineBasicBlock::iterator Iter,
10510 DebugLoc &DL,
10511 bool AllowSideEffects) const {
10512 const MachineFunction &MF = *MBB.getParent();
10513 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10514 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10515
10516 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10517 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10518 } else if (STI.isSVEorStreamingSVEAvailable()) {
10519 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10520 .addImm(0)
10521 .addImm(0);
10522 } else if (STI.isNeonAvailable()) {
10523 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10524 .addImm(0);
10525 } else {
10526 // This is a streaming-compatible function without SVE. We don't have full
10527 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10529 // Since `movi v..` would be illegal here, use `fmov d..` instead.
10529 assert(STI.hasNEON() && "Expected to have NEON.");
10530 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10531 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10532 }
10533}
10534
10535std::optional<DestSourcePair>
10536AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10537
10538 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register
10539 // and a zero shift immediate are used as aliases for the mov instruction.
10540 if (((MI.getOpcode() == AArch64::ORRWrs &&
10541 MI.getOperand(1).getReg() == AArch64::WZR &&
10542 MI.getOperand(3).getImm() == 0x0) ||
10543 (MI.getOpcode() == AArch64::ORRWrr &&
10544 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10545 // Check that the w->w move is not a zero-extending w->x mov.
10546 (!MI.getOperand(0).getReg().isVirtual() ||
10547 MI.getOperand(0).getSubReg() == 0) &&
10548 (!MI.getOperand(0).getReg().isPhysical() ||
10549 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10550 /*TRI=*/nullptr) == -1))
10551 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10552
10553 if (MI.getOpcode() == AArch64::ORRXrs &&
10554 MI.getOperand(1).getReg() == AArch64::XZR &&
10555 MI.getOperand(3).getImm() == 0x0)
10556 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10557
10558 return std::nullopt;
10559}
10560
10561std::optional<DestSourcePair>
10562AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10563 if ((MI.getOpcode() == AArch64::ORRWrs &&
10564 MI.getOperand(1).getReg() == AArch64::WZR &&
10565 MI.getOperand(3).getImm() == 0x0) ||
10566 (MI.getOpcode() == AArch64::ORRWrr &&
10567 MI.getOperand(1).getReg() == AArch64::WZR))
10568 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10569 return std::nullopt;
10570}
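// For instance (sketch), both "ORRWrs w0, wzr, w1, 0" and "ORRWrr w0, wzr, w1"
// (expansions of "mov w0, w1") are reported as copy-like here, whereas
// isCopyInstrImpl above additionally rejects the case where such a move is
// really a zero-extending w->x copy.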
10571
10572std::optional<RegImmPair>
10573AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10574 int Sign = 1;
10575 int64_t Offset = 0;
10576
10577 // TODO: Handle cases where Reg is a super- or sub-register of the
10578 // destination register.
10579 const MachineOperand &Op0 = MI.getOperand(0);
10580 if (!Op0.isReg() || Reg != Op0.getReg())
10581 return std::nullopt;
10582
10583 switch (MI.getOpcode()) {
10584 default:
10585 return std::nullopt;
10586 case AArch64::SUBWri:
10587 case AArch64::SUBXri:
10588 case AArch64::SUBSWri:
10589 case AArch64::SUBSXri:
10590 Sign *= -1;
10591 [[fallthrough]];
10592 case AArch64::ADDSWri:
10593 case AArch64::ADDSXri:
10594 case AArch64::ADDWri:
10595 case AArch64::ADDXri: {
10596 // TODO: Third operand can be global address (usually some string).
10597 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10598 !MI.getOperand(2).isImm())
10599 return std::nullopt;
10600 int Shift = MI.getOperand(3).getImm();
10601 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10602 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10603 }
10604 }
10605 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10606}
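// Illustrative examples (sketch), with Reg == x0:
//   "ADDXri x0, x1, 4, 12" yields {x1, 4 << 12} == {x1, 16384}
//   "SUBXri x0, x1, 8, 0"  yields {x1, -8}
// i.e. the base register plus the sign-adjusted, shifted immediate.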
10607
10608/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10609/// the destination register then, if possible, describe the value in terms of
10610/// the source register.
10611static std::optional<ParamLoadedValue>
10612describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10613 const TargetInstrInfo *TII,
10614 const TargetRegisterInfo *TRI) {
10615 auto DestSrc = TII->isCopyLikeInstr(MI);
10616 if (!DestSrc)
10617 return std::nullopt;
10618
10619 Register DestReg = DestSrc->Destination->getReg();
10620 Register SrcReg = DestSrc->Source->getReg();
10621
10622 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10623
10624 // If the described register is the destination, just return the source.
10625 if (DestReg == DescribedReg)
10626 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10627
10628 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10629 if (MI.getOpcode() == AArch64::ORRWrs &&
10630 TRI->isSuperRegister(DestReg, DescribedReg))
10631 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10632
10633 // We may need to describe the lower part of a ORRXrs move.
10634 if (MI.getOpcode() == AArch64::ORRXrs &&
10635 TRI->isSubRegister(DestReg, DescribedReg)) {
10636 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10637 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10638 }
10639
10640 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10641 "Unhandled ORR[XW]rs copy case");
10642
10643 return std::nullopt;
10644}
10645
10646bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10647 // Functions cannot be split to different sections on AArch64 if they have
10648 // a red zone. This is because relaxing a cross-section branch may require
10649 // incrementing the stack pointer to spill a register, which would overwrite
10650 // the red zone.
10651 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10652 return false;
10653
10654 return TargetInstrInfo::isFunctionSafeToSplit(MF);
10655}
10656
10657bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10658 const MachineBasicBlock &MBB) const {
10659 // Asm Goto blocks can contain conditional branches to goto labels, which can
10660 // get moved out of range of the branch instruction.
10661 auto isAsmGoto = [](const MachineInstr &MI) {
10662 return MI.getOpcode() == AArch64::INLINEASM_BR;
10663 };
10664 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10665 return false;
10666
10667 // Because jump tables are label-relative instead of table-relative, they all
10668 // must be in the same section or relocation fixup handling will fail.
10669
10670 // Check if MBB is a jump table target
10671 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10672 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10673 return llvm::is_contained(JTE.MBBs, &MBB);
10674 };
10675 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10676 return false;
10677
10678 // Check if MBB contains a jump table lookup
10679 for (const MachineInstr &MI : MBB) {
10680 switch (MI.getOpcode()) {
10681 case TargetOpcode::G_BRJT:
10682 case AArch64::JumpTableDest32:
10683 case AArch64::JumpTableDest16:
10684 case AArch64::JumpTableDest8:
10685 return false;
10686 default:
10687 continue;
10688 }
10689 }
10690
10691 // MBB isn't a special case, so it's safe to split it into the cold section.
10692 return true;
10693}
10694
10695std::optional<ParamLoadedValue>
10696AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10697 Register Reg) const {
10698 const MachineFunction *MF = MI.getMF();
10699 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10700 switch (MI.getOpcode()) {
10701 case AArch64::MOVZWi:
10702 case AArch64::MOVZXi: {
10703 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10704 // 64-bit parameters, so we need to consider super-registers.
10705 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10706 return std::nullopt;
10707
10708 if (!MI.getOperand(1).isImm())
10709 return std::nullopt;
10710 int64_t Immediate = MI.getOperand(1).getImm();
10711 int Shift = MI.getOperand(2).getImm();
10712 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
10713 nullptr);
10714 }
10715 case AArch64::ORRWrs:
10716 case AArch64::ORRXrs:
10717 return describeORRLoadedValue(MI, Reg, this, TRI);
10718 }
10719
10720 return TargetInstrInfo::describeLoadedValue(MI, Reg);
10721}
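// For example (sketch), "MOVZXi x0, 42, 16" describes x0 as the immediate
// 42 << 16, while ORRWrs/ORRXrs register moves are handed to
// describeORRLoadedValue above so the value can be described in terms of the
// source register.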
10722
10723bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10724 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10725 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10726 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10727 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10728
10729 // Anyexts are nops.
10730 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10731 return true;
10732
10733 Register DefReg = ExtMI.getOperand(0).getReg();
10734 if (!MRI.hasOneNonDBGUse(DefReg))
10735 return false;
10736
10737 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10738 // addressing mode.
10739 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
10740 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10741}
10742
10743uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10744 return get(Opc).TSFlags & AArch64::ElementSizeMask;
10745}
10746
10747bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10748 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10749}
10750
10751bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10752 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
10753}
10754
10755unsigned int
10756AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10757 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10758}
10759
10760bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10761 unsigned Scale) const {
10762 if (Offset && Scale)
10763 return false;
10764
10765 // Check Reg + Imm
10766 if (!Scale) {
10767 // 9-bit signed offset
10768 if (isInt<9>(Offset))
10769 return true;
10770
10771 // 12-bit unsigned offset
10772 unsigned Shift = Log2_64(NumBytes);
10773 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10774 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10775 (Offset >> Shift) << Shift == Offset)
10776 return true;
10777 return false;
10778 }
10779
10780 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10781 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10782}
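// Illustrative examples (sketch) for an 8-byte access (NumBytes == 8):
//   Offset == -8,    Scale == 0 -> true  (fits the 9-bit signed form)
//   Offset == 32760, Scale == 0 -> true  (8 * 4095, scaled 12-bit unsigned)
//   Offset == 300,   Scale == 0 -> false (not a multiple of 8 and too large
//                                         for the 9-bit signed form)
//   Offset == 0,     Scale == 8 -> true  (reg1 + 8 * reg2)
//   Offset == 0,     Scale == 4 -> false (scale must be 1 or the access size)
//   nonzero Offset and Scale    -> false (cannot combine both)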
10783
10784unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10785 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10786 return AArch64::BLRNoIP;
10787 else
10788 return AArch64::BLR;
10789}
10790
10791MachineBasicBlock::iterator
10792AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10793 Register TargetReg, bool FrameSetup) const {
10794 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10795
10796 MachineBasicBlock &MBB = *MBBI->getParent();
10797 MachineFunction &MF = *MBB.getParent();
10798 const AArch64InstrInfo *TII =
10799 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10800 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10801 DebugLoc DL = MBB.findDebugLoc(MBBI);
10802
10803 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10804 MachineBasicBlock *LoopTestMBB =
10805 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10806 MF.insert(MBBInsertPoint, LoopTestMBB);
10807 MachineBasicBlock *LoopBodyMBB =
10808 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10809 MF.insert(MBBInsertPoint, LoopBodyMBB);
10810 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10811 MF.insert(MBBInsertPoint, ExitMBB);
10812 MachineInstr::MIFlag Flags =
10813 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
10814
10815 // LoopTest:
10816 // SUB SP, SP, #ProbeSize
10817 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10818 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10819
10820 // CMP SP, TargetReg
10821 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10822 AArch64::XZR)
10823 .addReg(AArch64::SP)
10824 .addReg(TargetReg)
10825 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
10826 .setMIFlags(Flags);
10827
10828 // B.<Cond> LoopExit
10829 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10830 .addImm(AArch64CC::LE)
10831 .addMBB(ExitMBB)
10832 .setMIFlags(Flags);
10833
10834 // STR XZR, [SP]
10835 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10836 .addReg(AArch64::XZR)
10837 .addReg(AArch64::SP)
10838 .addImm(0)
10839 .setMIFlags(Flags);
10840
10841 // B loop
10842 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10843 .addMBB(LoopTestMBB)
10844 .setMIFlags(Flags);
10845
10846 // LoopExit:
10847 // MOV SP, TargetReg
10848 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10849 .addReg(TargetReg)
10850 .addImm(0)
10851 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
10852 .setMIFlags(Flags);
10853
10854 // LDR XZR, [SP]
10855 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10856 .addReg(AArch64::XZR, RegState::Define)
10857 .addReg(AArch64::SP)
10858 .addImm(0)
10859 .setMIFlags(Flags);
10860
10861 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10862 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
10863
10864 LoopTestMBB->addSuccessor(ExitMBB);
10865 LoopTestMBB->addSuccessor(LoopBodyMBB);
10866 LoopBodyMBB->addSuccessor(LoopTestMBB);
10867 MBB.addSuccessor(LoopTestMBB);
10868
10869 // Update liveins.
10870 if (MF.getRegInfo().reservedRegsFrozen())
10871 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10872
10873 return ExitMBB->begin();
10874}
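// The probe loop built above has roughly this shape (illustrative sketch;
// <TargetReg> is the register passed in):
//
// LoopTest:
//   sub  sp, sp, #ProbeSize
//   cmp  sp, <TargetReg>
//   b.le LoopExit
// LoopBody:
//   str  xzr, [sp]
//   b    LoopTest
// LoopExit:
//   mov  sp, <TargetReg>
//   ldr  xzr, [sp]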
10875
10876namespace {
10877class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10878 MachineFunction *MF;
10879 const TargetInstrInfo *TII;
10880 const TargetRegisterInfo *TRI;
10881 MachineRegisterInfo &MRI;
10882
10883 /// The block of the loop
10884 MachineBasicBlock *LoopBB;
10885 /// The conditional branch of the loop
10886 MachineInstr *CondBranch;
10887 /// The compare instruction for loop control
10888 MachineInstr *Comp;
10889 /// The number of the operand of the loop counter value in Comp
10890 unsigned CompCounterOprNum;
10891 /// The instruction that updates the loop counter value
10892 MachineInstr *Update;
10893 /// The number of the operand of the loop counter value in Update
10894 unsigned UpdateCounterOprNum;
10895 /// The initial value of the loop counter
10896 Register Init;
10897 /// True iff Update is a predecessor of Comp
10898 bool IsUpdatePriorComp;
10899
10900 /// The normalized condition used by createTripCountGreaterCondition()
10901 SmallVector<MachineOperand, 4> Cond;
10902
10903public:
10904 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10905 MachineInstr *Comp, unsigned CompCounterOprNum,
10906 MachineInstr *Update, unsigned UpdateCounterOprNum,
10907 Register Init, bool IsUpdatePriorComp,
10908 const SmallVectorImpl<MachineOperand> &Cond)
10909 : MF(Comp->getParent()->getParent()),
10910 TII(MF->getSubtarget().getInstrInfo()),
10911 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10912 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10913 CompCounterOprNum(CompCounterOprNum), Update(Update),
10914 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10915 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10916
10917 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10918 // Make sure the instructions for loop control are placed in stage 0.
10919 // The predecessors of Comp are considered by the caller.
10920 return MI == Comp;
10921 }
10922
10923 std::optional<bool> createTripCountGreaterCondition(
10924 int TC, MachineBasicBlock &MBB,
10925 SmallVectorImpl<MachineOperand> &CondParam) override {
10926 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10927 // Cond is normalized for such use.
10928 // The predecessors of the branch are assumed to have already been inserted.
10929 CondParam = Cond;
10930 return {};
10931 }
10932
10933 void createRemainingIterationsGreaterCondition(
10934 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10935 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10936
10937 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10938
10939 void adjustTripCount(int TripCountAdjust) override {}
10940
10941 bool isMVEExpanderSupported() override { return true; }
10942};
10943} // namespace
10944
10945/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
10946/// is replaced by ReplaceReg. The output register is newly created.
10947/// The other operands are unchanged from MI.
10948static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10949 Register ReplaceReg, MachineBasicBlock &MBB,
10950 MachineBasicBlock::iterator InsertTo) {
10951 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10952 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10953 const TargetRegisterInfo *TRI =
10954 MBB.getParent()->getSubtarget().getRegisterInfo();
10955 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
10956 Register Result = 0;
10957 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10958 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10959 Result = MRI.createVirtualRegister(
10960 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10961 NewMI->getOperand(I).setReg(Result);
10962 } else if (I == ReplaceOprNum) {
10963 MRI.constrainRegClass(ReplaceReg,
10964 TII->getRegClass(NewMI->getDesc(), I, TRI));
10965 NewMI->getOperand(I).setReg(ReplaceReg);
10966 }
10967 }
10968 MBB.insert(InsertTo, NewMI);
10969 return Result;
10970}
10971
10972void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10973 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10974 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
10975 // Create and accumulate conditions for the next TC iterations.
10976 // Example:
10977 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10978 // # iteration of the kernel
10979 //
10980 // # insert the following instructions
10981 // cond = CSINCXr 0, 0, C, implicit $nzcv
10982 // counter = ADDXri counter, 1 # clone from this->Update
10983 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10984 // cond = CSINCXr cond, cond, C, implicit $nzcv
10985 // ... (repeat TC times)
10986 // SUBSXri cond, 0, implicit-def $nzcv
10987
10988 assert(CondBranch->getOpcode() == AArch64::Bcc);
10989 // CondCode to exit the loop
10990 AArch64CC::CondCode CC =
10991 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
10992 if (CondBranch->getOperand(1).getMBB() == LoopBB)
10993 CC = AArch64CC::getInvertedCondCode(CC);
10994
10995 // Accumulate conditions to exit the loop
10996 Register AccCond = AArch64::XZR;
10997
10998 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10999 auto AccumulateCond = [&](Register CurCond,
11000 AArch64CC::CondCode CC) -> Register {
11001 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11002 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11003 .addReg(NewCond, RegState::Define)
11004 .addReg(CurCond)
11005 .addReg(CurCond)
11006 .addImm(AArch64CC::getInvertedCondCode(CC));
11007 return NewCond;
11008 };
11009
11010 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11011 // Update and Comp for I == 0 already exist in MBB
11012 // (MBB is an unrolled kernel)
11013 Register Counter;
11014 for (int I = 0; I <= TC; ++I) {
11015 Register NextCounter;
11016 if (I != 0)
11017 NextCounter =
11018 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11019
11020 AccCond = AccumulateCond(AccCond, CC);
11021
11022 if (I != TC) {
11023 if (I == 0) {
11024 if (Update != Comp && IsUpdatePriorComp) {
11025 Counter =
11026 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11027 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11028 MBB.end());
11029 } else {
11030 // We can use the already-calculated value.
11031 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11032 }
11033 } else if (Update != Comp) {
11034 NextCounter =
11035 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11036 }
11037 }
11038 Counter = NextCounter;
11039 }
11040 } else {
11041 Register Counter;
11042 if (LastStage0Insts.empty()) {
11043 // Use the initial counter value (testing whether the trip count is
11044 // sufficient for the pipelined code to be executed at all)
11045 Counter = Init;
11046 if (IsUpdatePriorComp)
11047 Counter =
11048 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11049 } else {
11050 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11051 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11052 }
11053
11054 for (int I = 0; I <= TC; ++I) {
11055 Register NextCounter;
11056 NextCounter =
11057 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11058 AccCond = AccumulateCond(AccCond, CC);
11059 if (I != TC && Update != Comp)
11060 NextCounter =
11061 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11062 Counter = NextCounter;
11063 }
11064 }
11065
11066 // If AccCond == 0, the remainder is greater than TC.
11067 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11068 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11069 .addReg(AccCond)
11070 .addImm(0)
11071 .addImm(0);
11072 Cond.clear();
11073 Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11074}
11075
11076static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11077 Register &RegMBB, Register &RegOther) {
11078 assert(Phi.getNumOperands() == 5);
11079 if (Phi.getOperand(2).getMBB() == MBB) {
11080 RegMBB = Phi.getOperand(1).getReg();
11081 RegOther = Phi.getOperand(3).getReg();
11082 } else {
11083 assert(Phi.getOperand(4).getMBB() == MBB);
11084 RegMBB = Phi.getOperand(3).getReg();
11085 RegOther = Phi.getOperand(1).getReg();
11086 }
11087}
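// For example (sketch): given "%r = PHI %a, %bb1, %b, %bb2", calling this with
// MBB == %bb1 sets RegMBB = %a and RegOther = %b; with MBB == %bb2 it sets
// RegMBB = %b and RegOther = %a.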
11088
11089static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11090 if (!Reg.isVirtual())
11091 return false;
11092 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11093 return MRI.getVRegDef(Reg)->getParent() != BB;
11094}
11095
11096/// If Reg is an induction variable, return true and fill in the output parameters that describe how it is updated.
11097static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11098 MachineInstr *&UpdateInst,
11099 unsigned &UpdateCounterOprNum, Register &InitReg,
11100 bool &IsUpdatePriorComp) {
11101 // Example:
11102 //
11103 // Preheader:
11104 // InitReg = ...
11105 // LoopBB:
11106 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11107 // Reg = COPY Reg0 ; COPY is ignored.
11108 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11109 // ; Reg is the value calculated in the previous
11110 // ; iteration, so IsUpdatePriorComp == false.
11111
11112 if (LoopBB->pred_size() != 2)
11113 return false;
11114 if (!Reg.isVirtual())
11115 return false;
11116 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11117 UpdateInst = nullptr;
11118 UpdateCounterOprNum = 0;
11119 InitReg = 0;
11120 IsUpdatePriorComp = true;
11121 Register CurReg = Reg;
11122 while (true) {
11123 MachineInstr *Def = MRI.getVRegDef(CurReg);
11124 if (Def->getParent() != LoopBB)
11125 return false;
11126 if (Def->isCopy()) {
11127 // Ignore copy instructions unless they contain subregisters
11128 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11129 return false;
11130 CurReg = Def->getOperand(1).getReg();
11131 } else if (Def->isPHI()) {
11132 if (InitReg != 0)
11133 return false;
11134 if (!UpdateInst)
11135 IsUpdatePriorComp = false;
11136 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11137 } else {
11138 if (UpdateInst)
11139 return false;
11140 switch (Def->getOpcode()) {
11141 case AArch64::ADDSXri:
11142 case AArch64::ADDSWri:
11143 case AArch64::SUBSXri:
11144 case AArch64::SUBSWri:
11145 case AArch64::ADDXri:
11146 case AArch64::ADDWri:
11147 case AArch64::SUBXri:
11148 case AArch64::SUBWri:
11149 UpdateInst = Def;
11150 UpdateCounterOprNum = 1;
11151 break;
11152 case AArch64::ADDSXrr:
11153 case AArch64::ADDSWrr:
11154 case AArch64::SUBSXrr:
11155 case AArch64::SUBSWrr:
11156 case AArch64::ADDXrr:
11157 case AArch64::ADDWrr:
11158 case AArch64::SUBXrr:
11159 case AArch64::SUBWrr:
11160 UpdateInst = Def;
11161 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11162 UpdateCounterOprNum = 1;
11163 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11164 UpdateCounterOprNum = 2;
11165 else
11166 return false;
11167 break;
11168 default:
11169 return false;
11170 }
11171 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11172 }
11173
11174 if (!CurReg.isVirtual())
11175 return false;
11176 if (Reg == CurReg)
11177 break;
11178 }
11179
11180 if (!UpdateInst)
11181 return false;
11182
11183 return true;
11184}
11185
11186std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11187AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11188 // Accept loops that meet the following conditions
11189 // * The conditional branch is BCC
11190 // * The compare instruction is ADDS/SUBS/WHILEXX
11191 // * One operand of the compare is an induction variable and the other is a
11192 // loop invariant value
11193 // * The induction variable is incremented/decremented by a single instruction
11194 // * Does not contain CALL or instructions which have unmodeled side effects
11195
11196 for (MachineInstr &MI : *LoopBB)
11197 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11198 // This instruction may use NZCV, which interferes with the instruction to
11199 // be inserted for loop control.
11200 return nullptr;
11201
11202 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11203 SmallVector<MachineOperand, 4> Cond;
11204 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11205 return nullptr;
11206
11207 // Infinite loops are not supported
11208 if (TBB == LoopBB && FBB == LoopBB)
11209 return nullptr;
11210
11211 // Must be a conditional branch
11212 if (TBB != LoopBB && FBB == nullptr)
11213 return nullptr;
11214
11215 assert((TBB == LoopBB || FBB == LoopBB) &&
11216 "The Loop must be a single-basic-block loop");
11217
11218 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11219 const TargetRegisterInfo &TRI = getRegisterInfo();
11220
11221 if (CondBranch->getOpcode() != AArch64::Bcc)
11222 return nullptr;
11223
11224 // Normalization for createTripCountGreaterCondition()
11225 if (TBB == LoopBB)
11226 reverseBranchCondition(Cond);
11227
11228 MachineInstr *Comp = nullptr;
11229 unsigned CompCounterOprNum = 0;
11230 for (MachineInstr &MI : reverse(*LoopBB)) {
11231 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11232 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11233 // operands is a loop invariant value
11234
11235 switch (MI.getOpcode()) {
11236 case AArch64::SUBSXri:
11237 case AArch64::SUBSWri:
11238 case AArch64::ADDSXri:
11239 case AArch64::ADDSWri:
11240 Comp = &MI;
11241 CompCounterOprNum = 1;
11242 break;
11243 case AArch64::ADDSWrr:
11244 case AArch64::ADDSXrr:
11245 case AArch64::SUBSWrr:
11246 case AArch64::SUBSXrr:
11247 Comp = &MI;
11248 break;
11249 default:
11250 if (isWhileOpcode(MI.getOpcode())) {
11251 Comp = &MI;
11252 break;
11253 }
11254 return nullptr;
11255 }
11256
11257 if (CompCounterOprNum == 0) {
11258 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11259 CompCounterOprNum = 2;
11260 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11261 CompCounterOprNum = 1;
11262 else
11263 return nullptr;
11264 }
11265 break;
11266 }
11267 }
11268 if (!Comp)
11269 return nullptr;
11270
11271 MachineInstr *Update = nullptr;
11272 Register Init;
11273 bool IsUpdatePriorComp;
11274 unsigned UpdateCounterOprNum;
11275 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11276 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11277 return nullptr;
11278
11279 return std::make_unique<AArch64PipelinerLoopInfo>(
11280 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11281 Init, IsUpdatePriorComp, Cond);
11282}
11283
11284/// verifyInstruction - Perform target specific instruction verification.
11285bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11286 StringRef &ErrInfo) const {
11287 // Verify that immediate offsets on load/store instructions are within range.
11288 // Stack objects with an FI operand are excluded as they can be fixed up
11289 // during PEI.
11290 TypeSize Scale(0U, false), Width(0U, false);
11291 int64_t MinOffset, MaxOffset;
11292 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11293 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11294 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11295 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11296 if (Imm < MinOffset || Imm > MaxOffset) {
11297 ErrInfo = "Unexpected immediate on load/store instruction";
11298 return false;
11299 }
11300 }
11301 }
11302
11303 const MCInstrDesc &MCID = MI.getDesc();
11304 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11305 const MachineOperand &MO = MI.getOperand(Op);
11306 switch (MCID.operands()[Op].OperandType) {
11307 case AArch64::OPERAND_IMPLICIT_IMM_0:
11308 if (!MO.isImm() || MO.getImm() != 0) {
11309 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11310 return false;
11311 }
11312 break;
11313 case AArch64::OPERAND_SHIFT_MSL:
11314 if (!MO.isImm() ||
11315 AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11316 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11317 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11318 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11319 return false;
11320 }
11321 break;
11322 default:
11323 break;
11324 }
11325 }
11326 return true;
11327}
11328
11329#define GET_INSTRINFO_HELPERS
11330#define GET_INSTRMAP_INFO
11331#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:107
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
static constexpr unsigned NoRegister
Definition MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
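A hedged sketch of how the MachineFrameInfo queries above fit together; dumpObject is a hypothetical debugging helper, not an LLVM API.
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/Support/raw_ostream.h"

  // Hypothetical helper: print the layout of one stack object.
  static void dumpObject(const llvm::MachineFunction &MF, int FI) {
    const llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
    llvm::errs() << "FI#" << FI << ": size=" << MFI.getObjectSize(FI)
                 << " offset=" << MFI.getObjectOffset(FI)
                 << " align=" << MFI.getObjectAlign(FI).value()
                 << (MFI.isFixedObjectIndex(FI) ? " (fixed)\n" : "\n");
  }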
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
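A minimal sketch of the chaining style these MachineInstrBuilder helpers enable. ADDXri and the operand values are illustrative; DestReg, SrcReg, MBB, MBBI, DL and TII are assumed to be in scope.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
      .addReg(SrcReg, getKillRegState(true))
      .addImm(/*Imm12=*/16)                                // unsigned 12-bit immediate
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(MachineInstr::FrameSetup);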
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
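For illustration, a hedged sketch combining several of the MachineInstr queries above into a conservative "is this safe to move?" style predicate; the helper name is made up.
  static bool isSafeToHoistSketch(const MachineInstr &MI) {
    if (MI.isCall() || MI.hasUnmodeledSideEffects())
      return false;
    if (MI.mayLoadOrStore() && MI.hasOrderedMemoryRef())
      return false;
    // Reject anything that defines a physical register unless the def is dead.
    for (const MachineOperand &MO : MI.operands())
      if (MO.isReg() && MO.isDef() && MO.getReg().isPhysical() && !MO.isDead())
        return false;
    return true;
  }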
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
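A small sketch of the static MachineOperand factories above; the concrete register and value are arbitrary.
  MachineOperand ImmOp = MachineOperand::CreateImm(42);
  MachineOperand RegOp = MachineOperand::CreateReg(AArch64::X0, /*isDef=*/false);
  assert(ImmOp.isImm() && ImmOp.getImm() == 42);
  assert(RegOp.isReg() && RegOp.getReg() == AArch64::X0);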
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
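A hedged sketch of the usual scavenging sequence implied by these entries; it assumes the function still tracks liveness accurately, and an unused GPR may simply not exist.
  RegScavenger RS;
  RS.enterBasicBlockEnd(MBB);
  Register Scratch = RS.FindUnusedReg(&AArch64::GPR64RegClass);
  if (Scratch.isValid())
    RS.setRegUsed(Scratch); // reserve it so later queries treat it as used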
Wrapper class representing virtual and physical registers.
Definition Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:102
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
Represents a location in source code.
Definition SMLoc.h:23
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:197
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:50
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:42
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
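For illustration, a short sketch of how the fixed and scalable parts of a StackOffset compose; the byte counts are arbitrary.
  StackOffset Off = StackOffset::get(/*Fixed=*/16, /*Scalable=*/32);
  Off += StackOffset::getFixed(8);          // 24 fixed bytes, 32 scalable bytes
  int64_t FixedPart = Off.getFixed();       // 24
  int64_t ScalablePart = Off.getScalable(); // 32, scaled by vscale at runtime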
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:134
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
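For illustration, a hedged sketch of the logical-immediate helpers and the move-immediate expander above. The sample constants are arbitrary, and encodeLogicalImmediate is only valid for values that actually are logical immediates.
  // 0x00FF00FF00FF00FF is a repeating 16-bit pattern, so it is encodable.
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0x00FF00FF00FF00FFULL, 64);
  uint64_t Back = AArch64_AM::decodeLogicalImmediate(Enc, 64); // == original value
  // Values that are not encodable are materialized via the expander instead:
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(0x1234567812345678ULL, 64, Insn);
  // Insn now describes the move-immediate sequence needed to build the value.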
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
Definition MathExtras.h:47
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2461
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
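A hedged sketch of how these status flags are normally consumed together with isAArch64FrameOffsetLegal(), loosely modeled on frame-index elimination; MI and Offset are inputs provided by the caller.
  bool UseUnscaledOp = false;
  unsigned UnscaledOp = 0;
  int64_t Emittable = 0;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &Emittable);
  if (Status & AArch64FrameOffsetCanUpdate) {
    // The immediate operand of MI can absorb at least part of Offset.
  }
  if (!(Status & AArch64FrameOffsetIsLegal)) {
    // The remainder must be materialized separately, e.g. with emitFrameOffset().
  }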
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1721
auto reverse(ContainerTy &&C)
Definition STLExtras.h:401
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1633
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
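For illustration, a minimal sketch of an emitFrameOffset() call that adds a mixed fixed/scalable offset to SP into a scratch register; the register choice and byte counts are arbitrary, and MBB, MBBI, DL and TII are assumed to be in scope.
  emitFrameOffset(MBB, MBBI, DL, AArch64::X9, AArch64::SP,
                  StackOffset::get(/*Fixed=*/128, /*Scalable=*/16), TII,
                  MachineInstr::NoFlags);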
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:318
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2109
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1886
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:238
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.