1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
72static cl::opt<unsigned> TBZDisplacementBits(
73 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
76static cl::opt<unsigned> CBZDisplacementBits(
77 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
88static cl::opt<unsigned> GatherOptSearchLimit(
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94 : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN,
95 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// GetInstSize - Return the number of bytes of code the specified
99/// instruction may occupy. This returns the maximum number of bytes.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101 const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134 // The size should preferably be set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (the default case).
136 // The specific cases below handle instructions of variable size.
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
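// Example (illustrative editorial note, not from the original source): assuming
// a MachineInstr MI for "STATEPOINT ... <num patch bytes = 8>", a caller such as
// branch relaxation would see
//   unsigned Bytes = TII->getInstSizeInBytes(MI); // 8, the requested shadow
// while a STATEPOINT with 0 patch bytes reports 4, the size of the plain BL
// call that is emitted for it, and the 36-byte XRay figure above is 4 bytes of
// alignment padding plus the 32-byte sled.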
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
195 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
199 Size += getInstSizeInBytes(*I);
200 }
201 return Size;
202}
203
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205 SmallVectorImpl<MachineOperand> &Cond) {
206 // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 }
245}
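// Example (illustrative, not in the original source): for a terminator such as
//   TBNZW $w3, 5, %bb.7
// the code above produces Target = %bb.7 and Cond = { -1, TBNZW, $w3, 5 },
// while a plain conditional branch
//   Bcc 1 /*NE*/, %bb.7
// produces Cond = { NE } with the condition code as the only element. The -1
// sentinel is what reverseBranchCondition/instantiateCondBranch key off below.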
246
247static unsigned getBranchDisplacementBits(unsigned Opc) {
248 switch (Opc) {
249 default:
250 llvm_unreachable("unexpected opcode!");
251 case AArch64::B:
252 return BDisplacementBits;
253 case AArch64::TBNZW:
254 case AArch64::TBZW:
255 case AArch64::TBNZX:
256 case AArch64::TBZX:
257 return TBZDisplacementBits;
258 case AArch64::CBNZW:
259 case AArch64::CBZW:
260 case AArch64::CBNZX:
261 case AArch64::CBZX:
262 return CBZDisplacementBits;
263 case AArch64::Bcc:
264 return BCCDisplacementBits;
265 case AArch64::CBWPri:
266 case AArch64::CBXPri:
267 case AArch64::CBWPrr:
268 case AArch64::CBXPrr:
269 return CBDisplacementBits;
270 }
271}
272
273bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
274 int64_t BrOffset) const {
275 unsigned Bits = getBranchDisplacementBits(BranchOp);
276 assert(Bits >= 3 && "max branch displacement must be enough to jump "
277 "over conditional branch expansion");
278 return isIntN(Bits, BrOffset / 4);
279}
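// Worked example (editorial note, not from the original source): Bcc encodes a
// 19-bit signed word offset, so with the default aarch64-bcc-offset-bits=19 the
// check is
//   isIntN(19, BrOffset / 4)   // BrOffset is in bytes, hence the division by 4
// giving a reach of roughly +/-1MiB; B with 26 bits reaches about +/-128MiB and
// TB[N]Z with 14 bits about +/-32KiB.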
280
281MachineBasicBlock *
282AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
283 switch (MI.getOpcode()) {
284 default:
285 llvm_unreachable("unexpected opcode!");
286 case AArch64::B:
287 return MI.getOperand(0).getMBB();
288 case AArch64::TBZW:
289 case AArch64::TBNZW:
290 case AArch64::TBZX:
291 case AArch64::TBNZX:
292 return MI.getOperand(2).getMBB();
293 case AArch64::CBZW:
294 case AArch64::CBNZW:
295 case AArch64::CBZX:
296 case AArch64::CBNZX:
297 case AArch64::Bcc:
298 return MI.getOperand(1).getMBB();
299 case AArch64::CBWPri:
300 case AArch64::CBXPri:
301 case AArch64::CBWPrr:
302 case AArch64::CBXPrr:
303 return MI.getOperand(3).getMBB();
304 }
305}
306
307void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
308 MachineBasicBlock &NewDestBB,
309 MachineBasicBlock &RestoreBB,
310 const DebugLoc &DL,
311 int64_t BrOffset,
312 RegScavenger *RS) const {
313 assert(RS && "RegScavenger required for long branching");
314 assert(MBB.empty() &&
315 "new block should be inserted for expanding unconditional branch");
316 assert(MBB.pred_size() == 1);
317 assert(RestoreBB.empty() &&
318 "restore block should be inserted for restoring clobbered registers");
319
320 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
321 // Offsets outside of the signed 33-bit range are not supported for ADRP +
322 // ADD.
323 if (!isInt<33>(BrOffset))
324 report_fatal_error(
325 "Branch offsets outside of the signed 33-bit range not supported");
326
327 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
328 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
330 .addReg(Reg)
331 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
332 .addImm(0);
333 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
334 };
335
336 RS->enterBasicBlockEnd(MBB);
337 // If X16 is unused, we can rely on the linker to insert a range extension
338 // thunk if NewDestBB is out of range of a single B instruction.
339 constexpr Register Reg = AArch64::X16;
340 if (!RS->isRegUsed(Reg)) {
341 insertUnconditionalBranch(MBB, &NewDestBB, DL);
342 RS->setRegUsed(Reg);
343 return;
344 }
345
346 // If there's a free register and it's worth inflating the code size,
347 // manually insert the indirect branch.
348 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
349 if (Scavenged != AArch64::NoRegister &&
350 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
351 buildIndirectBranch(Scavenged, NewDestBB);
352 RS->setRegUsed(Scavenged);
353 return;
354 }
355
356 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
357 // with red zones.
358 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
359 if (!AFI || AFI->hasRedZone().value_or(true))
360 report_fatal_error(
361 "Unable to insert indirect branch inside function that has red zone");
362
363 // Otherwise, spill X16 and defer range extension to the linker.
364 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
365 .addReg(AArch64::SP, RegState::Define)
366 .addReg(Reg)
367 .addReg(AArch64::SP)
368 .addImm(-16);
369
370 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
371
372 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
373 .addReg(AArch64::SP, RegState::Define)
374 .addReg(Reg, RegState::Define)
375 .addReg(AArch64::SP)
376 .addImm(16);
377}
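// Illustrative summary (editorial note, not from the original source) of the
// three strategies chosen above when a direct B cannot reach NewDestBB:
//   1) X16 unused: emit "b NewDestBB" and let the linker insert an
//      X16-clobbering range-extension thunk if needed.
//   2) A scavengeable GPR64 in a cold section: materialise the address,
//        adrp xN, NewDestBB ; add xN, xN, :lo12:NewDestBB ; br xN
//   3) Otherwise: spill X16 around the (possibly thunked) branch,
//        str x16, [sp, #-16]!  ;  b RestoreBB  ...  ldr x16, [sp], #16
//      with RestoreBB falling through towards NewDestBB afterwards.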
378
379// Branch analysis.
380bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
381 MachineBasicBlock *&TBB,
382 MachineBasicBlock *&FBB,
383 SmallVectorImpl<MachineOperand> &Cond,
384 bool AllowModify) const {
385 // If the block has no terminators, it just falls into the block after it.
386 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
387 if (I == MBB.end())
388 return false;
389
390 // Skip over SpeculationBarrierEndBB terminators
391 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
392 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
393 --I;
394 }
395
396 if (!isUnpredicatedTerminator(*I))
397 return false;
398
399 // Get the last instruction in the block.
400 MachineInstr *LastInst = &*I;
401
402 // If there is only one terminator instruction, process it.
403 unsigned LastOpc = LastInst->getOpcode();
404 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
405 if (isUncondBranchOpcode(LastOpc)) {
406 TBB = LastInst->getOperand(0).getMBB();
407 return false;
408 }
409 if (isCondBranchOpcode(LastOpc)) {
410 // Block ends with fall-through condbranch.
411 parseCondBranch(LastInst, TBB, Cond);
412 return false;
413 }
414 return true; // Can't handle indirect branch.
415 }
416
417 // Get the instruction before it if it is a terminator.
418 MachineInstr *SecondLastInst = &*I;
419 unsigned SecondLastOpc = SecondLastInst->getOpcode();
420
421 // If AllowModify is true and the block ends with two or more unconditional
422 // branches, delete all but the first unconditional branch.
423 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
424 while (isUncondBranchOpcode(SecondLastOpc)) {
425 LastInst->eraseFromParent();
426 LastInst = SecondLastInst;
427 LastOpc = LastInst->getOpcode();
428 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
429 // Return now that the only terminator is an unconditional branch.
430 TBB = LastInst->getOperand(0).getMBB();
431 return false;
432 }
433 SecondLastInst = &*I;
434 SecondLastOpc = SecondLastInst->getOpcode();
435 }
436 }
437
438 // If we're allowed to modify and the block ends in an unconditional branch
439 // which could simply fallthrough, remove the branch. (Note: This case only
440 // matters when we can't understand the whole sequence, otherwise it's also
441 // handled by BranchFolding.cpp.)
442 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
443 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
444 LastInst->eraseFromParent();
445 LastInst = SecondLastInst;
446 LastOpc = LastInst->getOpcode();
447 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
448 assert(!isUncondBranchOpcode(LastOpc) &&
449 "unreachable unconditional branches removed above");
450
451 if (isCondBranchOpcode(LastOpc)) {
452 // Block ends with fall-through condbranch.
453 parseCondBranch(LastInst, TBB, Cond);
454 return false;
455 }
456 return true; // Can't handle indirect branch.
457 }
458 SecondLastInst = &*I;
459 SecondLastOpc = SecondLastInst->getOpcode();
460 }
461
462 // If there are three terminators, we don't know what sort of block this is.
463 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
464 return true;
465
466 // If the block ends with a B and a Bcc, handle it.
467 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
468 parseCondBranch(SecondLastInst, TBB, Cond);
469 FBB = LastInst->getOperand(0).getMBB();
470 return false;
471 }
472
473 // If the block ends with two unconditional branches, handle it. The second
474 // one is not executed, so remove it.
475 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
476 TBB = SecondLastInst->getOperand(0).getMBB();
477 I = LastInst;
478 if (AllowModify)
479 I->eraseFromParent();
480 return false;
481 }
482
483 // ...likewise if it ends with an indirect branch followed by an unconditional
484 // branch.
485 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
486 I = LastInst;
487 if (AllowModify)
488 I->eraseFromParent();
489 return true;
490 }
491
492 // Otherwise, can't handle this.
493 return true;
494}
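// Example (illustrative, not in the original source): for a block ending in
//   Bcc 0 /*EQ*/, %bb.1
//   B %bb.2
// analyzeBranch returns false with TBB = %bb.1, FBB = %bb.2 and Cond = { EQ },
// whereas a block ending in an indirect branch such as BR $x0 makes it return
// true, meaning the terminator sequence could not be analyzed.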
495
496bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
497 MachineBranchPredicate &MBP,
498 bool AllowModify) const {
499 // For the moment, handle only a block which ends with a cb(n)zx followed by
500 // a fallthrough. Why this? Because it is a common form.
501 // TODO: Should we handle b.cc?
502
503 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
504 if (I == MBB.end())
505 return true;
506
507 // Skip over SpeculationBarrierEndBB terminators
508 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
509 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
510 --I;
511 }
512
513 if (!isUnpredicatedTerminator(*I))
514 return true;
515
516 // Get the last instruction in the block.
517 MachineInstr *LastInst = &*I;
518 unsigned LastOpc = LastInst->getOpcode();
519 if (!isCondBranchOpcode(LastOpc))
520 return true;
521
522 switch (LastOpc) {
523 default:
524 return true;
525 case AArch64::CBZW:
526 case AArch64::CBZX:
527 case AArch64::CBNZW:
528 case AArch64::CBNZX:
529 break;
530 };
531
532 MBP.TrueDest = LastInst->getOperand(1).getMBB();
533 assert(MBP.TrueDest && "expected!");
534 MBP.FalseDest = MBB.getNextNode();
535
536 MBP.ConditionDef = nullptr;
537 MBP.SingleUseCondition = false;
538
539 MBP.LHS = LastInst->getOperand(0);
540 MBP.RHS = MachineOperand::CreateImm(0);
541 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
542 ? MachineBranchPredicate::PRED_NE
543 : MachineBranchPredicate::PRED_EQ;
544 return false;
545}
546
547bool AArch64InstrInfo::reverseBranchCondition(
548 SmallVectorImpl<MachineOperand> &Cond) const {
549 if (Cond[0].getImm() != -1) {
550 // Regular Bcc
551 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
552 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
553 } else {
554 // Folded compare-and-branch
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown conditional branch!");
558 case AArch64::CBZW:
559 Cond[1].setImm(AArch64::CBNZW);
560 break;
561 case AArch64::CBNZW:
562 Cond[1].setImm(AArch64::CBZW);
563 break;
564 case AArch64::CBZX:
565 Cond[1].setImm(AArch64::CBNZX);
566 break;
567 case AArch64::CBNZX:
568 Cond[1].setImm(AArch64::CBZX);
569 break;
570 case AArch64::TBZW:
571 Cond[1].setImm(AArch64::TBNZW);
572 break;
573 case AArch64::TBNZW:
574 Cond[1].setImm(AArch64::TBZW);
575 break;
576 case AArch64::TBZX:
577 Cond[1].setImm(AArch64::TBNZX);
578 break;
579 case AArch64::TBNZX:
580 Cond[1].setImm(AArch64::TBZX);
581 break;
582
583 // Cond is { -1, Opcode, CC, Op0, Op1 }
584 case AArch64::CBWPri:
585 case AArch64::CBXPri:
586 case AArch64::CBWPrr:
587 case AArch64::CBXPrr: {
588 // Pseudos using standard 4-bit Arm condition codes
589 AArch64CC::CondCode CC =
590 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
591 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
592 }
593 }
594 }
595
596 return false;
597}
598
599unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
600 int *BytesRemoved) const {
601 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
602 if (I == MBB.end())
603 return 0;
604
605 if (!isUncondBranchOpcode(I->getOpcode()) &&
606 !isCondBranchOpcode(I->getOpcode()))
607 return 0;
608
609 // Remove the branch.
610 I->eraseFromParent();
611
612 I = MBB.end();
613
614 if (I == MBB.begin()) {
615 if (BytesRemoved)
616 *BytesRemoved = 4;
617 return 1;
618 }
619 --I;
620 if (!isCondBranchOpcode(I->getOpcode())) {
621 if (BytesRemoved)
622 *BytesRemoved = 4;
623 return 1;
624 }
625
626 // Remove the branch.
627 I->eraseFromParent();
628 if (BytesRemoved)
629 *BytesRemoved = 8;
630
631 return 2;
632}
633
634void AArch64InstrInfo::instantiateCondBranch(
635 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
636 ArrayRef<MachineOperand> Cond) const {
637 if (Cond[0].getImm() != -1) {
638 // Regular Bcc
639 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
640 } else {
641 // Folded compare-and-branch
642 // Note that we use addOperand instead of addReg to keep the flags.
643
644 // cbz, cbnz
645 const MachineInstrBuilder MIB =
646 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
647
648 // tbz/tbnz
649 if (Cond.size() > 3)
650 MIB.add(Cond[3]);
651
652 // cb
653 if (Cond.size() > 4)
654 MIB.add(Cond[4]);
655
656 MIB.addMBB(TBB);
657 }
658}
659
660unsigned AArch64InstrInfo::insertBranch(
661 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
662 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
663 // Shouldn't be a fall through.
664 assert(TBB && "insertBranch must not be told to insert a fallthrough");
665
666 if (!FBB) {
667 if (Cond.empty()) // Unconditional branch?
668 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
669 else
670 instantiateCondBranch(MBB, DL, TBB, Cond);
671
672 if (BytesAdded)
673 *BytesAdded = 4;
674
675 return 1;
676 }
677
678 // Two-way conditional branch.
679 instantiateCondBranch(MBB, DL, TBB, Cond);
680 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
681
682 if (BytesAdded)
683 *BytesAdded = 8;
684
685 return 2;
686}
687
688// Find the original register that VReg is copied from.
689static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690 while (Register::isVirtualRegister(VReg)) {
691 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
692 if (!DefMI->isFullCopy())
693 return VReg;
694 VReg = DefMI->getOperand(1).getReg();
695 }
696 return VReg;
697}
698
699// Determine if VReg is defined by an instruction that can be folded into a
700// csel instruction. If so, return the folded opcode, and the replacement
701// register.
702static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
703 unsigned *NewVReg = nullptr) {
704 VReg = removeCopies(MRI, VReg);
705 if (!Register::isVirtualRegister(VReg))
706 return 0;
707
708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
709 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
710 unsigned Opc = 0;
711 unsigned SrcOpNum = 0;
712 switch (DefMI->getOpcode()) {
713 case AArch64::ADDSXri:
714 case AArch64::ADDSWri:
715 // if NZCV is used, do not fold.
716 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
717 true) == -1)
718 return 0;
719 // fall-through to ADDXri and ADDWri.
720 [[fallthrough]];
721 case AArch64::ADDXri:
722 case AArch64::ADDWri:
723 // add x, 1 -> csinc.
724 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
725 DefMI->getOperand(3).getImm() != 0)
726 return 0;
727 SrcOpNum = 1;
728 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
729 break;
730
731 case AArch64::ORNXrr:
732 case AArch64::ORNWrr: {
733 // not x -> csinv, represented as orn dst, xzr, src.
734 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
735 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
736 return 0;
737 SrcOpNum = 2;
738 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
739 break;
740 }
741
742 case AArch64::SUBSXrr:
743 case AArch64::SUBSWrr:
744 // if NZCV is used, do not fold.
745 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
746 true) == -1)
747 return 0;
748 // fall-through to SUBXrr and SUBWrr.
749 [[fallthrough]];
750 case AArch64::SUBXrr:
751 case AArch64::SUBWrr: {
752 // neg x -> csneg, represented as sub dst, xzr, src.
753 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
754 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
755 return 0;
756 SrcOpNum = 2;
757 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
758 break;
759 }
760 default:
761 return 0;
762 }
763 assert(Opc && SrcOpNum && "Missing parameters");
764
765 if (NewVReg)
766 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
767 return Opc;
768}
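// Example (illustrative, not in the original source): given
//   %1:gpr32 = ADDWri %0, 1, 0      ; x + 1
// a select between %1 and some %2 can drop the ADD and become
//   %d = CSINCWr %2, %0, <inverted cc>
// which is why canFoldIntoCSel returns CSINCWr with *NewVReg = %0 here;
// ORN-from-zero and SUB-from-zero map to CSINV/CSNEG in the same way.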
769
770bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
771 ArrayRef<MachineOperand> Cond,
772 Register DstReg, Register TrueReg,
773 Register FalseReg, int &CondCycles,
774 int &TrueCycles,
775 int &FalseCycles) const {
776 // Check register classes.
777 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
778 const TargetRegisterClass *RC =
779 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
780 if (!RC)
781 return false;
782
783 // Also need to check the dest regclass, in case we're trying to optimize
784 // something like:
785 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
786 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
787 return false;
788
789 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
790 unsigned ExtraCondLat = Cond.size() != 1;
791
792 // GPRs are handled by csel.
793 // FIXME: Fold in x+1, -x, and ~x when applicable.
794 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
795 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
796 // Single-cycle csel, csinc, csinv, and csneg.
797 CondCycles = 1 + ExtraCondLat;
798 TrueCycles = FalseCycles = 1;
799 if (canFoldIntoCSel(MRI, TrueReg))
800 TrueCycles = 0;
801 else if (canFoldIntoCSel(MRI, FalseReg))
802 FalseCycles = 0;
803 return true;
804 }
805
806 // Scalar floating point is handled by fcsel.
807 // FIXME: Form fabs, fmin, and fmax when applicable.
808 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
809 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
810 CondCycles = 5 + ExtraCondLat;
811 TrueCycles = FalseCycles = 2;
812 return true;
813 }
814
815 // Can't do vectors.
816 return false;
817}
818
819void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
820 MachineBasicBlock::iterator I,
821 const DebugLoc &DL, Register DstReg,
822 ArrayRef<MachineOperand> Cond,
823 Register TrueReg, Register FalseReg) const {
824 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
825
826 // Parse the condition code, see parseCondBranch() above.
827 AArch64CC::CondCode CC;
828 switch (Cond.size()) {
829 default:
830 llvm_unreachable("Unknown condition opcode in Cond");
831 case 1: // b.cc
832 CC = AArch64CC::CondCode(Cond[0].getImm());
833 break;
834 case 3: { // cbz/cbnz
835 // We must insert a compare against 0.
836 bool Is64Bit;
837 switch (Cond[1].getImm()) {
838 default:
839 llvm_unreachable("Unknown branch opcode in Cond");
840 case AArch64::CBZW:
841 Is64Bit = false;
842 CC = AArch64CC::EQ;
843 break;
844 case AArch64::CBZX:
845 Is64Bit = true;
846 CC = AArch64CC::EQ;
847 break;
848 case AArch64::CBNZW:
849 Is64Bit = false;
850 CC = AArch64CC::NE;
851 break;
852 case AArch64::CBNZX:
853 Is64Bit = true;
854 CC = AArch64CC::NE;
855 break;
856 }
857 Register SrcReg = Cond[2].getReg();
858 if (Is64Bit) {
859 // cmp reg, #0 is actually subs xzr, reg, #0.
860 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
861 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
862 .addReg(SrcReg)
863 .addImm(0)
864 .addImm(0);
865 } else {
866 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
867 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
868 .addReg(SrcReg)
869 .addImm(0)
870 .addImm(0);
871 }
872 break;
873 }
874 case 4: { // tbz/tbnz
875 // We must insert a tst instruction.
876 switch (Cond[1].getImm()) {
877 default:
878 llvm_unreachable("Unknown branch opcode in Cond");
879 case AArch64::TBZW:
880 case AArch64::TBZX:
881 CC = AArch64CC::EQ;
882 break;
883 case AArch64::TBNZW:
884 case AArch64::TBNZX:
885 CC = AArch64CC::NE;
886 break;
887 }
888 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
889 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
890 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
891 .addReg(Cond[2].getReg())
892 .addImm(
893 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
894 else
895 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
896 .addReg(Cond[2].getReg())
897 .addImm(
898 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
899 break;
900 }
901 case 5: { // cb
902 // We must insert a cmp, that is a subs
903 // 0 1 2 3 4
904 // Cond is { -1, Opcode, CC, Op0, Op1 }
905 unsigned SUBSOpC, SUBSDestReg;
906 bool IsImm = false;
907 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
908 switch (Cond[1].getImm()) {
909 default:
910 llvm_unreachable("Unknown branch opcode in Cond");
911 case AArch64::CBWPri:
912 SUBSOpC = AArch64::SUBSWri;
913 SUBSDestReg = AArch64::WZR;
914 IsImm = true;
915 break;
916 case AArch64::CBXPri:
917 SUBSOpC = AArch64::SUBSXri;
918 SUBSDestReg = AArch64::XZR;
919 IsImm = true;
920 break;
921 case AArch64::CBWPrr:
922 SUBSOpC = AArch64::SUBSWrr;
923 SUBSDestReg = AArch64::WZR;
924 IsImm = false;
925 break;
926 case AArch64::CBXPrr:
927 SUBSOpC = AArch64::SUBSXrr;
928 SUBSDestReg = AArch64::XZR;
929 IsImm = false;
930 break;
931 }
932
933 if (IsImm)
934 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
935 .addReg(Cond[3].getReg())
936 .addImm(Cond[4].getImm())
937 .addImm(0);
938 else
939 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
940 .addReg(Cond[3].getReg())
941 .addReg(Cond[4].getReg());
942 }
943 }
944
945 unsigned Opc = 0;
946 const TargetRegisterClass *RC = nullptr;
947 bool TryFold = false;
948 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
949 RC = &AArch64::GPR64RegClass;
950 Opc = AArch64::CSELXr;
951 TryFold = true;
952 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
953 RC = &AArch64::GPR32RegClass;
954 Opc = AArch64::CSELWr;
955 TryFold = true;
956 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
957 RC = &AArch64::FPR64RegClass;
958 Opc = AArch64::FCSELDrrr;
959 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
960 RC = &AArch64::FPR32RegClass;
961 Opc = AArch64::FCSELSrrr;
962 }
963 assert(RC && "Unsupported regclass");
964
965 // Try folding simple instructions into the csel.
966 if (TryFold) {
967 unsigned NewVReg = 0;
968 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
969 if (FoldedOpc) {
970 // The folded opcodes csinc, csinv and csneg apply the operation to
971 // FalseReg, so we need to invert the condition.
972 CC = AArch64CC::getInvertedCondCode(CC);
973 TrueReg = FalseReg;
974 } else
975 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
976
977 // Fold the operation. Leave any dead instructions for DCE to clean up.
978 if (FoldedOpc) {
979 FalseReg = NewVReg;
980 Opc = FoldedOpc;
981 // This extends the live range of NewVReg.
982 MRI.clearKillFlags(NewVReg);
983 }
984 }
985
986 // Pull all virtual registers into the appropriate class.
987 MRI.constrainRegClass(TrueReg, RC);
988 MRI.constrainRegClass(FalseReg, RC);
989
990 // Insert the csel.
991 BuildMI(MBB, I, DL, get(Opc), DstReg)
992 .addReg(TrueReg)
993 .addReg(FalseReg)
994 .addImm(CC);
995}
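// Example (illustrative, not in the original source): selecting on Cond =
// { -1, CBNZW, %flag } with a GPR32 destination emits
//   $wzr = SUBSWri %flag, 0, 0        ; materialise the compare against zero
//   %dst = CSELWr %true, %false, NE
// and, when %true is defined by a foldable "ADDWri %x, 1, 0", the CSEL becomes
// a CSINC of %false and %x under the inverted condition instead.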
996
997// Return true if Imm can be loaded into a register by a "cheap" sequence of
998// instructions. For now, "cheap" means at most two instructions.
999static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1000 if (BitSize == 32)
1001 return true;
1002
1003 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1004 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1005 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1006 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1007
1008 return Is.size() <= 2;
1009}
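// Example (illustrative, not in the original source): expandMOVImm models
//   0x0000000012340001 -> MOVZ + MOVK      (2 insns, treated as cheap)
//   0x1234567887654321 -> MOVZ + 3x MOVK   (4 insns, not cheap)
// so a MOVi64imm of the first constant is reported as cheap as a move, while
// the second is left for the usual rematerialization heuristics to decide.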
1010
1011// FIXME: this implementation should be micro-architecture dependent, so a
1012// micro-architecture target hook should be introduced here in future.
1013bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1014 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1015 if (isExynosCheapAsMove(MI))
1016 return true;
1017 return MI.isAsCheapAsAMove();
1018 }
1019
1020 switch (MI.getOpcode()) {
1021 default:
1022 return MI.isAsCheapAsAMove();
1023
1024 case AArch64::ADDWrs:
1025 case AArch64::ADDXrs:
1026 case AArch64::SUBWrs:
1027 case AArch64::SUBXrs:
1028 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1029
1030 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1031 // ORRXri, it is as cheap as MOV.
1032 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1033 case AArch64::MOVi32imm:
1034 return isCheapImmediate(MI, 32);
1035 case AArch64::MOVi64imm:
1036 return isCheapImmediate(MI, 64);
1037 }
1038}
1039
1040bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1041 switch (MI.getOpcode()) {
1042 default:
1043 return false;
1044
1045 case AArch64::ADDWrs:
1046 case AArch64::ADDXrs:
1047 case AArch64::ADDSWrs:
1048 case AArch64::ADDSXrs: {
1049 unsigned Imm = MI.getOperand(3).getImm();
1050 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1051 if (ShiftVal == 0)
1052 return true;
1053 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1054 }
1055
1056 case AArch64::ADDWrx:
1057 case AArch64::ADDXrx:
1058 case AArch64::ADDXrx64:
1059 case AArch64::ADDSWrx:
1060 case AArch64::ADDSXrx:
1061 case AArch64::ADDSXrx64: {
1062 unsigned Imm = MI.getOperand(3).getImm();
1063 switch (AArch64_AM::getArithExtendType(Imm)) {
1064 default:
1065 return false;
1066 case AArch64_AM::UXTB:
1067 case AArch64_AM::UXTH:
1068 case AArch64_AM::UXTW:
1069 case AArch64_AM::UXTX:
1070 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1071 }
1072 }
1073
1074 case AArch64::SUBWrs:
1075 case AArch64::SUBSWrs: {
1076 unsigned Imm = MI.getOperand(3).getImm();
1077 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1078 return ShiftVal == 0 ||
1079 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1080 }
1081
1082 case AArch64::SUBXrs:
1083 case AArch64::SUBSXrs: {
1084 unsigned Imm = MI.getOperand(3).getImm();
1085 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1086 return ShiftVal == 0 ||
1087 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1088 }
1089
1090 case AArch64::SUBWrx:
1091 case AArch64::SUBXrx:
1092 case AArch64::SUBXrx64:
1093 case AArch64::SUBSWrx:
1094 case AArch64::SUBSXrx:
1095 case AArch64::SUBSXrx64: {
1096 unsigned Imm = MI.getOperand(3).getImm();
1097 switch (AArch64_AM::getArithExtendType(Imm)) {
1098 default:
1099 return false;
1100 case AArch64_AM::UXTB:
1101 case AArch64_AM::UXTH:
1102 case AArch64_AM::UXTW:
1103 case AArch64_AM::UXTX:
1104 return AArch64_AM::getArithShiftValue(Imm) == 0;
1105 }
1106 }
1107
1108 case AArch64::LDRBBroW:
1109 case AArch64::LDRBBroX:
1110 case AArch64::LDRBroW:
1111 case AArch64::LDRBroX:
1112 case AArch64::LDRDroW:
1113 case AArch64::LDRDroX:
1114 case AArch64::LDRHHroW:
1115 case AArch64::LDRHHroX:
1116 case AArch64::LDRHroW:
1117 case AArch64::LDRHroX:
1118 case AArch64::LDRQroW:
1119 case AArch64::LDRQroX:
1120 case AArch64::LDRSBWroW:
1121 case AArch64::LDRSBWroX:
1122 case AArch64::LDRSBXroW:
1123 case AArch64::LDRSBXroX:
1124 case AArch64::LDRSHWroW:
1125 case AArch64::LDRSHWroX:
1126 case AArch64::LDRSHXroW:
1127 case AArch64::LDRSHXroX:
1128 case AArch64::LDRSWroW:
1129 case AArch64::LDRSWroX:
1130 case AArch64::LDRSroW:
1131 case AArch64::LDRSroX:
1132 case AArch64::LDRWroW:
1133 case AArch64::LDRWroX:
1134 case AArch64::LDRXroW:
1135 case AArch64::LDRXroX:
1136 case AArch64::PRFMroW:
1137 case AArch64::PRFMroX:
1138 case AArch64::STRBBroW:
1139 case AArch64::STRBBroX:
1140 case AArch64::STRBroW:
1141 case AArch64::STRBroX:
1142 case AArch64::STRDroW:
1143 case AArch64::STRDroX:
1144 case AArch64::STRHHroW:
1145 case AArch64::STRHHroX:
1146 case AArch64::STRHroW:
1147 case AArch64::STRHroX:
1148 case AArch64::STRQroW:
1149 case AArch64::STRQroX:
1150 case AArch64::STRSroW:
1151 case AArch64::STRSroX:
1152 case AArch64::STRWroW:
1153 case AArch64::STRWroX:
1154 case AArch64::STRXroW:
1155 case AArch64::STRXroX: {
1156 unsigned IsSigned = MI.getOperand(3).getImm();
1157 return !IsSigned;
1158 }
1159 }
1160}
1161
1162bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1163 unsigned Opc = MI.getOpcode();
1164 switch (Opc) {
1165 default:
1166 return false;
1167 case AArch64::SEH_StackAlloc:
1168 case AArch64::SEH_SaveFPLR:
1169 case AArch64::SEH_SaveFPLR_X:
1170 case AArch64::SEH_SaveReg:
1171 case AArch64::SEH_SaveReg_X:
1172 case AArch64::SEH_SaveRegP:
1173 case AArch64::SEH_SaveRegP_X:
1174 case AArch64::SEH_SaveFReg:
1175 case AArch64::SEH_SaveFReg_X:
1176 case AArch64::SEH_SaveFRegP:
1177 case AArch64::SEH_SaveFRegP_X:
1178 case AArch64::SEH_SetFP:
1179 case AArch64::SEH_AddFP:
1180 case AArch64::SEH_Nop:
1181 case AArch64::SEH_PrologEnd:
1182 case AArch64::SEH_EpilogStart:
1183 case AArch64::SEH_EpilogEnd:
1184 case AArch64::SEH_PACSignLR:
1185 case AArch64::SEH_SaveAnyRegQP:
1186 case AArch64::SEH_SaveAnyRegQPX:
1187 case AArch64::SEH_AllocZ:
1188 case AArch64::SEH_SaveZReg:
1189 case AArch64::SEH_SavePReg:
1190 return true;
1191 }
1192}
1193
1194bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1195 Register &SrcReg, Register &DstReg,
1196 unsigned &SubIdx) const {
1197 switch (MI.getOpcode()) {
1198 default:
1199 return false;
1200 case AArch64::SBFMXri: // aka sxtw
1201 case AArch64::UBFMXri: // aka uxtw
1202 // Check for the 32 -> 64 bit extension case, these instructions can do
1203 // much more.
1204 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1205 return false;
1206 // This is a signed or unsigned 32 -> 64 bit extension.
1207 SrcReg = MI.getOperand(1).getReg();
1208 DstReg = MI.getOperand(0).getReg();
1209 SubIdx = AArch64::sub_32;
1210 return true;
1211 }
1212}
1213
1214bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1215 const MachineInstr &MIa, const MachineInstr &MIb) const {
1216 const TargetRegisterInfo *TRI = &getRegisterInfo();
1217 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1218 int64_t OffsetA = 0, OffsetB = 0;
1219 TypeSize WidthA(0, false), WidthB(0, false);
1220 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1221
1222 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1223 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1224
1227 return false;
1228
1229 // Retrieve the base, the offset from the base, and the width. The width
1230 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1231 // the bases are identical, and the offset of the lower memory access plus
1232 // its width does not overlap the offset of the higher memory access,
1233 // then the memory accesses are different.
1234 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1235 // are assumed to have the same scale (vscale).
1236 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1237 WidthA, TRI) &&
1238 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1239 WidthB, TRI)) {
1240 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1241 OffsetAIsScalable == OffsetBIsScalable) {
1242 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1243 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1244 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1245 if (LowWidth.isScalable() == OffsetAIsScalable &&
1246 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1247 return true;
1248 }
1249 }
1250 return false;
1251}
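// Example (illustrative, not in the original source): for
//   STRXui %v, %base, 0    ; 8 bytes at [base + 0]
//   LDRXui %d, %base, 1    ; 8 bytes at [base + 8] once the offset is unscaled
// both accesses report an identical base operand, and LowOffset (0) plus the
// low access width (8) does not reach HighOffset (8), so the two accesses are
// reported as trivially disjoint.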
1252
1253bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1254 const MachineBasicBlock *MBB,
1255 const MachineFunction &MF) const {
1256 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1257 return true;
1258
1259 // Do not move an instruction that can be recognized as a branch target.
1260 if (hasBTISemantics(MI))
1261 return true;
1262
1263 switch (MI.getOpcode()) {
1264 case AArch64::HINT:
1265 // CSDB hints are scheduling barriers.
1266 if (MI.getOperand(0).getImm() == 0x14)
1267 return true;
1268 break;
1269 case AArch64::DSB:
1270 case AArch64::ISB:
1271 // DSB and ISB also are scheduling barriers.
1272 return true;
1273 case AArch64::MSRpstatesvcrImm1:
1274 // SMSTART and SMSTOP are also scheduling barriers.
1275 return true;
1276 default:;
1277 }
1278 if (isSEHInstruction(MI))
1279 return true;
1280 auto Next = std::next(MI.getIterator());
1281 return Next != MBB->end() && Next->isCFIInstruction();
1282}
1283
1284/// analyzeCompare - For a comparison instruction, return the source registers
1285/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1286/// Return true if the comparison instruction can be analyzed.
1287bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1288 Register &SrcReg2, int64_t &CmpMask,
1289 int64_t &CmpValue) const {
1290 // The first operand can be a frame index where we'd normally expect a
1291 // register.
1292 // FIXME: Pass subregisters out of analyzeCompare
1293 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1294 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1295 return false;
1296
1297 switch (MI.getOpcode()) {
1298 default:
1299 break;
1300 case AArch64::PTEST_PP:
1301 case AArch64::PTEST_PP_ANY:
1302 case AArch64::PTEST_PP_FIRST:
1303 SrcReg = MI.getOperand(0).getReg();
1304 SrcReg2 = MI.getOperand(1).getReg();
1305 if (MI.getOperand(2).getSubReg())
1306 return false;
1307
1308 // Not sure about the mask and value for now...
1309 CmpMask = ~0;
1310 CmpValue = 0;
1311 return true;
1312 case AArch64::SUBSWrr:
1313 case AArch64::SUBSWrs:
1314 case AArch64::SUBSWrx:
1315 case AArch64::SUBSXrr:
1316 case AArch64::SUBSXrs:
1317 case AArch64::SUBSXrx:
1318 case AArch64::ADDSWrr:
1319 case AArch64::ADDSWrs:
1320 case AArch64::ADDSWrx:
1321 case AArch64::ADDSXrr:
1322 case AArch64::ADDSXrs:
1323 case AArch64::ADDSXrx:
1324 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1325 SrcReg = MI.getOperand(1).getReg();
1326 SrcReg2 = MI.getOperand(2).getReg();
1327
1328 // FIXME: Pass subregisters out of analyzeCompare
1329 if (MI.getOperand(2).getSubReg())
1330 return false;
1331
1332 CmpMask = ~0;
1333 CmpValue = 0;
1334 return true;
1335 case AArch64::SUBSWri:
1336 case AArch64::ADDSWri:
1337 case AArch64::SUBSXri:
1338 case AArch64::ADDSXri:
1339 SrcReg = MI.getOperand(1).getReg();
1340 SrcReg2 = 0;
1341 CmpMask = ~0;
1342 CmpValue = MI.getOperand(2).getImm();
1343 return true;
1344 case AArch64::ANDSWri:
1345 case AArch64::ANDSXri:
1346 // ANDS does not use the same encoding scheme as the other xxxS
1347 // instructions.
1348 SrcReg = MI.getOperand(1).getReg();
1349 SrcReg2 = 0;
1350 CmpMask = ~0;
1351 CmpValue = AArch64_AM::decodeLogicalImmediate(
1352 MI.getOperand(2).getImm(),
1353 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1354 return true;
1355 }
1356
1357 return false;
1358}
1359
1360static bool UpdateOperandRegClass(MachineInstr &Instr) {
1361 MachineBasicBlock *MBB = Instr.getParent();
1362 assert(MBB && "Can't get MachineBasicBlock here");
1363 MachineFunction *MF = MBB->getParent();
1364 assert(MF && "Can't get MachineFunction here");
1365 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1366 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1367 MachineRegisterInfo *MRI = &MF->getRegInfo();
1368
1369 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1370 ++OpIdx) {
1371 MachineOperand &MO = Instr.getOperand(OpIdx);
1372 const TargetRegisterClass *OpRegCstraints =
1373 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1374
1375 // If there's no constraint, there's nothing to do.
1376 if (!OpRegCstraints)
1377 continue;
1378 // If the operand is a frame index, there's nothing to do here.
1379 // A frame index operand will resolve correctly during PEI.
1380 if (MO.isFI())
1381 continue;
1382
1383 assert(MO.isReg() &&
1384 "Operand has register constraints without being a register!");
1385
1386 Register Reg = MO.getReg();
1387 if (Reg.isPhysical()) {
1388 if (!OpRegCstraints->contains(Reg))
1389 return false;
1390 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1391 !MRI->constrainRegClass(Reg, OpRegCstraints))
1392 return false;
1393 }
1394
1395 return true;
1396}
1397
1398/// Return the opcode that does not set flags when possible - otherwise
1399/// return the original opcode. The caller is responsible to do the actual
1400/// substitution and legality checking.
1401unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1402 // Don't convert all compare instructions, because for some the zero register
1403 // encoding becomes the sp register.
1404 bool MIDefinesZeroReg = false;
1405 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1406 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1407 MIDefinesZeroReg = true;
1408
1409 switch (MI.getOpcode()) {
1410 default:
1411 return MI.getOpcode();
1412 case AArch64::ADDSWrr:
1413 return AArch64::ADDWrr;
1414 case AArch64::ADDSWri:
1415 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1416 case AArch64::ADDSWrs:
1417 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1418 case AArch64::ADDSWrx:
1419 return AArch64::ADDWrx;
1420 case AArch64::ADDSXrr:
1421 return AArch64::ADDXrr;
1422 case AArch64::ADDSXri:
1423 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1424 case AArch64::ADDSXrs:
1425 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1426 case AArch64::ADDSXrx:
1427 return AArch64::ADDXrx;
1428 case AArch64::SUBSWrr:
1429 return AArch64::SUBWrr;
1430 case AArch64::SUBSWri:
1431 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1432 case AArch64::SUBSWrs:
1433 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1434 case AArch64::SUBSWrx:
1435 return AArch64::SUBWrx;
1436 case AArch64::SUBSXrr:
1437 return AArch64::SUBXrr;
1438 case AArch64::SUBSXri:
1439 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1440 case AArch64::SUBSXrs:
1441 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1442 case AArch64::SUBSXrx:
1443 return AArch64::SUBXrx;
1444 }
1445}
1446
1447enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1448
1449/// True when condition flags are accessed (either by writing or reading)
1450/// on the instruction trace starting at From and ending at To.
1451///
1452/// Note: If From and To are from different blocks it's assumed CC are accessed
1453/// on the path.
1454static bool areCFlagsAccessedBetweenInstrs(
1455 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1456 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1457 // Early exit if To is at the beginning of the BB.
1458 if (To == To->getParent()->begin())
1459 return true;
1460
1461 // Check whether the instructions are in the same basic block
1462 // If not, assume the condition flags might get modified somewhere.
1463 if (To->getParent() != From->getParent())
1464 return true;
1465
1466 // From must be above To.
1467 assert(std::any_of(
1468 ++To.getReverse(), To->getParent()->rend(),
1469 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1470
1471 // We iterate backward starting at \p To until we hit \p From.
1472 for (const MachineInstr &Instr :
1473 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1474 if (((AccessToCheck & AK_Write) &&
1475 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1476 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1477 return true;
1478 }
1479 return false;
1480}
1481
1482std::optional<unsigned>
1483AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1484 MachineInstr *Pred,
1485 const MachineRegisterInfo *MRI) const {
1486 unsigned MaskOpcode = Mask->getOpcode();
1487 unsigned PredOpcode = Pred->getOpcode();
1488 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1489 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1490
1491 if (PredIsWhileLike) {
1492 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1493 // instruction and the condition is "any" since WHILEcc does an implicit
1494 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1495 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1496 return PredOpcode;
1497
1498 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1499 // redundant since WHILE performs an implicit PTEST with an all active
1500 // mask.
1501 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1502 getElementSizeForOpcode(MaskOpcode) ==
1503 getElementSizeForOpcode(PredOpcode))
1504 return PredOpcode;
1505
1506 return {};
1507 }
1508
1509 if (PredIsPTestLike) {
1510 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1511 // instruction that sets the flags as PTEST would and the condition is
1512 // "any" since PG is always a subset of the governing predicate of the
1513 // ptest-like instruction.
1514 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1515 return PredOpcode;
1516
1517 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1518
1519 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1520 // to look through a copy and try again. This is because some instructions
1521 // take a predicate whose register class is a subset of its result class.
1522 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1523 PTestLikeMask->getOperand(1).getReg().isVirtual())
1524 PTestLikeMask =
1525 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1526
1527 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1528 // element size matches and either the PTEST_LIKE instruction uses
1529 // the same all active mask or the condition is "any".
1530 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1531 getElementSizeForOpcode(MaskOpcode) ==
1532 getElementSizeForOpcode(PredOpcode)) {
1533 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1534 return PredOpcode;
1535 }
1536
1537 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1538 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1539 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1540 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1541 // performed by the compare could consider fewer lanes for these element
1542 // sizes.
1543 //
1544 // For example, consider
1545 //
1546 // ptrue p0.b ; P0=1111-1111-1111-1111
1547 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1548 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1549 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1550 // ; ^ last active
1551 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1552 // ; ^ last active
1553 //
1554 // where the compare generates a canonical all active 32-bit predicate
1555 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1556 // active flag, whereas the PTEST instruction with the same mask doesn't.
1557 // For PTEST_ANY this doesn't apply as the flags in this case would be
1558 // identical regardless of element size.
1559 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1560 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1561 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1562 return PredOpcode;
1563
1564 return {};
1565 }
1566
1567 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1568 // opcode so the PTEST becomes redundant.
1569 switch (PredOpcode) {
1570 case AArch64::AND_PPzPP:
1571 case AArch64::BIC_PPzPP:
1572 case AArch64::EOR_PPzPP:
1573 case AArch64::NAND_PPzPP:
1574 case AArch64::NOR_PPzPP:
1575 case AArch64::ORN_PPzPP:
1576 case AArch64::ORR_PPzPP:
1577 case AArch64::BRKA_PPzP:
1578 case AArch64::BRKPA_PPzPP:
1579 case AArch64::BRKB_PPzP:
1580 case AArch64::BRKPB_PPzPP:
1581 case AArch64::RDFFR_PPz: {
1582 // Check to see if our mask is the same. If not the resulting flag bits
1583 // may be different and we can't remove the ptest.
1584 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1585 if (Mask != PredMask)
1586 return {};
1587 break;
1588 }
1589 case AArch64::BRKN_PPzP: {
1590 // BRKN uses an all active implicit mask to set flags unlike the other
1591 // flag-setting instructions.
1592 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1593 if ((MaskOpcode != AArch64::PTRUE_B) ||
1594 (Mask->getOperand(1).getImm() != 31))
1595 return {};
1596 break;
1597 }
1598 case AArch64::PTRUE_B:
1599 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1600 break;
1601 default:
1602 // Bail out if we don't recognize the input
1603 return {};
1604 }
1605
1606 return convertToFlagSettingOpc(PredOpcode);
1607}
1608
1609/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1610/// operation which could set the flags in an identical manner
1611bool AArch64InstrInfo::optimizePTestInstr(
1612 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1613 const MachineRegisterInfo *MRI) const {
1614 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1615 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1616 unsigned PredOpcode = Pred->getOpcode();
1617 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1618 if (!NewOp)
1619 return false;
1620
1621 const TargetRegisterInfo *TRI = &getRegisterInfo();
1622
1623 // If another instruction between Pred and PTest accesses flags, don't remove
1624 // the ptest or update the earlier instruction to modify them.
1625 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1626 return false;
1627
1628 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1629 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1630 // operand to be replaced with an equivalent instruction that also sets the
1631 // flags.
1632 PTest->eraseFromParent();
1633 if (*NewOp != PredOpcode) {
1634 Pred->setDesc(get(*NewOp));
1635 bool succeeded = UpdateOperandRegClass(*Pred);
1636 (void)succeeded;
1637 assert(succeeded && "Operands have incompatible register classes!");
1638 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1639 }
1640
1641 // Ensure that the flags def is live.
1642 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1643 unsigned i = 0, e = Pred->getNumOperands();
1644 for (; i != e; ++i) {
1645 MachineOperand &MO = Pred->getOperand(i);
1646 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1647 MO.setIsDead(false);
1648 break;
1649 }
1650 }
1651 }
1652 return true;
1653}
1654
1655/// Try to optimize a compare instruction. A compare instruction is an
1656/// instruction which produces AArch64::NZCV. It can be truly compare
1657/// instruction
1658/// when there are no uses of its destination register.
1659///
1660/// The following steps are tried in order:
1661/// 1. Convert CmpInstr into an unconditional version.
1662/// 2. Remove CmpInstr if above there is an instruction producing a needed
1663/// condition code or an instruction which can be converted into such an
1664/// instruction.
1665/// Only comparison with zero is supported.
1666bool AArch64InstrInfo::optimizeCompareInstr(
1667 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1668 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1669 assert(CmpInstr.getParent());
1670 assert(MRI);
1671
1672 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1673 int DeadNZCVIdx =
1674 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1675 if (DeadNZCVIdx != -1) {
1676 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1677 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1678 CmpInstr.eraseFromParent();
1679 return true;
1680 }
1681 unsigned Opc = CmpInstr.getOpcode();
1682 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1683 if (NewOpc == Opc)
1684 return false;
1685 const MCInstrDesc &MCID = get(NewOpc);
1686 CmpInstr.setDesc(MCID);
1687 CmpInstr.removeOperand(DeadNZCVIdx);
1688 bool succeeded = UpdateOperandRegClass(CmpInstr);
1689 (void)succeeded;
1690 assert(succeeded && "Some operands reg class are incompatible!");
1691 return true;
1692 }
1693
1694 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1695 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1696 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1697 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1698
1699 if (SrcReg2 != 0)
1700 return false;
1701
1702 // CmpInstr is a Compare instruction if destination register is not used.
1703 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1704 return false;
1705
1706 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1707 return true;
1708 return (CmpValue == 0 || CmpValue == 1) &&
1709 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1710}
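// Example (illustrative, not in the original source): for
//   %2:gpr32 = SUBSWrr %0, %1, implicit-def dead $nzcv
// step 1 above rewrites the instruction to SUBWrr and drops the dead NZCV def,
// while a compare against zero such as
//   $wzr = SUBSWri %0, 0, 0   ; cmp w0, #0
// following a suitable add/sub defining %0 can be removed entirely by
// substituteCmpToZero/removeCmpToZeroOrOne, which switch the earlier
// instruction to its flag-setting S form instead.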
1711
1712/// Get opcode of S version of Instr.
1713/// If Instr is S version its opcode is returned.
1714/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1715/// or we are not interested in it.
1716static unsigned sForm(MachineInstr &Instr) {
1717 switch (Instr.getOpcode()) {
1718 default:
1719 return AArch64::INSTRUCTION_LIST_END;
1720
1721 case AArch64::ADDSWrr:
1722 case AArch64::ADDSWri:
1723 case AArch64::ADDSXrr:
1724 case AArch64::ADDSXri:
1725 case AArch64::SUBSWrr:
1726 case AArch64::SUBSWri:
1727 case AArch64::SUBSXrr:
1728 case AArch64::SUBSXri:
1729 return Instr.getOpcode();
1730
1731 case AArch64::ADDWrr:
1732 return AArch64::ADDSWrr;
1733 case AArch64::ADDWri:
1734 return AArch64::ADDSWri;
1735 case AArch64::ADDXrr:
1736 return AArch64::ADDSXrr;
1737 case AArch64::ADDXri:
1738 return AArch64::ADDSXri;
1739 case AArch64::ADCWr:
1740 return AArch64::ADCSWr;
1741 case AArch64::ADCXr:
1742 return AArch64::ADCSXr;
1743 case AArch64::SUBWrr:
1744 return AArch64::SUBSWrr;
1745 case AArch64::SUBWri:
1746 return AArch64::SUBSWri;
1747 case AArch64::SUBXrr:
1748 return AArch64::SUBSXrr;
1749 case AArch64::SUBXri:
1750 return AArch64::SUBSXri;
1751 case AArch64::SBCWr:
1752 return AArch64::SBCSWr;
1753 case AArch64::SBCXr:
1754 return AArch64::SBCSXr;
1755 case AArch64::ANDWri:
1756 return AArch64::ANDSWri;
1757 case AArch64::ANDXri:
1758 return AArch64::ANDSXri;
1759 }
1760}
1761
1762/// Check if AArch64::NZCV should be alive in successors of MBB.
1763static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1764 for (auto *BB : MBB->successors())
1765 if (BB->isLiveIn(AArch64::NZCV))
1766 return true;
1767 return false;
1768}
1769
1770/// \returns The condition code operand index for \p Instr if it is a branch
1771/// or select and -1 otherwise.
1772static int
1773findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1774 switch (Instr.getOpcode()) {
1775 default:
1776 return -1;
1777
1778 case AArch64::Bcc: {
1779 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1780 assert(Idx >= 2);
1781 return Idx - 2;
1782 }
1783
1784 case AArch64::CSINVWr:
1785 case AArch64::CSINVXr:
1786 case AArch64::CSINCWr:
1787 case AArch64::CSINCXr:
1788 case AArch64::CSELWr:
1789 case AArch64::CSELXr:
1790 case AArch64::CSNEGWr:
1791 case AArch64::CSNEGXr:
1792 case AArch64::FCSELSrrr:
1793 case AArch64::FCSELDrrr: {
1794 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1795 assert(Idx >= 1);
1796 return Idx - 1;
1797 }
1798 }
1799}
1800
1801/// Find a condition code used by the instruction.
1802/// Returns AArch64CC::Invalid if either the instruction does not use condition
1803/// codes or we don't optimize CmpInstr in the presence of such instructions.
1804static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1805 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1806 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1807 Instr.getOperand(CCIdx).getImm())
1808 : AArch64CC::Invalid;
1809}
1810
1811static UsedNZCV
1812getUsedNZCV(AArch64CC::CondCode CC) {
1813 UsedNZCV UsedFlags;
1814 switch (CC) {
1815 default:
1816 break;
1817
1818 case AArch64CC::EQ: // Z set
1819 case AArch64CC::NE: // Z clear
1820 UsedFlags.Z = true;
1821 break;
1822
1823 case AArch64CC::HI: // Z clear and C set
1824 case AArch64CC::LS: // Z set or C clear
1825 UsedFlags.Z = true;
1826 [[fallthrough]];
1827 case AArch64CC::HS: // C set
1828 case AArch64CC::LO: // C clear
1829 UsedFlags.C = true;
1830 break;
1831
1832 case AArch64CC::MI: // N set
1833 case AArch64CC::PL: // N clear
1834 UsedFlags.N = true;
1835 break;
1836
1837 case AArch64CC::VS: // V set
1838 case AArch64CC::VC: // V clear
1839 UsedFlags.V = true;
1840 break;
1841
1842 case AArch64CC::GT: // Z clear, N and V the same
1843 case AArch64CC::LE: // Z set, N and V differ
1844 UsedFlags.Z = true;
1845 [[fallthrough]];
1846 case AArch64CC::GE: // N and V the same
1847 case AArch64CC::LT: // N and V differ
1848 UsedFlags.N = true;
1849 UsedFlags.V = true;
1850 break;
1851 }
1852 return UsedFlags;
1853}
1854
1855/// \returns The condition flags used after \p CmpInstr in its MachineBB if the
1856/// NZCV flags are not alive in the successors of the common parent block of
1857/// \p CmpInstr and \p MI; \returns std::nullopt otherwise.
1858///
1859/// Collects the instructions using those flags in \p CCUseInstrs if provided.
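/// For example (illustrative): if the only NZCV reader after \p CmpInstr in
/// the block is a 'b.ne', the returned UsedNZCV has just the Z flag set; if a
/// 'csel ..., gt' follows instead, Z, N and V are all reported as used.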
1860std::optional<UsedNZCV>
1861llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1862 const TargetRegisterInfo &TRI,
1863 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1864 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1865 if (MI.getParent() != CmpParent)
1866 return std::nullopt;
1867
1868 if (areCFlagsAliveInSuccessors(CmpParent))
1869 return std::nullopt;
1870
1871 UsedNZCV NZCVUsedAfterCmp;
1872 for (MachineInstr &Instr : instructionsWithoutDebug(
1873 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1874 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1875 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1876 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1877 return std::nullopt;
1878 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1879 if (CCUseInstrs)
1880 CCUseInstrs->push_back(&Instr);
1881 }
1882 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1883 break;
1884 }
1885 return NZCVUsedAfterCmp;
1886}
1887
1888static bool isADDSRegImm(unsigned Opcode) {
1889 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1890}
1891
1892static bool isSUBSRegImm(unsigned Opcode) {
1893 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1894}
1895
1896/// Check if CmpInstr can be substituted by MI.
1897///
1898/// CmpInstr can be substituted:
1899/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1900/// - and, MI and CmpInstr are from the same MachineBB
1901/// - and, condition flags are not alive in successors of the CmpInstr parent
1902/// - and, if MI opcode is the S form there must be no defs of flags between
1903/// MI and CmpInstr
1904/// or if MI opcode is not the S form there must be neither defs of flags
1905/// nor uses of flags between MI and CmpInstr.
1906/// - and, the C flag is not used after CmpInstr
1907/// - and, the V flag is either not used after CmpInstr, or MI produces a
1908/// poison value if signed overflow occurs.
1909static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1910 const TargetRegisterInfo &TRI) {
1911 // NOTE: This assertion guarantees that MI.getOpcode() is an add or a
1912 // subtraction that may or may not set flags.
1913 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1914
1915 const unsigned CmpOpcode = CmpInstr.getOpcode();
1916 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1917 return false;
1918
1919 assert((CmpInstr.getOperand(2).isImm() &&
1920 CmpInstr.getOperand(2).getImm() == 0) &&
1921 "Caller guarantees that CmpInstr compares with constant 0");
1922
1923 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1924 if (!NZVCUsed || NZVCUsed->C)
1925 return false;
1926
1927 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1928 // '%vreg = add ...' or '%vreg = sub ...'.
1929 // Condition flag V is used to indicate signed overflow.
1930 // 1) MI and CmpInstr set N and V to the same value.
1931 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1932 // signed overflow occurs, so CmpInstr could still be simplified away.
1933 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1934 return false;
1935
1936 AccessKind AccessToCheck = AK_Write;
1937 if (sForm(MI) != MI.getOpcode())
1938 AccessToCheck = AK_All;
1939 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1940}
1941
1942/// Substitute an instruction comparing to zero with another instruction
1943/// which produces needed condition flags.
1944///
1945/// Return true on success.
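/// Example (illustrative, assuming NZCV is not live out of the block):
/// \code
///   sub  w8, w0, w1
///   cmp  w8, #0
///   b.eq
/// \endcode
/// to
/// \code
///   subs w8, w0, w1
///   b.eq
/// \endcode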
1946bool AArch64InstrInfo::substituteCmpToZero(
1947 MachineInstr &CmpInstr, unsigned SrcReg,
1948 const MachineRegisterInfo &MRI) const {
1949 // Get the unique definition of SrcReg.
1950 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1951 if (!MI)
1952 return false;
1953
1954 const TargetRegisterInfo &TRI = getRegisterInfo();
1955
1956 unsigned NewOpc = sForm(*MI);
1957 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1958 return false;
1959
1960 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1961 return false;
1962
1963 // Update the instruction to set NZCV.
1964 MI->setDesc(get(NewOpc));
1965 CmpInstr.eraseFromParent();
1966 bool succeeded = UpdateOperandRegClass(*MI);
1967 (void)succeeded;
1968 assert(succeeded && "Some operands reg class are incompatible!");
1969 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1970 return true;
1971}
1972
1973/// \returns True if \p CmpInstr can be removed.
1974///
1975/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1976/// codes used in \p CCUseInstrs must be inverted.
1977static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1978 int CmpValue, const TargetRegisterInfo &TRI,
1979 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1980 bool &IsInvertCC) {
1981 assert((CmpValue == 0 || CmpValue == 1) &&
1982 "Only comparisons to 0 or 1 considered for removal!");
1983
1984 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1985 unsigned MIOpc = MI.getOpcode();
1986 if (MIOpc == AArch64::CSINCWr) {
1987 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1988 MI.getOperand(2).getReg() != AArch64::WZR)
1989 return false;
1990 } else if (MIOpc == AArch64::CSINCXr) {
1991 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1992 MI.getOperand(2).getReg() != AArch64::XZR)
1993 return false;
1994 } else {
1995 return false;
1996 }
1997 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1998 if (MICC == AArch64CC::Invalid)
1999 return false;
2000
2001 // NZCV needs to be defined
2002 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2003 return false;
2004
2005 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2006 const unsigned CmpOpcode = CmpInstr.getOpcode();
2007 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2008 if (CmpValue && !IsSubsRegImm)
2009 return false;
2010 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2011 return false;
2012
2013 // MI conditions allowed: eq, ne, mi, pl
2014 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2015 if (MIUsedNZCV.C || MIUsedNZCV.V)
2016 return false;
2017
2018 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2019 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2020 // Condition flags are not used in CmpInstr basic block successors, and only
2021 // the Z or N flags are allowed to be used after CmpInstr within its basic block
2022 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2023 return false;
2024 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2025 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2026 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2027 return false;
2028 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2029 if (MIUsedNZCV.N && !CmpValue)
2030 return false;
2031
2032 // There must be no defs of flags between MI and CmpInstr
2033 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2034 return false;
2035
2036 // Condition code is inverted in the following cases:
2037 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2038 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2039 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2040 (!CmpValue && MICC == AArch64CC::NE);
2041 return true;
2042}
2043
2044/// Remove comparison in csinc-cmp sequence
2045///
2046/// Examples:
2047/// 1. \code
2048/// csinc w9, wzr, wzr, ne
2049/// cmp w9, #0
2050/// b.eq
2051/// \endcode
2052/// to
2053/// \code
2054/// csinc w9, wzr, wzr, ne
2055/// b.ne
2056/// \endcode
2057///
2058/// 2. \code
2059/// csinc x2, xzr, xzr, mi
2060/// cmp x2, #1
2061/// b.pl
2062/// \endcode
2063/// to
2064/// \code
2065/// csinc x2, xzr, xzr, mi
2066/// b.pl
2067/// \endcode
2068///
2069/// \param CmpInstr comparison instruction
2070/// \return True when comparison removed
2071bool AArch64InstrInfo::removeCmpToZeroOrOne(
2072 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2073 const MachineRegisterInfo &MRI) const {
2074 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2075 if (!MI)
2076 return false;
2077 const TargetRegisterInfo &TRI = getRegisterInfo();
2078 SmallVector<MachineInstr *, 4> CCUseInstrs;
2079 bool IsInvertCC = false;
2080 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2081 IsInvertCC))
2082 return false;
2083 // Make transformation
2084 CmpInstr.eraseFromParent();
2085 if (IsInvertCC) {
2086 // Invert condition codes in CmpInstr CC users
2087 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2088 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2089 assert(Idx >= 0 && "Unexpected instruction using CC.");
2090 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2091 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2092 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2093 CCOperand.setImm(CCUse);
2094 }
2095 }
2096 return true;
2097}
2098
2099bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2100 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2101 MI.getOpcode() != AArch64::CATCHRET)
2102 return false;
2103
2104 MachineBasicBlock &MBB = *MI.getParent();
2105 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2106 auto TRI = Subtarget.getRegisterInfo();
2107 DebugLoc DL = MI.getDebugLoc();
2108
2109 if (MI.getOpcode() == AArch64::CATCHRET) {
2110 // Skip to the first instruction before the epilog.
2111 const TargetInstrInfo *TII =
2112 MBB.getParent()->getSubtarget().getInstrInfo();
2113 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2114 MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
2115 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2116 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2117 FirstEpilogSEH != MBB.begin())
2118 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2119 if (FirstEpilogSEH != MBB.begin())
2120 FirstEpilogSEH = std::next(FirstEpilogSEH);
2121 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2122 .addReg(AArch64::X0, RegState::Define)
2123 .addMBB(TargetMBB);
2124 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2125 .addReg(AArch64::X0, RegState::Define)
2126 .addReg(AArch64::X0)
2127 .addMBB(TargetMBB)
2128 .addImm(0);
2129 TargetMBB->setMachineBlockAddressTaken();
2130 return true;
2131 }
2132
2133 Register Reg = MI.getOperand(0).getReg();
2134 const Module &M = *MBB.getParent()->getFunction().getParent();
2135 if (M.getStackProtectorGuard() == "sysreg") {
2136 const AArch64SysReg::SysReg *SrcReg =
2137 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2138 if (!SrcReg)
2139 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2140
2141 // mrs xN, sysreg
2142 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2143 .addDef(Reg)
2144 .addImm(SrcReg->Encoding);
2145 int Offset = M.getStackProtectorGuardOffset();
2146 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2147 // ldr xN, [xN, #offset]
2148 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2149 .addDef(Reg)
2150 .addUse(Reg, RegState::Kill)
2151 .addImm(Offset / 8);
2152 } else if (Offset >= -256 && Offset <= 255) {
2153 // ldur xN, [xN, #offset]
2154 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2155 .addDef(Reg)
2156 .addUse(Reg, RegState::Kill)
2157 .addImm(Offset);
2158 } else if (Offset >= -4095 && Offset <= 4095) {
2159 if (Offset > 0) {
2160 // add xN, xN, #offset
2161 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2162 .addDef(Reg)
2163 .addUse(Reg, RegState::Kill)
2164 .addImm(Offset)
2165 .addImm(0);
2166 } else {
2167 // sub xN, xN, #offset
2168 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2169 .addDef(Reg)
2170 .addUse(Reg, RegState::Kill)
2171 .addImm(-Offset)
2172 .addImm(0);
2173 }
2174 // ldr xN, [xN]
2175 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2176 .addDef(Reg)
2177 .addUse(Reg, RegState::Kill)
2178 .addImm(0);
2179 } else {
2180 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2181 // than 32760.
2182 // It might be nice to use AArch64::MOVi32imm here, which would get
2183 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2184 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2185 // AArch64FrameLowering might help us find such a scratch register
2186 // though. If we failed to find a scratch register, we could emit a
2187 // stream of add instructions to build up the immediate. Or, we could try
2188 // to insert a AArch64::MOVi32imm before register allocation so that we
2189 // didn't need to scavenge for a scratch register.
2190 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2191 }
2192 MBB.erase(MI);
2193 return true;
2194 }
2195
2196 const GlobalValue *GV =
2197 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2198 const TargetMachine &TM = MBB.getParent()->getTarget();
2199 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2200 const unsigned char MO_NC = AArch64II::MO_NC;
2201
2202 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2203 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2204 .addGlobalAddress(GV, 0, OpFlags);
2205 if (Subtarget.isTargetILP32()) {
2206 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2207 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2208 .addDef(Reg32, RegState::Dead)
2209 .addUse(Reg, RegState::Kill)
2210 .addImm(0)
2211 .addMemOperand(*MI.memoperands_begin())
2212 .addDef(Reg, RegState::Implicit);
2213 } else {
2214 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2215 .addReg(Reg, RegState::Kill)
2216 .addImm(0)
2217 .addMemOperand(*MI.memoperands_begin());
2218 }
2219 } else if (TM.getCodeModel() == CodeModel::Large) {
2220 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2221 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2222 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2223 .addImm(0);
2224 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2225 .addReg(Reg, RegState::Kill)
2226 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2227 .addImm(16);
2228 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2229 .addReg(Reg, RegState::Kill)
2230 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2231 .addImm(32);
2232 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2233 .addReg(Reg, RegState::Kill)
2234 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2235 .addImm(48);
2236 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2237 .addReg(Reg, RegState::Kill)
2238 .addImm(0)
2239 .addMemOperand(*MI.memoperands_begin());
2240 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2241 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2242 .addGlobalAddress(GV, 0, OpFlags);
2243 } else {
2244 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2245 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2246 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2247 if (Subtarget.isTargetILP32()) {
2248 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2249 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2250 .addDef(Reg32, RegState::Dead)
2251 .addUse(Reg, RegState::Kill)
2252 .addGlobalAddress(GV, 0, LoFlags)
2253 .addMemOperand(*MI.memoperands_begin())
2254 .addDef(Reg, RegState::Implicit);
2255 } else {
2256 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2257 .addReg(Reg, RegState::Kill)
2258 .addGlobalAddress(GV, 0, LoFlags)
2259 .addMemOperand(*MI.memoperands_begin());
2260 }
2261 }
2262
2263 MBB.erase(MI);
2264
2265 return true;
2266}
2267
2268// Return true if this instruction simply sets its single destination register
2269// to zero. This is equivalent to a register rename of the zero-register.
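// For example (illustrative): "movz w0, #0" and "and w0, wzr, #0xff" qualify,
// while "movz w0, #1" or "orr w0, wzr, w1" (a plain register copy) do not.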
2270bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2271 switch (MI.getOpcode()) {
2272 default:
2273 break;
2274 case AArch64::MOVZWi:
2275 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2276 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2277 assert(MI.getDesc().getNumOperands() == 3 &&
2278 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2279 return true;
2280 }
2281 break;
2282 case AArch64::ANDWri: // and Rd, Rzr, #imm
2283 return MI.getOperand(1).getReg() == AArch64::WZR;
2284 case AArch64::ANDXri:
2285 return MI.getOperand(1).getReg() == AArch64::XZR;
2286 case TargetOpcode::COPY:
2287 return MI.getOperand(1).getReg() == AArch64::WZR;
2288 }
2289 return false;
2290}
2291
2292// Return true if this instruction simply renames a general register without
2293// modifying bits.
2294bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2295 switch (MI.getOpcode()) {
2296 default:
2297 break;
2298 case TargetOpcode::COPY: {
2299 // GPR32 copies will be lowered to ORRXrs
2300 Register DstReg = MI.getOperand(0).getReg();
2301 return (AArch64::GPR32RegClass.contains(DstReg) ||
2302 AArch64::GPR64RegClass.contains(DstReg));
2303 }
2304 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2305 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2306 assert(MI.getDesc().getNumOperands() == 4 &&
2307 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2308 return true;
2309 }
2310 break;
2311 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2312 if (MI.getOperand(2).getImm() == 0) {
2313 assert(MI.getDesc().getNumOperands() == 4 &&
2314 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2315 return true;
2316 }
2317 break;
2318 }
2319 return false;
2320}
2321
2322// Return true if this instruction simply renames a floating-point/vector
2323// register without modifying bits.
2324bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2325 switch (MI.getOpcode()) {
2326 default:
2327 break;
2328 case TargetOpcode::COPY: {
2329 Register DstReg = MI.getOperand(0).getReg();
2330 return AArch64::FPR128RegClass.contains(DstReg);
2331 }
2332 case AArch64::ORRv16i8:
2333 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2334 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2335 "invalid ORRv16i8 operands");
2336 return true;
2337 }
2338 break;
2339 }
2340 return false;
2341}
2342
2343Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2344 int &FrameIndex) const {
2345 switch (MI.getOpcode()) {
2346 default:
2347 break;
2348 case AArch64::LDRWui:
2349 case AArch64::LDRXui:
2350 case AArch64::LDRBui:
2351 case AArch64::LDRHui:
2352 case AArch64::LDRSui:
2353 case AArch64::LDRDui:
2354 case AArch64::LDRQui:
2355 case AArch64::LDR_PXI:
2356 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2357 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2358 FrameIndex = MI.getOperand(1).getIndex();
2359 return MI.getOperand(0).getReg();
2360 }
2361 break;
2362 }
2363
2364 return 0;
2365}
2366
2367Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2368 int &FrameIndex) const {
2369 switch (MI.getOpcode()) {
2370 default:
2371 break;
2372 case AArch64::STRWui:
2373 case AArch64::STRXui:
2374 case AArch64::STRBui:
2375 case AArch64::STRHui:
2376 case AArch64::STRSui:
2377 case AArch64::STRDui:
2378 case AArch64::STRQui:
2379 case AArch64::STR_PXI:
2380 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2381 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2382 FrameIndex = MI.getOperand(1).getIndex();
2383 return MI.getOperand(0).getReg();
2384 }
2385 break;
2386 }
2387 return 0;
2388}
2389
2390/// Check all MachineMemOperands for a hint to suppress pairing.
2391bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2392 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2393 return MMO->getFlags() & MOSuppressPair;
2394 });
2395}
2396
2397/// Set a flag on the first MachineMemOperand to suppress pairing.
2398void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2399 if (MI.memoperands_empty())
2400 return;
2401 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2402}
2403
2404/// Check all MachineMemOperands for a hint that the load/store is strided.
2405bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2406 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2407 return MMO->getFlags() & MOStridedAccess;
2408 });
2409}
2410
2411bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2412 switch (Opc) {
2413 default:
2414 return false;
2415 case AArch64::STURSi:
2416 case AArch64::STRSpre:
2417 case AArch64::STURDi:
2418 case AArch64::STRDpre:
2419 case AArch64::STURQi:
2420 case AArch64::STRQpre:
2421 case AArch64::STURBBi:
2422 case AArch64::STURHHi:
2423 case AArch64::STURWi:
2424 case AArch64::STRWpre:
2425 case AArch64::STURXi:
2426 case AArch64::STRXpre:
2427 case AArch64::LDURSi:
2428 case AArch64::LDRSpre:
2429 case AArch64::LDURDi:
2430 case AArch64::LDRDpre:
2431 case AArch64::LDURQi:
2432 case AArch64::LDRQpre:
2433 case AArch64::LDURWi:
2434 case AArch64::LDRWpre:
2435 case AArch64::LDURXi:
2436 case AArch64::LDRXpre:
2437 case AArch64::LDRSWpre:
2438 case AArch64::LDURSWi:
2439 case AArch64::LDURHHi:
2440 case AArch64::LDURBBi:
2441 case AArch64::LDURSBWi:
2442 case AArch64::LDURSHWi:
2443 return true;
2444 }
2445}
2446
2447std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2448 switch (Opc) {
2449 default: return {};
2450 case AArch64::PRFMui: return AArch64::PRFUMi;
2451 case AArch64::LDRXui: return AArch64::LDURXi;
2452 case AArch64::LDRWui: return AArch64::LDURWi;
2453 case AArch64::LDRBui: return AArch64::LDURBi;
2454 case AArch64::LDRHui: return AArch64::LDURHi;
2455 case AArch64::LDRSui: return AArch64::LDURSi;
2456 case AArch64::LDRDui: return AArch64::LDURDi;
2457 case AArch64::LDRQui: return AArch64::LDURQi;
2458 case AArch64::LDRBBui: return AArch64::LDURBBi;
2459 case AArch64::LDRHHui: return AArch64::LDURHHi;
2460 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2461 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2462 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2463 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2464 case AArch64::LDRSWui: return AArch64::LDURSWi;
2465 case AArch64::STRXui: return AArch64::STURXi;
2466 case AArch64::STRWui: return AArch64::STURWi;
2467 case AArch64::STRBui: return AArch64::STURBi;
2468 case AArch64::STRHui: return AArch64::STURHi;
2469 case AArch64::STRSui: return AArch64::STURSi;
2470 case AArch64::STRDui: return AArch64::STURDi;
2471 case AArch64::STRQui: return AArch64::STURQi;
2472 case AArch64::STRBBui: return AArch64::STURBBi;
2473 case AArch64::STRHHui: return AArch64::STURHHi;
2474 }
2475}
2476
2477unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2478 switch (Opc) {
2479 default:
2480 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2481 case AArch64::ADDG:
2482 case AArch64::LDAPURBi:
2483 case AArch64::LDAPURHi:
2484 case AArch64::LDAPURi:
2485 case AArch64::LDAPURSBWi:
2486 case AArch64::LDAPURSBXi:
2487 case AArch64::LDAPURSHWi:
2488 case AArch64::LDAPURSHXi:
2489 case AArch64::LDAPURSWi:
2490 case AArch64::LDAPURXi:
2491 case AArch64::LDR_PPXI:
2492 case AArch64::LDR_PXI:
2493 case AArch64::LDR_ZXI:
2494 case AArch64::LDR_ZZXI:
2495 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2496 case AArch64::LDR_ZZZXI:
2497 case AArch64::LDR_ZZZZXI:
2498 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2499 case AArch64::LDRBBui:
2500 case AArch64::LDRBui:
2501 case AArch64::LDRDui:
2502 case AArch64::LDRHHui:
2503 case AArch64::LDRHui:
2504 case AArch64::LDRQui:
2505 case AArch64::LDRSBWui:
2506 case AArch64::LDRSBXui:
2507 case AArch64::LDRSHWui:
2508 case AArch64::LDRSHXui:
2509 case AArch64::LDRSui:
2510 case AArch64::LDRSWui:
2511 case AArch64::LDRWui:
2512 case AArch64::LDRXui:
2513 case AArch64::LDURBBi:
2514 case AArch64::LDURBi:
2515 case AArch64::LDURDi:
2516 case AArch64::LDURHHi:
2517 case AArch64::LDURHi:
2518 case AArch64::LDURQi:
2519 case AArch64::LDURSBWi:
2520 case AArch64::LDURSBXi:
2521 case AArch64::LDURSHWi:
2522 case AArch64::LDURSHXi:
2523 case AArch64::LDURSi:
2524 case AArch64::LDURSWi:
2525 case AArch64::LDURWi:
2526 case AArch64::LDURXi:
2527 case AArch64::PRFMui:
2528 case AArch64::PRFUMi:
2529 case AArch64::ST2Gi:
2530 case AArch64::STGi:
2531 case AArch64::STLURBi:
2532 case AArch64::STLURHi:
2533 case AArch64::STLURWi:
2534 case AArch64::STLURXi:
2535 case AArch64::StoreSwiftAsyncContext:
2536 case AArch64::STR_PPXI:
2537 case AArch64::STR_PXI:
2538 case AArch64::STR_ZXI:
2539 case AArch64::STR_ZZXI:
2540 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2541 case AArch64::STR_ZZZXI:
2542 case AArch64::STR_ZZZZXI:
2543 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2544 case AArch64::STRBBui:
2545 case AArch64::STRBui:
2546 case AArch64::STRDui:
2547 case AArch64::STRHHui:
2548 case AArch64::STRHui:
2549 case AArch64::STRQui:
2550 case AArch64::STRSui:
2551 case AArch64::STRWui:
2552 case AArch64::STRXui:
2553 case AArch64::STURBBi:
2554 case AArch64::STURBi:
2555 case AArch64::STURDi:
2556 case AArch64::STURHHi:
2557 case AArch64::STURHi:
2558 case AArch64::STURQi:
2559 case AArch64::STURSi:
2560 case AArch64::STURWi:
2561 case AArch64::STURXi:
2562 case AArch64::STZ2Gi:
2563 case AArch64::STZGi:
2564 case AArch64::TAGPstack:
2565 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
2566 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
2567 return 2;
2568 case AArch64::LD1B_D_IMM:
2569 case AArch64::LD1B_H_IMM:
2570 case AArch64::LD1B_IMM:
2571 case AArch64::LD1B_S_IMM:
2572 case AArch64::LD1D_IMM:
2573 case AArch64::LD1H_D_IMM:
2574 case AArch64::LD1H_IMM:
2575 case AArch64::LD1H_S_IMM:
2576 case AArch64::LD1RB_D_IMM:
2577 case AArch64::LD1RB_H_IMM:
2578 case AArch64::LD1RB_IMM:
2579 case AArch64::LD1RB_S_IMM:
2580 case AArch64::LD1RD_IMM:
2581 case AArch64::LD1RH_D_IMM:
2582 case AArch64::LD1RH_IMM:
2583 case AArch64::LD1RH_S_IMM:
2584 case AArch64::LD1RSB_D_IMM:
2585 case AArch64::LD1RSB_H_IMM:
2586 case AArch64::LD1RSB_S_IMM:
2587 case AArch64::LD1RSH_D_IMM:
2588 case AArch64::LD1RSH_S_IMM:
2589 case AArch64::LD1RSW_IMM:
2590 case AArch64::LD1RW_D_IMM:
2591 case AArch64::LD1RW_IMM:
2592 case AArch64::LD1SB_D_IMM:
2593 case AArch64::LD1SB_H_IMM:
2594 case AArch64::LD1SB_S_IMM:
2595 case AArch64::LD1SH_D_IMM:
2596 case AArch64::LD1SH_S_IMM:
2597 case AArch64::LD1SW_D_IMM:
2598 case AArch64::LD1W_D_IMM:
2599 case AArch64::LD1W_IMM:
2600 case AArch64::LD2B_IMM:
2601 case AArch64::LD2D_IMM:
2602 case AArch64::LD2H_IMM:
2603 case AArch64::LD2W_IMM:
2604 case AArch64::LD3B_IMM:
2605 case AArch64::LD3D_IMM:
2606 case AArch64::LD3H_IMM:
2607 case AArch64::LD3W_IMM:
2608 case AArch64::LD4B_IMM:
2609 case AArch64::LD4D_IMM:
2610 case AArch64::LD4H_IMM:
2611 case AArch64::LD4W_IMM:
2612 case AArch64::LDG:
2613 case AArch64::LDNF1B_D_IMM:
2614 case AArch64::LDNF1B_H_IMM:
2615 case AArch64::LDNF1B_IMM:
2616 case AArch64::LDNF1B_S_IMM:
2617 case AArch64::LDNF1D_IMM:
2618 case AArch64::LDNF1H_D_IMM:
2619 case AArch64::LDNF1H_IMM:
2620 case AArch64::LDNF1H_S_IMM:
2621 case AArch64::LDNF1SB_D_IMM:
2622 case AArch64::LDNF1SB_H_IMM:
2623 case AArch64::LDNF1SB_S_IMM:
2624 case AArch64::LDNF1SH_D_IMM:
2625 case AArch64::LDNF1SH_S_IMM:
2626 case AArch64::LDNF1SW_D_IMM:
2627 case AArch64::LDNF1W_D_IMM:
2628 case AArch64::LDNF1W_IMM:
2629 case AArch64::LDNPDi:
2630 case AArch64::LDNPQi:
2631 case AArch64::LDNPSi:
2632 case AArch64::LDNPWi:
2633 case AArch64::LDNPXi:
2634 case AArch64::LDNT1B_ZRI:
2635 case AArch64::LDNT1D_ZRI:
2636 case AArch64::LDNT1H_ZRI:
2637 case AArch64::LDNT1W_ZRI:
2638 case AArch64::LDPDi:
2639 case AArch64::LDPQi:
2640 case AArch64::LDPSi:
2641 case AArch64::LDPWi:
2642 case AArch64::LDPXi:
2643 case AArch64::LDRBBpost:
2644 case AArch64::LDRBBpre:
2645 case AArch64::LDRBpost:
2646 case AArch64::LDRBpre:
2647 case AArch64::LDRDpost:
2648 case AArch64::LDRDpre:
2649 case AArch64::LDRHHpost:
2650 case AArch64::LDRHHpre:
2651 case AArch64::LDRHpost:
2652 case AArch64::LDRHpre:
2653 case AArch64::LDRQpost:
2654 case AArch64::LDRQpre:
2655 case AArch64::LDRSpost:
2656 case AArch64::LDRSpre:
2657 case AArch64::LDRWpost:
2658 case AArch64::LDRWpre:
2659 case AArch64::LDRXpost:
2660 case AArch64::LDRXpre:
2661 case AArch64::ST1B_D_IMM:
2662 case AArch64::ST1B_H_IMM:
2663 case AArch64::ST1B_IMM:
2664 case AArch64::ST1B_S_IMM:
2665 case AArch64::ST1D_IMM:
2666 case AArch64::ST1H_D_IMM:
2667 case AArch64::ST1H_IMM:
2668 case AArch64::ST1H_S_IMM:
2669 case AArch64::ST1W_D_IMM:
2670 case AArch64::ST1W_IMM:
2671 case AArch64::ST2B_IMM:
2672 case AArch64::ST2D_IMM:
2673 case AArch64::ST2H_IMM:
2674 case AArch64::ST2W_IMM:
2675 case AArch64::ST3B_IMM:
2676 case AArch64::ST3D_IMM:
2677 case AArch64::ST3H_IMM:
2678 case AArch64::ST3W_IMM:
2679 case AArch64::ST4B_IMM:
2680 case AArch64::ST4D_IMM:
2681 case AArch64::ST4H_IMM:
2682 case AArch64::ST4W_IMM:
2683 case AArch64::STGPi:
2684 case AArch64::STGPreIndex:
2685 case AArch64::STZGPreIndex:
2686 case AArch64::ST2GPreIndex:
2687 case AArch64::STZ2GPreIndex:
2688 case AArch64::STGPostIndex:
2689 case AArch64::STZGPostIndex:
2690 case AArch64::ST2GPostIndex:
2691 case AArch64::STZ2GPostIndex:
2692 case AArch64::STNPDi:
2693 case AArch64::STNPQi:
2694 case AArch64::STNPSi:
2695 case AArch64::STNPWi:
2696 case AArch64::STNPXi:
2697 case AArch64::STNT1B_ZRI:
2698 case AArch64::STNT1D_ZRI:
2699 case AArch64::STNT1H_ZRI:
2700 case AArch64::STNT1W_ZRI:
2701 case AArch64::STPDi:
2702 case AArch64::STPQi:
2703 case AArch64::STPSi:
2704 case AArch64::STPWi:
2705 case AArch64::STPXi:
2706 case AArch64::STRBBpost:
2707 case AArch64::STRBBpre:
2708 case AArch64::STRBpost:
2709 case AArch64::STRBpre:
2710 case AArch64::STRDpost:
2711 case AArch64::STRDpre:
2712 case AArch64::STRHHpost:
2713 case AArch64::STRHHpre:
2714 case AArch64::STRHpost:
2715 case AArch64::STRHpre:
2716 case AArch64::STRQpost:
2717 case AArch64::STRQpre:
2718 case AArch64::STRSpost:
2719 case AArch64::STRSpre:
2720 case AArch64::STRWpost:
2721 case AArch64::STRWpre:
2722 case AArch64::STRXpost:
2723 case AArch64::STRXpre:
2724 return 3;
2725 case AArch64::LDPDpost:
2726 case AArch64::LDPDpre:
2727 case AArch64::LDPQpost:
2728 case AArch64::LDPQpre:
2729 case AArch64::LDPSpost:
2730 case AArch64::LDPSpre:
2731 case AArch64::LDPWpost:
2732 case AArch64::LDPWpre:
2733 case AArch64::LDPXpost:
2734 case AArch64::LDPXpre:
2735 case AArch64::STGPpre:
2736 case AArch64::STGPpost:
2737 case AArch64::STPDpost:
2738 case AArch64::STPDpre:
2739 case AArch64::STPQpost:
2740 case AArch64::STPQpre:
2741 case AArch64::STPSpost:
2742 case AArch64::STPSpre:
2743 case AArch64::STPWpost:
2744 case AArch64::STPWpre:
2745 case AArch64::STPXpost:
2746 case AArch64::STPXpre:
2747 return 4;
2748 }
2749}
2750
2751bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2752 switch (MI.getOpcode()) {
2753 default:
2754 return false;
2755 // Scaled instructions.
2756 case AArch64::STRSui:
2757 case AArch64::STRDui:
2758 case AArch64::STRQui:
2759 case AArch64::STRXui:
2760 case AArch64::STRWui:
2761 case AArch64::LDRSui:
2762 case AArch64::LDRDui:
2763 case AArch64::LDRQui:
2764 case AArch64::LDRXui:
2765 case AArch64::LDRWui:
2766 case AArch64::LDRSWui:
2767 // Unscaled instructions.
2768 case AArch64::STURSi:
2769 case AArch64::STRSpre:
2770 case AArch64::STURDi:
2771 case AArch64::STRDpre:
2772 case AArch64::STURQi:
2773 case AArch64::STRQpre:
2774 case AArch64::STURWi:
2775 case AArch64::STRWpre:
2776 case AArch64::STURXi:
2777 case AArch64::STRXpre:
2778 case AArch64::LDURSi:
2779 case AArch64::LDRSpre:
2780 case AArch64::LDURDi:
2781 case AArch64::LDRDpre:
2782 case AArch64::LDURQi:
2783 case AArch64::LDRQpre:
2784 case AArch64::LDURWi:
2785 case AArch64::LDRWpre:
2786 case AArch64::LDURXi:
2787 case AArch64::LDRXpre:
2788 case AArch64::LDURSWi:
2789 case AArch64::LDRSWpre:
2790 // SVE instructions.
2791 case AArch64::LDR_ZXI:
2792 case AArch64::STR_ZXI:
2793 return true;
2794 }
2795}
2796
2797bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2798 switch (MI.getOpcode()) {
2799 default:
2800 assert((!MI.isCall() || !MI.isReturn()) &&
2801 "Unexpected instruction - was a new tail call opcode introduced?");
2802 return false;
2803 case AArch64::TCRETURNdi:
2804 case AArch64::TCRETURNri:
2805 case AArch64::TCRETURNrix16x17:
2806 case AArch64::TCRETURNrix17:
2807 case AArch64::TCRETURNrinotx16:
2808 case AArch64::TCRETURNriALL:
2809 case AArch64::AUTH_TCRETURN:
2810 case AArch64::AUTH_TCRETURN_BTI:
2811 return true;
2812 }
2813}
2814
2815unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2816 switch (Opc) {
2817 default:
2818 llvm_unreachable("Opcode has no flag setting equivalent!");
2819 // 32-bit cases:
2820 case AArch64::ADDWri:
2821 return AArch64::ADDSWri;
2822 case AArch64::ADDWrr:
2823 return AArch64::ADDSWrr;
2824 case AArch64::ADDWrs:
2825 return AArch64::ADDSWrs;
2826 case AArch64::ADDWrx:
2827 return AArch64::ADDSWrx;
2828 case AArch64::ANDWri:
2829 return AArch64::ANDSWri;
2830 case AArch64::ANDWrr:
2831 return AArch64::ANDSWrr;
2832 case AArch64::ANDWrs:
2833 return AArch64::ANDSWrs;
2834 case AArch64::BICWrr:
2835 return AArch64::BICSWrr;
2836 case AArch64::BICWrs:
2837 return AArch64::BICSWrs;
2838 case AArch64::SUBWri:
2839 return AArch64::SUBSWri;
2840 case AArch64::SUBWrr:
2841 return AArch64::SUBSWrr;
2842 case AArch64::SUBWrs:
2843 return AArch64::SUBSWrs;
2844 case AArch64::SUBWrx:
2845 return AArch64::SUBSWrx;
2846 // 64-bit cases:
2847 case AArch64::ADDXri:
2848 return AArch64::ADDSXri;
2849 case AArch64::ADDXrr:
2850 return AArch64::ADDSXrr;
2851 case AArch64::ADDXrs:
2852 return AArch64::ADDSXrs;
2853 case AArch64::ADDXrx:
2854 return AArch64::ADDSXrx;
2855 case AArch64::ANDXri:
2856 return AArch64::ANDSXri;
2857 case AArch64::ANDXrr:
2858 return AArch64::ANDSXrr;
2859 case AArch64::ANDXrs:
2860 return AArch64::ANDSXrs;
2861 case AArch64::BICXrr:
2862 return AArch64::BICSXrr;
2863 case AArch64::BICXrs:
2864 return AArch64::BICSXrs;
2865 case AArch64::SUBXri:
2866 return AArch64::SUBSXri;
2867 case AArch64::SUBXrr:
2868 return AArch64::SUBSXrr;
2869 case AArch64::SUBXrs:
2870 return AArch64::SUBSXrs;
2871 case AArch64::SUBXrx:
2872 return AArch64::SUBSXrx;
2873 // SVE instructions:
2874 case AArch64::AND_PPzPP:
2875 return AArch64::ANDS_PPzPP;
2876 case AArch64::BIC_PPzPP:
2877 return AArch64::BICS_PPzPP;
2878 case AArch64::EOR_PPzPP:
2879 return AArch64::EORS_PPzPP;
2880 case AArch64::NAND_PPzPP:
2881 return AArch64::NANDS_PPzPP;
2882 case AArch64::NOR_PPzPP:
2883 return AArch64::NORS_PPzPP;
2884 case AArch64::ORN_PPzPP:
2885 return AArch64::ORNS_PPzPP;
2886 case AArch64::ORR_PPzPP:
2887 return AArch64::ORRS_PPzPP;
2888 case AArch64::BRKA_PPzP:
2889 return AArch64::BRKAS_PPzP;
2890 case AArch64::BRKPA_PPzPP:
2891 return AArch64::BRKPAS_PPzPP;
2892 case AArch64::BRKB_PPzP:
2893 return AArch64::BRKBS_PPzP;
2894 case AArch64::BRKPB_PPzPP:
2895 return AArch64::BRKPBS_PPzPP;
2896 case AArch64::BRKN_PPzP:
2897 return AArch64::BRKNS_PPzP;
2898 case AArch64::RDFFR_PPz:
2899 return AArch64::RDFFRS_PPz;
2900 case AArch64::PTRUE_B:
2901 return AArch64::PTRUES_B;
2902 }
2903}
2904
2905// Is this a candidate for ld/st merging or pairing? For example, we don't
2906// touch volatiles or load/stores that have a hint to avoid pair formation.
2907bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2908
2909 bool IsPreLdSt = isPreLdSt(MI);
2910
2911 // If this is a volatile load/store, don't mess with it.
2912 if (MI.hasOrderedMemoryRef())
2913 return false;
2914
2915 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2916 // For Pre-inc LD/ST, the operand is shifted by one.
2917 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2918 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2919 "Expected a reg or frame index operand.");
2920
2921 // For Pre-indexed addressing quadword instructions, the third operand is the
2922 // immediate value.
2923 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2924
2925 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2926 return false;
2927
2928 // Can't merge/pair if the instruction modifies the base register.
2929 // e.g., ldr x0, [x0]
2930 // This case will never occur with an FI base.
2931 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2932 // STR<S,D,Q,W,X>pre, it can be merged.
2933 // For example:
2934 // ldr q0, [x11, #32]!
2935 // ldr q1, [x11, #16]
2936 // to
2937 // ldp q0, q1, [x11, #32]!
2938 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2939 Register BaseReg = MI.getOperand(1).getReg();
2940 const TargetRegisterInfo *TRI = &getRegisterInfo();
2941 if (MI.modifiesRegister(BaseReg, TRI))
2942 return false;
2943 }
2944
2945 // Pairing SVE fills/spills is only valid for little-endian targets that
2946 // implement VLS 128.
2947 switch (MI.getOpcode()) {
2948 default:
2949 break;
2950 case AArch64::LDR_ZXI:
2951 case AArch64::STR_ZXI:
2952 if (!Subtarget.isLittleEndian() ||
2953 Subtarget.getSVEVectorSizeInBits() != 128)
2954 return false;
2955 }
2956
2957 // Check if this load/store has a hint to avoid pair formation.
2958 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2959 if (isLdStPairSuppressed(MI))
2960 return false;
2961
2962 // Do not pair any callee-save store/reload instructions in the
2963 // prologue/epilogue if the CFI information encoded the operations as separate
2964 // instructions, as that will cause the size of the actual prologue to mismatch
2965 // with the prologue size recorded in the Windows CFI.
2966 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2967 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2968 MI.getMF()->getFunction().needsUnwindTableEntry();
2969 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2970 MI.getFlag(MachineInstr::FrameDestroy)))
2971 return false;
2972
2973 // On some CPUs quad load/store pairs are slower than two single load/stores.
2974 if (Subtarget.isPaired128Slow()) {
2975 switch (MI.getOpcode()) {
2976 default:
2977 break;
2978 case AArch64::LDURQi:
2979 case AArch64::STURQi:
2980 case AArch64::LDRQui:
2981 case AArch64::STRQui:
2982 return false;
2983 }
2984 }
2985
2986 return true;
2987}
2988
2989bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2990 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2991 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2992 const TargetRegisterInfo *TRI) const {
2993 if (!LdSt.mayLoadOrStore())
2994 return false;
2995
2996 const MachineOperand *BaseOp;
2997 TypeSize WidthN(0, false);
2998 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2999 WidthN, TRI))
3000 return false;
3001 // The maximum vscale is 16 under AArch64, return the maximal extent for the
3002 // vector.
3003 Width = LocationSize::precise(WidthN);
3004 BaseOps.push_back(BaseOp);
3005 return true;
3006}
3007
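// For example (illustrative): for "ldr x0, [x1, #16]" the result has
// BaseReg == x1, Displacement == 16 (in bytes) and no scaled register.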
3008std::optional<ExtAddrMode>
3009AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3010 const TargetRegisterInfo *TRI) const {
3011 const MachineOperand *Base; // Filled with the base operand of MI.
3012 int64_t Offset; // Filled with the offset of MI.
3013 bool OffsetIsScalable;
3014 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3015 return std::nullopt;
3016
3017 if (!Base->isReg())
3018 return std::nullopt;
3019 ExtAddrMode AM;
3020 AM.BaseReg = Base->getReg();
3021 AM.Displacement = Offset;
3022 AM.ScaledReg = 0;
3023 AM.Scale = 0;
3024 return AM;
3025}
3026
3027bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3028 Register Reg,
3029 const MachineInstr &AddrI,
3030 ExtAddrMode &AM) const {
3031 // Filter out instructions into which we cannot fold.
3032 unsigned NumBytes;
3033 int64_t OffsetScale = 1;
3034 switch (MemI.getOpcode()) {
3035 default:
3036 return false;
3037
3038 case AArch64::LDURQi:
3039 case AArch64::STURQi:
3040 NumBytes = 16;
3041 break;
3042
3043 case AArch64::LDURDi:
3044 case AArch64::STURDi:
3045 case AArch64::LDURXi:
3046 case AArch64::STURXi:
3047 NumBytes = 8;
3048 break;
3049
3050 case AArch64::LDURWi:
3051 case AArch64::LDURSWi:
3052 case AArch64::STURWi:
3053 NumBytes = 4;
3054 break;
3055
3056 case AArch64::LDURHi:
3057 case AArch64::STURHi:
3058 case AArch64::LDURHHi:
3059 case AArch64::STURHHi:
3060 case AArch64::LDURSHXi:
3061 case AArch64::LDURSHWi:
3062 NumBytes = 2;
3063 break;
3064
3065 case AArch64::LDRBroX:
3066 case AArch64::LDRBBroX:
3067 case AArch64::LDRSBXroX:
3068 case AArch64::LDRSBWroX:
3069 case AArch64::STRBroX:
3070 case AArch64::STRBBroX:
3071 case AArch64::LDURBi:
3072 case AArch64::LDURBBi:
3073 case AArch64::LDURSBXi:
3074 case AArch64::LDURSBWi:
3075 case AArch64::STURBi:
3076 case AArch64::STURBBi:
3077 case AArch64::LDRBui:
3078 case AArch64::LDRBBui:
3079 case AArch64::LDRSBXui:
3080 case AArch64::LDRSBWui:
3081 case AArch64::STRBui:
3082 case AArch64::STRBBui:
3083 NumBytes = 1;
3084 break;
3085
3086 case AArch64::LDRQroX:
3087 case AArch64::STRQroX:
3088 case AArch64::LDRQui:
3089 case AArch64::STRQui:
3090 NumBytes = 16;
3091 OffsetScale = 16;
3092 break;
3093
3094 case AArch64::LDRDroX:
3095 case AArch64::STRDroX:
3096 case AArch64::LDRXroX:
3097 case AArch64::STRXroX:
3098 case AArch64::LDRDui:
3099 case AArch64::STRDui:
3100 case AArch64::LDRXui:
3101 case AArch64::STRXui:
3102 NumBytes = 8;
3103 OffsetScale = 8;
3104 break;
3105
3106 case AArch64::LDRWroX:
3107 case AArch64::LDRSWroX:
3108 case AArch64::STRWroX:
3109 case AArch64::LDRWui:
3110 case AArch64::LDRSWui:
3111 case AArch64::STRWui:
3112 NumBytes = 4;
3113 OffsetScale = 4;
3114 break;
3115
3116 case AArch64::LDRHroX:
3117 case AArch64::STRHroX:
3118 case AArch64::LDRHHroX:
3119 case AArch64::STRHHroX:
3120 case AArch64::LDRSHXroX:
3121 case AArch64::LDRSHWroX:
3122 case AArch64::LDRHui:
3123 case AArch64::STRHui:
3124 case AArch64::LDRHHui:
3125 case AArch64::STRHHui:
3126 case AArch64::LDRSHXui:
3127 case AArch64::LDRSHWui:
3128 NumBytes = 2;
3129 OffsetScale = 2;
3130 break;
3131 }
3132
3133 // Check the fold operand is not the loaded/stored value.
3134 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3135 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3136 return false;
3137
3138 // Handle memory instructions with a [Reg, Reg] addressing mode.
3139 if (MemI.getOperand(2).isReg()) {
3140 // Bail if the addressing mode already includes extension of the offset
3141 // register.
3142 if (MemI.getOperand(3).getImm())
3143 return false;
3144
3145 // Check if we actually have a scaled offset.
3146 if (MemI.getOperand(4).getImm() == 0)
3147 OffsetScale = 1;
3148
3149 // If the address instruction is folded into the base register, then the
3150 // addressing mode must not have a scale. Then we can swap the base and the
3151 // scaled registers.
3152 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3153 return false;
3154
3155 switch (AddrI.getOpcode()) {
3156 default:
3157 return false;
3158
3159 case AArch64::SBFMXri:
3160 // sxtw Xa, Wm
3161 // ldr Xd, [Xn, Xa, lsl #N]
3162 // ->
3163 // ldr Xd, [Xn, Wm, sxtw #N]
3164 if (AddrI.getOperand(2).getImm() != 0 ||
3165 AddrI.getOperand(3).getImm() != 31)
3166 return false;
3167
3168 AM.BaseReg = MemI.getOperand(1).getReg();
3169 if (AM.BaseReg == Reg)
3170 AM.BaseReg = MemI.getOperand(2).getReg();
3171 AM.ScaledReg = AddrI.getOperand(1).getReg();
3172 AM.Scale = OffsetScale;
3173 AM.Displacement = 0;
3174 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3175 return true;
3176
3177 case TargetOpcode::SUBREG_TO_REG: {
3178 // mov Wa, Wm
3179 // ldr Xd, [Xn, Xa, lsl #N]
3180 // ->
3181 // ldr Xd, [Xn, Wm, uxtw #N]
3182
3183 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3184 if (AddrI.getOperand(1).getImm() != 0 ||
3185 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3186 return false;
3187
3188 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3189 Register OffsetReg = AddrI.getOperand(2).getReg();
3190 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3191 return false;
3192
3193 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3194 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3195 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3196 DefMI.getOperand(3).getImm() != 0)
3197 return false;
3198
3199 AM.BaseReg = MemI.getOperand(1).getReg();
3200 if (AM.BaseReg == Reg)
3201 AM.BaseReg = MemI.getOperand(2).getReg();
3202 AM.ScaledReg = DefMI.getOperand(2).getReg();
3203 AM.Scale = OffsetScale;
3204 AM.Displacement = 0;
3205 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3206 return true;
3207 }
3208 }
3209 }
3210
3211 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3212
3213 // Check we are not breaking a potential conversion to an LDP.
3214 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3215 int64_t NewOffset) -> bool {
3216 int64_t MinOffset, MaxOffset;
3217 switch (NumBytes) {
3218 default:
3219 return true;
3220 case 4:
3221 MinOffset = -256;
3222 MaxOffset = 252;
3223 break;
3224 case 8:
3225 MinOffset = -512;
3226 MaxOffset = 504;
3227 break;
3228 case 16:
3229 MinOffset = -1024;
3230 MaxOffset = 1008;
3231 break;
3232 }
3233 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3234 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3235 };
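 // For example (illustrative): with an 8-byte access an LDP can encode
 // offsets in [-512, 504], so a fold that would move a pairable offset such
 // as #0 out to #520 is rejected here, while #0 -> #504 (still pairable) or
 // #1000 -> #1008 (never pairable) is not blocked by this check.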
3236 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3237 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3238 int64_t NewOffset = OldOffset + Disp;
3239 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3240 return false;
3241 // If the old offset would fit into an LDP, but the new offset wouldn't,
3242 // bail out.
3243 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3244 return false;
3245 AM.BaseReg = AddrI.getOperand(1).getReg();
3246 AM.ScaledReg = 0;
3247 AM.Scale = 0;
3248 AM.Displacement = NewOffset;
3249 AM.Form = ExtAddrMode::Formula::Basic;
3250 return true;
3251 };
3252
3253 auto canFoldAddRegIntoAddrMode =
3254 [&](int64_t Scale,
3255 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3256 if (MemI.getOperand(2).getImm() != 0)
3257 return false;
3258 if ((unsigned)Scale != Scale)
3259 return false;
3260 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3261 return false;
3262 AM.BaseReg = AddrI.getOperand(1).getReg();
3263 AM.ScaledReg = AddrI.getOperand(2).getReg();
3264 AM.Scale = Scale;
3265 AM.Displacement = 0;
3266 AM.Form = Form;
3267 return true;
3268 };
3269
3270 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3271 unsigned Opcode = MemI.getOpcode();
3272 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3273 Subtarget.isSTRQroSlow();
3274 };
3275
3276 int64_t Disp = 0;
3277 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3278 switch (AddrI.getOpcode()) {
3279 default:
3280 return false;
3281
3282 case AArch64::ADDXri:
3283 // add Xa, Xn, #N
3284 // ldr Xd, [Xa, #M]
3285 // ->
3286 // ldr Xd, [Xn, #N'+M]
3287 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3288 return canFoldAddSubImmIntoAddrMode(Disp);
3289
3290 case AArch64::SUBXri:
3291 // sub Xa, Xn, #N
3292 // ldr Xd, [Xa, #M]
3293 // ->
3294 // ldr Xd, [Xn, #N'+M]
3295 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3296 return canFoldAddSubImmIntoAddrMode(-Disp);
3297
3298 case AArch64::ADDXrs: {
3299 // add Xa, Xn, Xm, lsl #N
3300 // ldr Xd, [Xa]
3301 // ->
3302 // ldr Xd, [Xn, Xm, lsl #N]
3303
3304 // Don't fold the add if the result would be slower, unless optimising for
3305 // size.
3306 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3307 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
3308 return false;
3309 Shift = AArch64_AM::getShiftValue(Shift);
3310 if (!OptSize) {
3311 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3312 return false;
3313 if (avoidSlowSTRQ(MemI))
3314 return false;
3315 }
3316 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3317 }
3318
3319 case AArch64::ADDXrr:
3320 // add Xa, Xn, Xm
3321 // ldr Xd, [Xa]
3322 // ->
3323 // ldr Xd, [Xn, Xm, lsl #0]
3324
3325 // Don't fold the add if the result would be slower, unless optimising for
3326 // size.
3327 if (!OptSize && avoidSlowSTRQ(MemI))
3328 return false;
3329 return canFoldAddRegIntoAddrMode(1);
3330
3331 case AArch64::ADDXrx:
3332 // add Xa, Xn, Wm, {s,u}xtw #N
3333 // ldr Xd, [Xa]
3334 // ->
3335 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3336
3337 // Don't fold the add if the result would be slower, unless optimising for
3338 // size.
3339 if (!OptSize && avoidSlowSTRQ(MemI))
3340 return false;
3341
3342 // Can fold only sign-/zero-extend of a word.
3343 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3344 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3345 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3346 return false;
3347
3348 return canFoldAddRegIntoAddrMode(
3349 1ULL << AArch64_AM::getArithShiftValue(Imm),
3350 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3351 : ExtAddrMode::Formula::ZExtScaledReg);
3352 }
3353}
3354
3355// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3356// return the opcode of an instruction performing the same operation, but using
3357// the [Reg, Reg] addressing mode.
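// For example (illustrative), both LDRXui ("ldr Xt, [Xn, #imm]") and LDURXi
// ("ldur Xt, [Xn, #imm]") map to LDRXroX ("ldr Xt, [Xn, Xm, lsl #0|#3]").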
3358static unsigned regOffsetOpcode(unsigned Opcode) {
3359 switch (Opcode) {
3360 default:
3361 llvm_unreachable("Address folding not implemented for instruction");
3362
3363 case AArch64::LDURQi:
3364 case AArch64::LDRQui:
3365 return AArch64::LDRQroX;
3366 case AArch64::STURQi:
3367 case AArch64::STRQui:
3368 return AArch64::STRQroX;
3369 case AArch64::LDURDi:
3370 case AArch64::LDRDui:
3371 return AArch64::LDRDroX;
3372 case AArch64::STURDi:
3373 case AArch64::STRDui:
3374 return AArch64::STRDroX;
3375 case AArch64::LDURXi:
3376 case AArch64::LDRXui:
3377 return AArch64::LDRXroX;
3378 case AArch64::STURXi:
3379 case AArch64::STRXui:
3380 return AArch64::STRXroX;
3381 case AArch64::LDURWi:
3382 case AArch64::LDRWui:
3383 return AArch64::LDRWroX;
3384 case AArch64::LDURSWi:
3385 case AArch64::LDRSWui:
3386 return AArch64::LDRSWroX;
3387 case AArch64::STURWi:
3388 case AArch64::STRWui:
3389 return AArch64::STRWroX;
3390 case AArch64::LDURHi:
3391 case AArch64::LDRHui:
3392 return AArch64::LDRHroX;
3393 case AArch64::STURHi:
3394 case AArch64::STRHui:
3395 return AArch64::STRHroX;
3396 case AArch64::LDURHHi:
3397 case AArch64::LDRHHui:
3398 return AArch64::LDRHHroX;
3399 case AArch64::STURHHi:
3400 case AArch64::STRHHui:
3401 return AArch64::STRHHroX;
3402 case AArch64::LDURSHXi:
3403 case AArch64::LDRSHXui:
3404 return AArch64::LDRSHXroX;
3405 case AArch64::LDURSHWi:
3406 case AArch64::LDRSHWui:
3407 return AArch64::LDRSHWroX;
3408 case AArch64::LDURBi:
3409 case AArch64::LDRBui:
3410 return AArch64::LDRBroX;
3411 case AArch64::LDURBBi:
3412 case AArch64::LDRBBui:
3413 return AArch64::LDRBBroX;
3414 case AArch64::LDURSBXi:
3415 case AArch64::LDRSBXui:
3416 return AArch64::LDRSBXroX;
3417 case AArch64::LDURSBWi:
3418 case AArch64::LDRSBWui:
3419 return AArch64::LDRSBWroX;
3420 case AArch64::STURBi:
3421 case AArch64::STRBui:
3422 return AArch64::STRBroX;
3423 case AArch64::STURBBi:
3424 case AArch64::STRBBui:
3425 return AArch64::STRBBroX;
3426 }
3427}
3428
3429// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3430// the opcode of an instruction performing the same operation, but using the
3431// [Reg, #Imm] addressing mode with scaled offset.
3432unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3433 switch (Opcode) {
3434 default:
3435 llvm_unreachable("Address folding not implemented for instruction");
3436
3437 case AArch64::LDURQi:
3438 Scale = 16;
3439 return AArch64::LDRQui;
3440 case AArch64::STURQi:
3441 Scale = 16;
3442 return AArch64::STRQui;
3443 case AArch64::LDURDi:
3444 Scale = 8;
3445 return AArch64::LDRDui;
3446 case AArch64::STURDi:
3447 Scale = 8;
3448 return AArch64::STRDui;
3449 case AArch64::LDURXi:
3450 Scale = 8;
3451 return AArch64::LDRXui;
3452 case AArch64::STURXi:
3453 Scale = 8;
3454 return AArch64::STRXui;
3455 case AArch64::LDURWi:
3456 Scale = 4;
3457 return AArch64::LDRWui;
3458 case AArch64::LDURSWi:
3459 Scale = 4;
3460 return AArch64::LDRSWui;
3461 case AArch64::STURWi:
3462 Scale = 4;
3463 return AArch64::STRWui;
3464 case AArch64::LDURHi:
3465 Scale = 2;
3466 return AArch64::LDRHui;
3467 case AArch64::STURHi:
3468 Scale = 2;
3469 return AArch64::STRHui;
3470 case AArch64::LDURHHi:
3471 Scale = 2;
3472 return AArch64::LDRHHui;
3473 case AArch64::STURHHi:
3474 Scale = 2;
3475 return AArch64::STRHHui;
3476 case AArch64::LDURSHXi:
3477 Scale = 2;
3478 return AArch64::LDRSHXui;
3479 case AArch64::LDURSHWi:
3480 Scale = 2;
3481 return AArch64::LDRSHWui;
3482 case AArch64::LDURBi:
3483 Scale = 1;
3484 return AArch64::LDRBui;
3485 case AArch64::LDURBBi:
3486 Scale = 1;
3487 return AArch64::LDRBBui;
3488 case AArch64::LDURSBXi:
3489 Scale = 1;
3490 return AArch64::LDRSBXui;
3491 case AArch64::LDURSBWi:
3492 Scale = 1;
3493 return AArch64::LDRSBWui;
3494 case AArch64::STURBi:
3495 Scale = 1;
3496 return AArch64::STRBui;
3497 case AArch64::STURBBi:
3498 Scale = 1;
3499 return AArch64::STRBBui;
3500 case AArch64::LDRQui:
3501 case AArch64::STRQui:
3502 Scale = 16;
3503 return Opcode;
3504 case AArch64::LDRDui:
3505 case AArch64::STRDui:
3506 case AArch64::LDRXui:
3507 case AArch64::STRXui:
3508 Scale = 8;
3509 return Opcode;
3510 case AArch64::LDRWui:
3511 case AArch64::LDRSWui:
3512 case AArch64::STRWui:
3513 Scale = 4;
3514 return Opcode;
3515 case AArch64::LDRHui:
3516 case AArch64::STRHui:
3517 case AArch64::LDRHHui:
3518 case AArch64::STRHHui:
3519 case AArch64::LDRSHXui:
3520 case AArch64::LDRSHWui:
3521 Scale = 2;
3522 return Opcode;
3523 case AArch64::LDRBui:
3524 case AArch64::LDRBBui:
3525 case AArch64::LDRSBXui:
3526 case AArch64::LDRSBWui:
3527 case AArch64::STRBui:
3528 case AArch64::STRBBui:
3529 Scale = 1;
3530 return Opcode;
3531 }
3532}
3533
3534// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3535// the opcode of an instruction performing the same operation, but using the
3536// [Reg, #Imm] addressing mode with unscaled offset.
3537unsigned unscaledOffsetOpcode(unsigned Opcode) {
3538 switch (Opcode) {
3539 default:
3540 llvm_unreachable("Address folding not implemented for instruction");
3541
3542 case AArch64::LDURQi:
3543 case AArch64::STURQi:
3544 case AArch64::LDURDi:
3545 case AArch64::STURDi:
3546 case AArch64::LDURXi:
3547 case AArch64::STURXi:
3548 case AArch64::LDURWi:
3549 case AArch64::LDURSWi:
3550 case AArch64::STURWi:
3551 case AArch64::LDURHi:
3552 case AArch64::STURHi:
3553 case AArch64::LDURHHi:
3554 case AArch64::STURHHi:
3555 case AArch64::LDURSHXi:
3556 case AArch64::LDURSHWi:
3557 case AArch64::LDURBi:
3558 case AArch64::STURBi:
3559 case AArch64::LDURBBi:
3560 case AArch64::STURBBi:
3561 case AArch64::LDURSBWi:
3562 case AArch64::LDURSBXi:
3563 return Opcode;
3564 case AArch64::LDRQui:
3565 return AArch64::LDURQi;
3566 case AArch64::STRQui:
3567 return AArch64::STURQi;
3568 case AArch64::LDRDui:
3569 return AArch64::LDURDi;
3570 case AArch64::STRDui:
3571 return AArch64::STURDi;
3572 case AArch64::LDRXui:
3573 return AArch64::LDURXi;
3574 case AArch64::STRXui:
3575 return AArch64::STURXi;
3576 case AArch64::LDRWui:
3577 return AArch64::LDURWi;
3578 case AArch64::LDRSWui:
3579 return AArch64::LDURSWi;
3580 case AArch64::STRWui:
3581 return AArch64::STURWi;
3582 case AArch64::LDRHui:
3583 return AArch64::LDURHi;
3584 case AArch64::STRHui:
3585 return AArch64::STURHi;
3586 case AArch64::LDRHHui:
3587 return AArch64::LDURHHi;
3588 case AArch64::STRHHui:
3589 return AArch64::STURHHi;
3590 case AArch64::LDRSHXui:
3591 return AArch64::LDURSHXi;
3592 case AArch64::LDRSHWui:
3593 return AArch64::LDURSHWi;
3594 case AArch64::LDRBBui:
3595 return AArch64::LDURBBi;
3596 case AArch64::LDRBui:
3597 return AArch64::LDURBi;
3598 case AArch64::STRBBui:
3599 return AArch64::STURBBi;
3600 case AArch64::STRBui:
3601 return AArch64::STURBi;
3602 case AArch64::LDRSBWui:
3603 return AArch64::LDURSBWi;
3604 case AArch64::LDRSBXui:
3605 return AArch64::LDURSBXi;
3606 }
3607}
3608
3609// Given the opcode of a memory load/store instruction, return the opcode of an
3610// instruction performing the same operation, but using
3611// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3612// offset register.
3613static unsigned offsetExtendOpcode(unsigned Opcode) {
3614 switch (Opcode) {
3615 default:
3616 llvm_unreachable("Address folding not implemented for instruction");
3617
3618 case AArch64::LDRQroX:
3619 case AArch64::LDURQi:
3620 case AArch64::LDRQui:
3621 return AArch64::LDRQroW;
3622 case AArch64::STRQroX:
3623 case AArch64::STURQi:
3624 case AArch64::STRQui:
3625 return AArch64::STRQroW;
3626 case AArch64::LDRDroX:
3627 case AArch64::LDURDi:
3628 case AArch64::LDRDui:
3629 return AArch64::LDRDroW;
3630 case AArch64::STRDroX:
3631 case AArch64::STURDi:
3632 case AArch64::STRDui:
3633 return AArch64::STRDroW;
3634 case AArch64::LDRXroX:
3635 case AArch64::LDURXi:
3636 case AArch64::LDRXui:
3637 return AArch64::LDRXroW;
3638 case AArch64::STRXroX:
3639 case AArch64::STURXi:
3640 case AArch64::STRXui:
3641 return AArch64::STRXroW;
3642 case AArch64::LDRWroX:
3643 case AArch64::LDURWi:
3644 case AArch64::LDRWui:
3645 return AArch64::LDRWroW;
3646 case AArch64::LDRSWroX:
3647 case AArch64::LDURSWi:
3648 case AArch64::LDRSWui:
3649 return AArch64::LDRSWroW;
3650 case AArch64::STRWroX:
3651 case AArch64::STURWi:
3652 case AArch64::STRWui:
3653 return AArch64::STRWroW;
3654 case AArch64::LDRHroX:
3655 case AArch64::LDURHi:
3656 case AArch64::LDRHui:
3657 return AArch64::LDRHroW;
3658 case AArch64::STRHroX:
3659 case AArch64::STURHi:
3660 case AArch64::STRHui:
3661 return AArch64::STRHroW;
3662 case AArch64::LDRHHroX:
3663 case AArch64::LDURHHi:
3664 case AArch64::LDRHHui:
3665 return AArch64::LDRHHroW;
3666 case AArch64::STRHHroX:
3667 case AArch64::STURHHi:
3668 case AArch64::STRHHui:
3669 return AArch64::STRHHroW;
3670 case AArch64::LDRSHXroX:
3671 case AArch64::LDURSHXi:
3672 case AArch64::LDRSHXui:
3673 return AArch64::LDRSHXroW;
3674 case AArch64::LDRSHWroX:
3675 case AArch64::LDURSHWi:
3676 case AArch64::LDRSHWui:
3677 return AArch64::LDRSHWroW;
3678 case AArch64::LDRBroX:
3679 case AArch64::LDURBi:
3680 case AArch64::LDRBui:
3681 return AArch64::LDRBroW;
3682 case AArch64::LDRBBroX:
3683 case AArch64::LDURBBi:
3684 case AArch64::LDRBBui:
3685 return AArch64::LDRBBroW;
3686 case AArch64::LDRSBXroX:
3687 case AArch64::LDURSBXi:
3688 case AArch64::LDRSBXui:
3689 return AArch64::LDRSBXroW;
3690 case AArch64::LDRSBWroX:
3691 case AArch64::LDURSBWi:
3692 case AArch64::LDRSBWui:
3693 return AArch64::LDRSBWroW;
3694 case AArch64::STRBroX:
3695 case AArch64::STURBi:
3696 case AArch64::STRBui:
3697 return AArch64::STRBroW;
3698 case AArch64::STRBBroX:
3699 case AArch64::STURBBi:
3700 case AArch64::STRBBui:
3701 return AArch64::STRBBroW;
3702 }
3703}
3704
3705MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3706 const ExtAddrMode &AM) const {
3707
3708 const DebugLoc &DL = MemI.getDebugLoc();
3709 MachineBasicBlock &MBB = *MemI.getParent();
3710 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3711
3712 if (AM.Form == ExtAddrMode::Formula::Basic) {
3713 if (AM.ScaledReg) {
3714 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3715 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3716 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3717 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3718 .addReg(MemI.getOperand(0).getReg(),
3719 MemI.mayLoad() ? RegState::Define : 0)
3720 .addReg(AM.BaseReg)
3721 .addReg(AM.ScaledReg)
3722 .addImm(0)
3723 .addImm(AM.Scale > 1)
3724 .setMemRefs(MemI.memoperands())
3725 .setMIFlags(MemI.getFlags());
3726 return B.getInstr();
3727 }
3728
3729 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3730 "Addressing mode not supported for folding");
3731
3732 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3733 unsigned Scale = 1;
3734 unsigned Opcode = MemI.getOpcode();
3735 if (isInt<9>(AM.Displacement))
3736 Opcode = unscaledOffsetOpcode(Opcode);
3737 else
3738 Opcode = scaledOffsetOpcode(Opcode, Scale);
3739
3740 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3741 .addReg(MemI.getOperand(0).getReg(),
3742 MemI.mayLoad() ? RegState::Define : 0)
3743 .addReg(AM.BaseReg)
3744 .addImm(AM.Displacement / Scale)
3745 .setMemRefs(MemI.memoperands())
3746 .setMIFlags(MemI.getFlags());
3747 return B.getInstr();
3748 }
3749
 3750   if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
 3751       AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
 3752     // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3753 assert(AM.ScaledReg && !AM.Displacement &&
3754 "Address offset can be a register or an immediate, but not both");
3755 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3756 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3757 // Make sure the offset register is in the correct register class.
3758 Register OffsetReg = AM.ScaledReg;
3759 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3760 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3761 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3762 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3763 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3764 }
3765 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3766 .addReg(MemI.getOperand(0).getReg(),
3767 MemI.mayLoad() ? RegState::Define : 0)
3768 .addReg(AM.BaseReg)
3769 .addReg(OffsetReg)
 3770                  .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
 3771                  .addImm(AM.Scale != 1)
3772 .setMemRefs(MemI.memoperands())
3773 .setMIFlags(MemI.getFlags());
3774
3775 return B.getInstr();
3776 }
3777
 3778   llvm_unreachable(
 3779       "Function must not be called with an addressing mode it can't handle");
3780}
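// Rough summary of the three forms built above (illustrative only):
//   scaled register:              ldr  x0, [x1, x2, lsl #3]
//   immediate displacement:       ldur x0, [x1, #-8]  or  ldr x0, [x1, #32]
//   sign-/zero-extended register: ldr  x0, [x1, w2, sxtw #3]
// The unscaled (LDUR-style) form is preferred whenever the displacement fits
// the signed 9-bit range checked by isInt<9> above.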
3781
3782/// Return true if the opcode is a post-index ld/st instruction, which really
3783/// loads from base+0.
3784static bool isPostIndexLdStOpcode(unsigned Opcode) {
3785 switch (Opcode) {
3786 default:
3787 return false;
3788 case AArch64::LD1Fourv16b_POST:
3789 case AArch64::LD1Fourv1d_POST:
3790 case AArch64::LD1Fourv2d_POST:
3791 case AArch64::LD1Fourv2s_POST:
3792 case AArch64::LD1Fourv4h_POST:
3793 case AArch64::LD1Fourv4s_POST:
3794 case AArch64::LD1Fourv8b_POST:
3795 case AArch64::LD1Fourv8h_POST:
3796 case AArch64::LD1Onev16b_POST:
3797 case AArch64::LD1Onev1d_POST:
3798 case AArch64::LD1Onev2d_POST:
3799 case AArch64::LD1Onev2s_POST:
3800 case AArch64::LD1Onev4h_POST:
3801 case AArch64::LD1Onev4s_POST:
3802 case AArch64::LD1Onev8b_POST:
3803 case AArch64::LD1Onev8h_POST:
3804 case AArch64::LD1Rv16b_POST:
3805 case AArch64::LD1Rv1d_POST:
3806 case AArch64::LD1Rv2d_POST:
3807 case AArch64::LD1Rv2s_POST:
3808 case AArch64::LD1Rv4h_POST:
3809 case AArch64::LD1Rv4s_POST:
3810 case AArch64::LD1Rv8b_POST:
3811 case AArch64::LD1Rv8h_POST:
3812 case AArch64::LD1Threev16b_POST:
3813 case AArch64::LD1Threev1d_POST:
3814 case AArch64::LD1Threev2d_POST:
3815 case AArch64::LD1Threev2s_POST:
3816 case AArch64::LD1Threev4h_POST:
3817 case AArch64::LD1Threev4s_POST:
3818 case AArch64::LD1Threev8b_POST:
3819 case AArch64::LD1Threev8h_POST:
3820 case AArch64::LD1Twov16b_POST:
3821 case AArch64::LD1Twov1d_POST:
3822 case AArch64::LD1Twov2d_POST:
3823 case AArch64::LD1Twov2s_POST:
3824 case AArch64::LD1Twov4h_POST:
3825 case AArch64::LD1Twov4s_POST:
3826 case AArch64::LD1Twov8b_POST:
3827 case AArch64::LD1Twov8h_POST:
3828 case AArch64::LD1i16_POST:
3829 case AArch64::LD1i32_POST:
3830 case AArch64::LD1i64_POST:
3831 case AArch64::LD1i8_POST:
3832 case AArch64::LD2Rv16b_POST:
3833 case AArch64::LD2Rv1d_POST:
3834 case AArch64::LD2Rv2d_POST:
3835 case AArch64::LD2Rv2s_POST:
3836 case AArch64::LD2Rv4h_POST:
3837 case AArch64::LD2Rv4s_POST:
3838 case AArch64::LD2Rv8b_POST:
3839 case AArch64::LD2Rv8h_POST:
3840 case AArch64::LD2Twov16b_POST:
3841 case AArch64::LD2Twov2d_POST:
3842 case AArch64::LD2Twov2s_POST:
3843 case AArch64::LD2Twov4h_POST:
3844 case AArch64::LD2Twov4s_POST:
3845 case AArch64::LD2Twov8b_POST:
3846 case AArch64::LD2Twov8h_POST:
3847 case AArch64::LD2i16_POST:
3848 case AArch64::LD2i32_POST:
3849 case AArch64::LD2i64_POST:
3850 case AArch64::LD2i8_POST:
3851 case AArch64::LD3Rv16b_POST:
3852 case AArch64::LD3Rv1d_POST:
3853 case AArch64::LD3Rv2d_POST:
3854 case AArch64::LD3Rv2s_POST:
3855 case AArch64::LD3Rv4h_POST:
3856 case AArch64::LD3Rv4s_POST:
3857 case AArch64::LD3Rv8b_POST:
3858 case AArch64::LD3Rv8h_POST:
3859 case AArch64::LD3Threev16b_POST:
3860 case AArch64::LD3Threev2d_POST:
3861 case AArch64::LD3Threev2s_POST:
3862 case AArch64::LD3Threev4h_POST:
3863 case AArch64::LD3Threev4s_POST:
3864 case AArch64::LD3Threev8b_POST:
3865 case AArch64::LD3Threev8h_POST:
3866 case AArch64::LD3i16_POST:
3867 case AArch64::LD3i32_POST:
3868 case AArch64::LD3i64_POST:
3869 case AArch64::LD3i8_POST:
3870 case AArch64::LD4Fourv16b_POST:
3871 case AArch64::LD4Fourv2d_POST:
3872 case AArch64::LD4Fourv2s_POST:
3873 case AArch64::LD4Fourv4h_POST:
3874 case AArch64::LD4Fourv4s_POST:
3875 case AArch64::LD4Fourv8b_POST:
3876 case AArch64::LD4Fourv8h_POST:
3877 case AArch64::LD4Rv16b_POST:
3878 case AArch64::LD4Rv1d_POST:
3879 case AArch64::LD4Rv2d_POST:
3880 case AArch64::LD4Rv2s_POST:
3881 case AArch64::LD4Rv4h_POST:
3882 case AArch64::LD4Rv4s_POST:
3883 case AArch64::LD4Rv8b_POST:
3884 case AArch64::LD4Rv8h_POST:
3885 case AArch64::LD4i16_POST:
3886 case AArch64::LD4i32_POST:
3887 case AArch64::LD4i64_POST:
3888 case AArch64::LD4i8_POST:
3889 case AArch64::LDAPRWpost:
3890 case AArch64::LDAPRXpost:
3891 case AArch64::LDIAPPWpost:
3892 case AArch64::LDIAPPXpost:
3893 case AArch64::LDPDpost:
3894 case AArch64::LDPQpost:
3895 case AArch64::LDPSWpost:
3896 case AArch64::LDPSpost:
3897 case AArch64::LDPWpost:
3898 case AArch64::LDPXpost:
3899 case AArch64::LDRBBpost:
3900 case AArch64::LDRBpost:
3901 case AArch64::LDRDpost:
3902 case AArch64::LDRHHpost:
3903 case AArch64::LDRHpost:
3904 case AArch64::LDRQpost:
3905 case AArch64::LDRSBWpost:
3906 case AArch64::LDRSBXpost:
3907 case AArch64::LDRSHWpost:
3908 case AArch64::LDRSHXpost:
3909 case AArch64::LDRSWpost:
3910 case AArch64::LDRSpost:
3911 case AArch64::LDRWpost:
3912 case AArch64::LDRXpost:
3913 case AArch64::ST1Fourv16b_POST:
3914 case AArch64::ST1Fourv1d_POST:
3915 case AArch64::ST1Fourv2d_POST:
3916 case AArch64::ST1Fourv2s_POST:
3917 case AArch64::ST1Fourv4h_POST:
3918 case AArch64::ST1Fourv4s_POST:
3919 case AArch64::ST1Fourv8b_POST:
3920 case AArch64::ST1Fourv8h_POST:
3921 case AArch64::ST1Onev16b_POST:
3922 case AArch64::ST1Onev1d_POST:
3923 case AArch64::ST1Onev2d_POST:
3924 case AArch64::ST1Onev2s_POST:
3925 case AArch64::ST1Onev4h_POST:
3926 case AArch64::ST1Onev4s_POST:
3927 case AArch64::ST1Onev8b_POST:
3928 case AArch64::ST1Onev8h_POST:
3929 case AArch64::ST1Threev16b_POST:
3930 case AArch64::ST1Threev1d_POST:
3931 case AArch64::ST1Threev2d_POST:
3932 case AArch64::ST1Threev2s_POST:
3933 case AArch64::ST1Threev4h_POST:
3934 case AArch64::ST1Threev4s_POST:
3935 case AArch64::ST1Threev8b_POST:
3936 case AArch64::ST1Threev8h_POST:
3937 case AArch64::ST1Twov16b_POST:
3938 case AArch64::ST1Twov1d_POST:
3939 case AArch64::ST1Twov2d_POST:
3940 case AArch64::ST1Twov2s_POST:
3941 case AArch64::ST1Twov4h_POST:
3942 case AArch64::ST1Twov4s_POST:
3943 case AArch64::ST1Twov8b_POST:
3944 case AArch64::ST1Twov8h_POST:
3945 case AArch64::ST1i16_POST:
3946 case AArch64::ST1i32_POST:
3947 case AArch64::ST1i64_POST:
3948 case AArch64::ST1i8_POST:
3949 case AArch64::ST2GPostIndex:
3950 case AArch64::ST2Twov16b_POST:
3951 case AArch64::ST2Twov2d_POST:
3952 case AArch64::ST2Twov2s_POST:
3953 case AArch64::ST2Twov4h_POST:
3954 case AArch64::ST2Twov4s_POST:
3955 case AArch64::ST2Twov8b_POST:
3956 case AArch64::ST2Twov8h_POST:
3957 case AArch64::ST2i16_POST:
3958 case AArch64::ST2i32_POST:
3959 case AArch64::ST2i64_POST:
3960 case AArch64::ST2i8_POST:
3961 case AArch64::ST3Threev16b_POST:
3962 case AArch64::ST3Threev2d_POST:
3963 case AArch64::ST3Threev2s_POST:
3964 case AArch64::ST3Threev4h_POST:
3965 case AArch64::ST3Threev4s_POST:
3966 case AArch64::ST3Threev8b_POST:
3967 case AArch64::ST3Threev8h_POST:
3968 case AArch64::ST3i16_POST:
3969 case AArch64::ST3i32_POST:
3970 case AArch64::ST3i64_POST:
3971 case AArch64::ST3i8_POST:
3972 case AArch64::ST4Fourv16b_POST:
3973 case AArch64::ST4Fourv2d_POST:
3974 case AArch64::ST4Fourv2s_POST:
3975 case AArch64::ST4Fourv4h_POST:
3976 case AArch64::ST4Fourv4s_POST:
3977 case AArch64::ST4Fourv8b_POST:
3978 case AArch64::ST4Fourv8h_POST:
3979 case AArch64::ST4i16_POST:
3980 case AArch64::ST4i32_POST:
3981 case AArch64::ST4i64_POST:
3982 case AArch64::ST4i8_POST:
3983 case AArch64::STGPostIndex:
3984 case AArch64::STGPpost:
3985 case AArch64::STPDpost:
3986 case AArch64::STPQpost:
3987 case AArch64::STPSpost:
3988 case AArch64::STPWpost:
3989 case AArch64::STPXpost:
3990 case AArch64::STRBBpost:
3991 case AArch64::STRBpost:
3992 case AArch64::STRDpost:
3993 case AArch64::STRHHpost:
3994 case AArch64::STRHpost:
3995 case AArch64::STRQpost:
3996 case AArch64::STRSpost:
3997 case AArch64::STRWpost:
3998 case AArch64::STRXpost:
3999 case AArch64::STZ2GPostIndex:
4000 case AArch64::STZGPostIndex:
4001 return true;
4002 }
4003}
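// For example (illustrative): `ldr x0, [x1], #8` (LDRXpost) reads from
// [x1 + 0] and only then writes x1 + 8 back to the base register, so for the
// offset computation below the access itself is treated as having offset 0.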
4004
 4005 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
 4006     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4007 bool &OffsetIsScalable, TypeSize &Width,
4008 const TargetRegisterInfo *TRI) const {
4009 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4010 // Handle only loads/stores with base register followed by immediate offset.
4011 if (LdSt.getNumExplicitOperands() == 3) {
4012 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4013 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4014 !LdSt.getOperand(2).isImm())
4015 return false;
4016 } else if (LdSt.getNumExplicitOperands() == 4) {
4017 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4018 if (!LdSt.getOperand(1).isReg() ||
4019 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4020 !LdSt.getOperand(3).isImm())
4021 return false;
4022 } else
4023 return false;
4024
4025 // Get the scaling factor for the instruction and set the width for the
4026 // instruction.
4027 TypeSize Scale(0U, false);
4028 int64_t Dummy1, Dummy2;
4029
4030 // If this returns false, then it's an instruction we don't want to handle.
4031 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4032 return false;
4033
 4034   // Compute the offset. The offset is the immediate operand multiplied by the
 4035   // scaling factor; unscaled instructions have a scaling factor of 1. Post-index
 4036   // instructions are a special case and have an offset of 0.
4037 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4038 BaseOp = &LdSt.getOperand(2);
4039 Offset = 0;
4040 } else if (LdSt.getNumExplicitOperands() == 3) {
4041 BaseOp = &LdSt.getOperand(1);
4042 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4043 } else {
4044 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4045 BaseOp = &LdSt.getOperand(2);
4046 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4047 }
4048 OffsetIsScalable = Scale.isScalable();
4049
4050 return BaseOp->isReg() || BaseOp->isFI();
4051}
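// Worked example (a sketch): LDRXui with immediate operand 3, i.e.
// `ldr x1, [x0, #24]`, has Scale = 8 from getMemOpInfo, so the byte offset
// reported here is 3 * 8 = 24; the unscaled LDURXi form has Scale = 1 and its
// immediate is already a byte offset.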
4052
4055 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4056 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4057 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4058 return OfsOp;
4059}
4060
4061bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4062 TypeSize &Width, int64_t &MinOffset,
4063 int64_t &MaxOffset) {
4064 switch (Opcode) {
4065 // Not a memory operation or something we want to handle.
4066 default:
4067 Scale = TypeSize::getFixed(0);
4068 Width = TypeSize::getFixed(0);
4069 MinOffset = MaxOffset = 0;
4070 return false;
4071 // LDR / STR
4072 case AArch64::LDRQui:
4073 case AArch64::STRQui:
4074 Scale = TypeSize::getFixed(16);
4075 Width = TypeSize::getFixed(16);
4076 MinOffset = 0;
4077 MaxOffset = 4095;
4078 break;
4079 case AArch64::LDRXui:
4080 case AArch64::LDRDui:
4081 case AArch64::STRXui:
4082 case AArch64::STRDui:
4083 case AArch64::PRFMui:
4084 Scale = TypeSize::getFixed(8);
4085 Width = TypeSize::getFixed(8);
4086 MinOffset = 0;
4087 MaxOffset = 4095;
4088 break;
4089 case AArch64::LDRWui:
4090 case AArch64::LDRSui:
4091 case AArch64::LDRSWui:
4092 case AArch64::STRWui:
4093 case AArch64::STRSui:
4094 Scale = TypeSize::getFixed(4);
4095 Width = TypeSize::getFixed(4);
4096 MinOffset = 0;
4097 MaxOffset = 4095;
4098 break;
4099 case AArch64::LDRHui:
4100 case AArch64::LDRHHui:
4101 case AArch64::LDRSHWui:
4102 case AArch64::LDRSHXui:
4103 case AArch64::STRHui:
4104 case AArch64::STRHHui:
4105 Scale = TypeSize::getFixed(2);
4106 Width = TypeSize::getFixed(2);
4107 MinOffset = 0;
4108 MaxOffset = 4095;
4109 break;
4110 case AArch64::LDRBui:
4111 case AArch64::LDRBBui:
4112 case AArch64::LDRSBWui:
4113 case AArch64::LDRSBXui:
4114 case AArch64::STRBui:
4115 case AArch64::STRBBui:
4116 Scale = TypeSize::getFixed(1);
4117 Width = TypeSize::getFixed(1);
4118 MinOffset = 0;
4119 MaxOffset = 4095;
4120 break;
4121 // post/pre inc
4122 case AArch64::STRQpre:
4123 case AArch64::LDRQpost:
4124 Scale = TypeSize::getFixed(1);
4125 Width = TypeSize::getFixed(16);
4126 MinOffset = -256;
4127 MaxOffset = 255;
4128 break;
4129 case AArch64::LDRDpost:
4130 case AArch64::LDRDpre:
4131 case AArch64::LDRXpost:
4132 case AArch64::LDRXpre:
4133 case AArch64::STRDpost:
4134 case AArch64::STRDpre:
4135 case AArch64::STRXpost:
4136 case AArch64::STRXpre:
4137 Scale = TypeSize::getFixed(1);
4138 Width = TypeSize::getFixed(8);
4139 MinOffset = -256;
4140 MaxOffset = 255;
4141 break;
4142 case AArch64::STRWpost:
4143 case AArch64::STRWpre:
4144 case AArch64::LDRWpost:
4145 case AArch64::LDRWpre:
4146 case AArch64::STRSpost:
4147 case AArch64::STRSpre:
4148 case AArch64::LDRSpost:
4149 case AArch64::LDRSpre:
4150 Scale = TypeSize::getFixed(1);
4151 Width = TypeSize::getFixed(4);
4152 MinOffset = -256;
4153 MaxOffset = 255;
4154 break;
4155 case AArch64::LDRHpost:
4156 case AArch64::LDRHpre:
4157 case AArch64::STRHpost:
4158 case AArch64::STRHpre:
4159 case AArch64::LDRHHpost:
4160 case AArch64::LDRHHpre:
4161 case AArch64::STRHHpost:
4162 case AArch64::STRHHpre:
4163 Scale = TypeSize::getFixed(1);
4164 Width = TypeSize::getFixed(2);
4165 MinOffset = -256;
4166 MaxOffset = 255;
4167 break;
4168 case AArch64::LDRBpost:
4169 case AArch64::LDRBpre:
4170 case AArch64::STRBpost:
4171 case AArch64::STRBpre:
4172 case AArch64::LDRBBpost:
4173 case AArch64::LDRBBpre:
4174 case AArch64::STRBBpost:
4175 case AArch64::STRBBpre:
4176 Scale = TypeSize::getFixed(1);
4177 Width = TypeSize::getFixed(1);
4178 MinOffset = -256;
4179 MaxOffset = 255;
4180 break;
4181 // Unscaled
4182 case AArch64::LDURQi:
4183 case AArch64::STURQi:
4184 Scale = TypeSize::getFixed(1);
4185 Width = TypeSize::getFixed(16);
4186 MinOffset = -256;
4187 MaxOffset = 255;
4188 break;
4189 case AArch64::LDURXi:
4190 case AArch64::LDURDi:
4191 case AArch64::LDAPURXi:
4192 case AArch64::STURXi:
4193 case AArch64::STURDi:
4194 case AArch64::STLURXi:
4195 case AArch64::PRFUMi:
4196 Scale = TypeSize::getFixed(1);
4197 Width = TypeSize::getFixed(8);
4198 MinOffset = -256;
4199 MaxOffset = 255;
4200 break;
4201 case AArch64::LDURWi:
4202 case AArch64::LDURSi:
4203 case AArch64::LDURSWi:
4204 case AArch64::LDAPURi:
4205 case AArch64::LDAPURSWi:
4206 case AArch64::STURWi:
4207 case AArch64::STURSi:
4208 case AArch64::STLURWi:
4209 Scale = TypeSize::getFixed(1);
4210 Width = TypeSize::getFixed(4);
4211 MinOffset = -256;
4212 MaxOffset = 255;
4213 break;
4214 case AArch64::LDURHi:
4215 case AArch64::LDURHHi:
4216 case AArch64::LDURSHXi:
4217 case AArch64::LDURSHWi:
4218 case AArch64::LDAPURHi:
4219 case AArch64::LDAPURSHWi:
4220 case AArch64::LDAPURSHXi:
4221 case AArch64::STURHi:
4222 case AArch64::STURHHi:
4223 case AArch64::STLURHi:
4224 Scale = TypeSize::getFixed(1);
4225 Width = TypeSize::getFixed(2);
4226 MinOffset = -256;
4227 MaxOffset = 255;
4228 break;
4229 case AArch64::LDURBi:
4230 case AArch64::LDURBBi:
4231 case AArch64::LDURSBXi:
4232 case AArch64::LDURSBWi:
4233 case AArch64::LDAPURBi:
4234 case AArch64::LDAPURSBWi:
4235 case AArch64::LDAPURSBXi:
4236 case AArch64::STURBi:
4237 case AArch64::STURBBi:
4238 case AArch64::STLURBi:
4239 Scale = TypeSize::getFixed(1);
4240 Width = TypeSize::getFixed(1);
4241 MinOffset = -256;
4242 MaxOffset = 255;
4243 break;
4244 // LDP / STP (including pre/post inc)
4245 case AArch64::LDPQi:
4246 case AArch64::LDNPQi:
4247 case AArch64::STPQi:
4248 case AArch64::STNPQi:
4249 case AArch64::LDPQpost:
4250 case AArch64::LDPQpre:
4251 case AArch64::STPQpost:
4252 case AArch64::STPQpre:
4253 Scale = TypeSize::getFixed(16);
4254 Width = TypeSize::getFixed(16 * 2);
4255 MinOffset = -64;
4256 MaxOffset = 63;
4257 break;
4258 case AArch64::LDPXi:
4259 case AArch64::LDPDi:
4260 case AArch64::LDNPXi:
4261 case AArch64::LDNPDi:
4262 case AArch64::STPXi:
4263 case AArch64::STPDi:
4264 case AArch64::STNPXi:
4265 case AArch64::STNPDi:
4266 case AArch64::LDPDpost:
4267 case AArch64::LDPDpre:
4268 case AArch64::LDPXpost:
4269 case AArch64::LDPXpre:
4270 case AArch64::STPDpost:
4271 case AArch64::STPDpre:
4272 case AArch64::STPXpost:
4273 case AArch64::STPXpre:
4274 Scale = TypeSize::getFixed(8);
4275 Width = TypeSize::getFixed(8 * 2);
4276 MinOffset = -64;
4277 MaxOffset = 63;
4278 break;
4279 case AArch64::LDPWi:
4280 case AArch64::LDPSi:
4281 case AArch64::LDNPWi:
4282 case AArch64::LDNPSi:
4283 case AArch64::STPWi:
4284 case AArch64::STPSi:
4285 case AArch64::STNPWi:
4286 case AArch64::STNPSi:
4287 case AArch64::LDPSpost:
4288 case AArch64::LDPSpre:
4289 case AArch64::LDPWpost:
4290 case AArch64::LDPWpre:
4291 case AArch64::STPSpost:
4292 case AArch64::STPSpre:
4293 case AArch64::STPWpost:
4294 case AArch64::STPWpre:
4295 Scale = TypeSize::getFixed(4);
4296 Width = TypeSize::getFixed(4 * 2);
4297 MinOffset = -64;
4298 MaxOffset = 63;
4299 break;
4300 case AArch64::StoreSwiftAsyncContext:
4301 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4302 Scale = TypeSize::getFixed(1);
4303 Width = TypeSize::getFixed(8);
4304 MinOffset = 0;
4305 MaxOffset = 4095;
4306 break;
4307 case AArch64::ADDG:
4308 Scale = TypeSize::getFixed(16);
4309 Width = TypeSize::getFixed(0);
4310 MinOffset = 0;
4311 MaxOffset = 63;
4312 break;
4313 case AArch64::TAGPstack:
4314 Scale = TypeSize::getFixed(16);
4315 Width = TypeSize::getFixed(0);
4316 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4317 // of 63 (not 64!).
4318 MinOffset = -63;
4319 MaxOffset = 63;
4320 break;
4321 case AArch64::LDG:
4322 case AArch64::STGi:
4323 case AArch64::STGPreIndex:
4324 case AArch64::STGPostIndex:
4325 case AArch64::STZGi:
4326 case AArch64::STZGPreIndex:
4327 case AArch64::STZGPostIndex:
4328 Scale = TypeSize::getFixed(16);
4329 Width = TypeSize::getFixed(16);
4330 MinOffset = -256;
4331 MaxOffset = 255;
4332 break;
4333 // SVE
4334 case AArch64::STR_ZZZZXI:
4335 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4336 case AArch64::LDR_ZZZZXI:
4337 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4338 Scale = TypeSize::getScalable(16);
4339 Width = TypeSize::getScalable(16 * 4);
4340 MinOffset = -256;
4341 MaxOffset = 252;
4342 break;
4343 case AArch64::STR_ZZZXI:
4344 case AArch64::LDR_ZZZXI:
4345 Scale = TypeSize::getScalable(16);
4346 Width = TypeSize::getScalable(16 * 3);
4347 MinOffset = -256;
4348 MaxOffset = 253;
4349 break;
4350 case AArch64::STR_ZZXI:
4351 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4352 case AArch64::LDR_ZZXI:
4353 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4354 Scale = TypeSize::getScalable(16);
4355 Width = TypeSize::getScalable(16 * 2);
4356 MinOffset = -256;
4357 MaxOffset = 254;
4358 break;
4359 case AArch64::LDR_PXI:
4360 case AArch64::STR_PXI:
4361 Scale = TypeSize::getScalable(2);
4362 Width = TypeSize::getScalable(2);
4363 MinOffset = -256;
4364 MaxOffset = 255;
4365 break;
4366 case AArch64::LDR_PPXI:
4367 case AArch64::STR_PPXI:
4368 Scale = TypeSize::getScalable(2);
4369 Width = TypeSize::getScalable(2 * 2);
4370 MinOffset = -256;
4371 MaxOffset = 254;
4372 break;
4373 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4374 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4375 case AArch64::LDR_ZXI:
4376 case AArch64::STR_ZXI:
4377 Scale = TypeSize::getScalable(16);
4378 Width = TypeSize::getScalable(16);
4379 MinOffset = -256;
4380 MaxOffset = 255;
4381 break;
4382 case AArch64::LD1B_IMM:
4383 case AArch64::LD1H_IMM:
4384 case AArch64::LD1W_IMM:
4385 case AArch64::LD1D_IMM:
4386 case AArch64::LDNT1B_ZRI:
4387 case AArch64::LDNT1H_ZRI:
4388 case AArch64::LDNT1W_ZRI:
4389 case AArch64::LDNT1D_ZRI:
4390 case AArch64::ST1B_IMM:
4391 case AArch64::ST1H_IMM:
4392 case AArch64::ST1W_IMM:
4393 case AArch64::ST1D_IMM:
4394 case AArch64::STNT1B_ZRI:
4395 case AArch64::STNT1H_ZRI:
4396 case AArch64::STNT1W_ZRI:
4397 case AArch64::STNT1D_ZRI:
4398 case AArch64::LDNF1B_IMM:
4399 case AArch64::LDNF1H_IMM:
4400 case AArch64::LDNF1W_IMM:
4401 case AArch64::LDNF1D_IMM:
 4402     // A full vector's worth of data
4403 // Width = mbytes * elements
4404 Scale = TypeSize::getScalable(16);
4405 Width = TypeSize::getScalable(16);
4406 MinOffset = -8;
4407 MaxOffset = 7;
4408 break;
4409 case AArch64::LD2B_IMM:
4410 case AArch64::LD2H_IMM:
4411 case AArch64::LD2W_IMM:
4412 case AArch64::LD2D_IMM:
4413 case AArch64::ST2B_IMM:
4414 case AArch64::ST2H_IMM:
4415 case AArch64::ST2W_IMM:
4416 case AArch64::ST2D_IMM:
4417 Scale = TypeSize::getScalable(32);
4418 Width = TypeSize::getScalable(16 * 2);
4419 MinOffset = -8;
4420 MaxOffset = 7;
4421 break;
4422 case AArch64::LD3B_IMM:
4423 case AArch64::LD3H_IMM:
4424 case AArch64::LD3W_IMM:
4425 case AArch64::LD3D_IMM:
4426 case AArch64::ST3B_IMM:
4427 case AArch64::ST3H_IMM:
4428 case AArch64::ST3W_IMM:
4429 case AArch64::ST3D_IMM:
4430 Scale = TypeSize::getScalable(48);
4431 Width = TypeSize::getScalable(16 * 3);
4432 MinOffset = -8;
4433 MaxOffset = 7;
4434 break;
4435 case AArch64::LD4B_IMM:
4436 case AArch64::LD4H_IMM:
4437 case AArch64::LD4W_IMM:
4438 case AArch64::LD4D_IMM:
4439 case AArch64::ST4B_IMM:
4440 case AArch64::ST4H_IMM:
4441 case AArch64::ST4W_IMM:
4442 case AArch64::ST4D_IMM:
4443 Scale = TypeSize::getScalable(64);
4444 Width = TypeSize::getScalable(16 * 4);
4445 MinOffset = -8;
4446 MaxOffset = 7;
4447 break;
4448 case AArch64::LD1B_H_IMM:
4449 case AArch64::LD1SB_H_IMM:
4450 case AArch64::LD1H_S_IMM:
4451 case AArch64::LD1SH_S_IMM:
4452 case AArch64::LD1W_D_IMM:
4453 case AArch64::LD1SW_D_IMM:
4454 case AArch64::ST1B_H_IMM:
4455 case AArch64::ST1H_S_IMM:
4456 case AArch64::ST1W_D_IMM:
4457 case AArch64::LDNF1B_H_IMM:
4458 case AArch64::LDNF1SB_H_IMM:
4459 case AArch64::LDNF1H_S_IMM:
4460 case AArch64::LDNF1SH_S_IMM:
4461 case AArch64::LDNF1W_D_IMM:
4462 case AArch64::LDNF1SW_D_IMM:
 4463     // A half vector's worth of data
4464 // Width = mbytes * elements
4465 Scale = TypeSize::getScalable(8);
4466 Width = TypeSize::getScalable(8);
4467 MinOffset = -8;
4468 MaxOffset = 7;
4469 break;
4470 case AArch64::LD1B_S_IMM:
4471 case AArch64::LD1SB_S_IMM:
4472 case AArch64::LD1H_D_IMM:
4473 case AArch64::LD1SH_D_IMM:
4474 case AArch64::ST1B_S_IMM:
4475 case AArch64::ST1H_D_IMM:
4476 case AArch64::LDNF1B_S_IMM:
4477 case AArch64::LDNF1SB_S_IMM:
4478 case AArch64::LDNF1H_D_IMM:
4479 case AArch64::LDNF1SH_D_IMM:
 4480     // A quarter vector's worth of data
4481 // Width = mbytes * elements
4482 Scale = TypeSize::getScalable(4);
4483 Width = TypeSize::getScalable(4);
4484 MinOffset = -8;
4485 MaxOffset = 7;
4486 break;
4487 case AArch64::LD1B_D_IMM:
4488 case AArch64::LD1SB_D_IMM:
4489 case AArch64::ST1B_D_IMM:
4490 case AArch64::LDNF1B_D_IMM:
4491 case AArch64::LDNF1SB_D_IMM:
 4492     // An eighth vector's worth of data
4493 // Width = mbytes * elements
4494 Scale = TypeSize::getScalable(2);
4495 Width = TypeSize::getScalable(2);
4496 MinOffset = -8;
4497 MaxOffset = 7;
4498 break;
4499 case AArch64::ST2Gi:
4500 case AArch64::ST2GPreIndex:
4501 case AArch64::ST2GPostIndex:
4502 case AArch64::STZ2Gi:
4503 case AArch64::STZ2GPreIndex:
4504 case AArch64::STZ2GPostIndex:
4505 Scale = TypeSize::getFixed(16);
4506 Width = TypeSize::getFixed(32);
4507 MinOffset = -256;
4508 MaxOffset = 255;
4509 break;
4510 case AArch64::STGPi:
4511 case AArch64::STGPpost:
4512 case AArch64::STGPpre:
4513 Scale = TypeSize::getFixed(16);
4514 Width = TypeSize::getFixed(16);
4515 MinOffset = -64;
4516 MaxOffset = 63;
4517 break;
4518 case AArch64::LD1RB_IMM:
4519 case AArch64::LD1RB_H_IMM:
4520 case AArch64::LD1RB_S_IMM:
4521 case AArch64::LD1RB_D_IMM:
4522 case AArch64::LD1RSB_H_IMM:
4523 case AArch64::LD1RSB_S_IMM:
4524 case AArch64::LD1RSB_D_IMM:
4525 Scale = TypeSize::getFixed(1);
4526 Width = TypeSize::getFixed(1);
4527 MinOffset = 0;
4528 MaxOffset = 63;
4529 break;
4530 case AArch64::LD1RH_IMM:
4531 case AArch64::LD1RH_S_IMM:
4532 case AArch64::LD1RH_D_IMM:
4533 case AArch64::LD1RSH_S_IMM:
4534 case AArch64::LD1RSH_D_IMM:
4535 Scale = TypeSize::getFixed(2);
4536 Width = TypeSize::getFixed(2);
4537 MinOffset = 0;
4538 MaxOffset = 63;
4539 break;
4540 case AArch64::LD1RW_IMM:
4541 case AArch64::LD1RW_D_IMM:
4542 case AArch64::LD1RSW_IMM:
4543 Scale = TypeSize::getFixed(4);
4544 Width = TypeSize::getFixed(4);
4545 MinOffset = 0;
4546 MaxOffset = 63;
4547 break;
4548 case AArch64::LD1RD_IMM:
4549 Scale = TypeSize::getFixed(8);
4550 Width = TypeSize::getFixed(8);
4551 MinOffset = 0;
4552 MaxOffset = 63;
4553 break;
4554 }
4555
4556 return true;
4557}
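// Usage sketch (hypothetical caller, not code from this file): checking
// whether a byte offset is directly encodable for a given opcode.
//
//   TypeSize Scale(0U, false), Width(0U, false);
//   int64_t MinOff, MaxOff;
//   if (AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOff, MaxOff)) {
//     int64_t Unit = Scale.getKnownMinValue();
//     bool Encodable = (ByteOffset % Unit) == 0 &&
//                      ByteOffset / Unit >= MinOff &&
//                      ByteOffset / Unit <= MaxOff;
//   }
//
// MinOffset/MaxOffset are in Scale-sized units, e.g. LDRXui covers byte
// offsets 0 .. 4095 * 8.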
4558
4559// Scaling factor for unscaled load or store.
 4560 int AArch64InstrInfo::getMemScale(unsigned Opc) {
 4561   switch (Opc) {
4562 default:
4563 llvm_unreachable("Opcode has unknown scale!");
4564 case AArch64::LDRBBui:
4565 case AArch64::LDURBBi:
4566 case AArch64::LDRSBWui:
4567 case AArch64::LDURSBWi:
4568 case AArch64::STRBBui:
4569 case AArch64::STURBBi:
4570 return 1;
4571 case AArch64::LDRHHui:
4572 case AArch64::LDURHHi:
4573 case AArch64::LDRSHWui:
4574 case AArch64::LDURSHWi:
4575 case AArch64::STRHHui:
4576 case AArch64::STURHHi:
4577 return 2;
4578 case AArch64::LDRSui:
4579 case AArch64::LDURSi:
4580 case AArch64::LDRSpre:
4581 case AArch64::LDRSWui:
4582 case AArch64::LDURSWi:
4583 case AArch64::LDRSWpre:
4584 case AArch64::LDRWpre:
4585 case AArch64::LDRWui:
4586 case AArch64::LDURWi:
4587 case AArch64::STRSui:
4588 case AArch64::STURSi:
4589 case AArch64::STRSpre:
4590 case AArch64::STRWui:
4591 case AArch64::STURWi:
4592 case AArch64::STRWpre:
4593 case AArch64::LDPSi:
4594 case AArch64::LDPSWi:
4595 case AArch64::LDPWi:
4596 case AArch64::STPSi:
4597 case AArch64::STPWi:
4598 return 4;
4599 case AArch64::LDRDui:
4600 case AArch64::LDURDi:
4601 case AArch64::LDRDpre:
4602 case AArch64::LDRXui:
4603 case AArch64::LDURXi:
4604 case AArch64::LDRXpre:
4605 case AArch64::STRDui:
4606 case AArch64::STURDi:
4607 case AArch64::STRDpre:
4608 case AArch64::STRXui:
4609 case AArch64::STURXi:
4610 case AArch64::STRXpre:
4611 case AArch64::LDPDi:
4612 case AArch64::LDPXi:
4613 case AArch64::STPDi:
4614 case AArch64::STPXi:
4615 return 8;
4616 case AArch64::LDRQui:
4617 case AArch64::LDURQi:
4618 case AArch64::STRQui:
4619 case AArch64::STURQi:
4620 case AArch64::STRQpre:
4621 case AArch64::LDPQi:
4622 case AArch64::LDRQpre:
4623 case AArch64::STPQi:
4624 case AArch64::STGi:
4625 case AArch64::STZGi:
4626 case AArch64::ST2Gi:
4627 case AArch64::STZ2Gi:
4628 case AArch64::STGPi:
4629 return 16;
4630 }
4631}
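// E.g. (illustrative) getMemScale(AArch64::LDRWui) == 4 and
// getMemScale(AArch64::STPXi) == 8: the result is the size of one accessed
// element, which is also the unit the scaled immediate forms count in.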
4632
 4633 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
 4634   switch (MI.getOpcode()) {
4635 default:
4636 return false;
4637 case AArch64::LDRWpre:
4638 case AArch64::LDRXpre:
4639 case AArch64::LDRSWpre:
4640 case AArch64::LDRSpre:
4641 case AArch64::LDRDpre:
4642 case AArch64::LDRQpre:
4643 return true;
4644 }
4645}
4646
 4647 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
 4648   switch (MI.getOpcode()) {
4649 default:
4650 return false;
4651 case AArch64::STRWpre:
4652 case AArch64::STRXpre:
4653 case AArch64::STRSpre:
4654 case AArch64::STRDpre:
4655 case AArch64::STRQpre:
4656 return true;
4657 }
4658}
4659
 4660 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
 4661   return isPreLd(MI) || isPreSt(MI);
4662}
4663
 4664 bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
 4665   switch (MI.getOpcode()) {
4666 default:
4667 return false;
4668 case AArch64::LDPSi:
4669 case AArch64::LDPSWi:
4670 case AArch64::LDPDi:
4671 case AArch64::LDPQi:
4672 case AArch64::LDPWi:
4673 case AArch64::LDPXi:
4674 case AArch64::STPSi:
4675 case AArch64::STPDi:
4676 case AArch64::STPQi:
4677 case AArch64::STPWi:
4678 case AArch64::STPXi:
4679 case AArch64::STGPi:
4680 return true;
4681 }
4682}
4683
 4684 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
 4685   assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4686 unsigned Idx =
 4687       AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
 4688                                                                             : 1;
4689 return MI.getOperand(Idx);
4690}
4691
4692const MachineOperand &
 4693 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
 4694   assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4695 unsigned Idx =
 4696       AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
 4697                                                                             : 2;
4698 return MI.getOperand(Idx);
4699}
4700
4701const MachineOperand &
 4702 AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
 4703   switch (MI.getOpcode()) {
4704 default:
4705 llvm_unreachable("Unexpected opcode");
4706 case AArch64::LDRBroX:
4707 case AArch64::LDRBBroX:
4708 case AArch64::LDRSBXroX:
4709 case AArch64::LDRSBWroX:
4710 case AArch64::LDRHroX:
4711 case AArch64::LDRHHroX:
4712 case AArch64::LDRSHXroX:
4713 case AArch64::LDRSHWroX:
4714 case AArch64::LDRWroX:
4715 case AArch64::LDRSroX:
4716 case AArch64::LDRSWroX:
4717 case AArch64::LDRDroX:
4718 case AArch64::LDRXroX:
4719 case AArch64::LDRQroX:
4720 return MI.getOperand(4);
4721 }
4722}
4723
 4724 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
 4725                                               Register Reg) {
4726 if (MI.getParent() == nullptr)
4727 return nullptr;
4728 const MachineFunction *MF = MI.getParent()->getParent();
4729 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4730}
4731
4733 auto IsHFPR = [&](const MachineOperand &Op) {
4734 if (!Op.isReg())
4735 return false;
4736 auto Reg = Op.getReg();
4737 if (Reg.isPhysical())
4738 return AArch64::FPR16RegClass.contains(Reg);
4739 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4740 return TRC == &AArch64::FPR16RegClass ||
4741 TRC == &AArch64::FPR16_loRegClass;
4742 };
4743 return llvm::any_of(MI.operands(), IsHFPR);
4744}
4745
4747 auto IsQFPR = [&](const MachineOperand &Op) {
4748 if (!Op.isReg())
4749 return false;
4750 auto Reg = Op.getReg();
4751 if (Reg.isPhysical())
4752 return AArch64::FPR128RegClass.contains(Reg);
4753 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4754 return TRC == &AArch64::FPR128RegClass ||
4755 TRC == &AArch64::FPR128_loRegClass;
4756 };
4757 return llvm::any_of(MI.operands(), IsQFPR);
4758}
4759
4761 switch (MI.getOpcode()) {
4762 case AArch64::BRK:
4763 case AArch64::HLT:
4764 case AArch64::PACIASP:
4765 case AArch64::PACIBSP:
4766 // Implicit BTI behavior.
4767 return true;
4768 case AArch64::PAUTH_PROLOGUE:
4769 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4770 return true;
4771 case AArch64::HINT: {
4772 unsigned Imm = MI.getOperand(0).getImm();
4773 // Explicit BTI instruction.
4774 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4775 return true;
4776 // PACI(A|B)SP instructions.
4777 if (Imm == 25 || Imm == 27)
4778 return true;
4779 return false;
4780 }
4781 default:
4782 return false;
4783 }
4784}
4785
 4786 bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
 4787   if (Reg == 0)
4788 return false;
4789 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4790 return AArch64::FPR128RegClass.contains(Reg) ||
4791 AArch64::FPR64RegClass.contains(Reg) ||
4792 AArch64::FPR32RegClass.contains(Reg) ||
4793 AArch64::FPR16RegClass.contains(Reg) ||
4794 AArch64::FPR8RegClass.contains(Reg);
4795}
4796
 4797 bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
 4798   auto IsFPR = [&](const MachineOperand &Op) {
4799 if (!Op.isReg())
4800 return false;
4801 auto Reg = Op.getReg();
4802 if (Reg.isPhysical())
4803 return isFpOrNEON(Reg);
4804
4805 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4806 return TRC == &AArch64::FPR128RegClass ||
4807 TRC == &AArch64::FPR128_loRegClass ||
4808 TRC == &AArch64::FPR64RegClass ||
4809 TRC == &AArch64::FPR64_loRegClass ||
4810 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4811 TRC == &AArch64::FPR8RegClass;
4812 };
4813 return llvm::any_of(MI.operands(), IsFPR);
4814}
4815
4816// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4817// scaled.
4818static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4820
4821 // If the byte-offset isn't a multiple of the stride, we can't scale this
4822 // offset.
4823 if (Offset % Scale != 0)
4824 return false;
4825
4826 // Convert the byte-offset used by unscaled into an "element" offset used
4827 // by the scaled pair load/store instructions.
4828 Offset /= Scale;
4829 return true;
4830}
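// Worked example (illustrative): STURXi with a byte offset of 16 has a memory
// scale of 8; 16 % 8 == 0, so Offset becomes 2, the element offset an
// LDP/STP-style instruction would use. A byte offset of 12 is rejected
// because 12 % 8 != 0.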
4831
4832static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4833 if (FirstOpc == SecondOpc)
4834 return true;
4835 // We can also pair sign-ext and zero-ext instructions.
4836 switch (FirstOpc) {
4837 default:
4838 return false;
4839 case AArch64::STRSui:
4840 case AArch64::STURSi:
4841 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4842 case AArch64::STRDui:
4843 case AArch64::STURDi:
4844 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4845 case AArch64::STRQui:
4846 case AArch64::STURQi:
4847 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4848 case AArch64::STRWui:
4849 case AArch64::STURWi:
4850 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4851 case AArch64::STRXui:
4852 case AArch64::STURXi:
4853 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4854 case AArch64::LDRSui:
4855 case AArch64::LDURSi:
4856 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4857 case AArch64::LDRDui:
4858 case AArch64::LDURDi:
4859 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4860 case AArch64::LDRQui:
4861 case AArch64::LDURQi:
4862 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4863 case AArch64::LDRWui:
4864 case AArch64::LDURWi:
4865 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4866 case AArch64::LDRSWui:
4867 case AArch64::LDURSWi:
4868 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4869 case AArch64::LDRXui:
4870 case AArch64::LDURXi:
4871 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4872 }
4873 // These instructions can't be paired based on their opcodes.
4874 return false;
4875}
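// For instance (illustrative): a zero-extending LDRWui and a neighbouring
// sign-extending LDRSWui report as pairable here, so the scheduler will try
// to keep them adjacent, which in turn makes later ldp formation by the
// load/store optimizer more likely.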
4876
4877static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4878 int64_t Offset1, unsigned Opcode1, int FI2,
4879 int64_t Offset2, unsigned Opcode2) {
4880 // Accesses through fixed stack object frame indices may access a different
4881 // fixed stack slot. Check that the object offsets + offsets match.
4882 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4883 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4884 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4885 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4886 // Convert to scaled object offsets.
4887 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4888 if (ObjectOffset1 % Scale1 != 0)
4889 return false;
4890 ObjectOffset1 /= Scale1;
4891 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4892 if (ObjectOffset2 % Scale2 != 0)
4893 return false;
4894 ObjectOffset2 /= Scale2;
4895 ObjectOffset1 += Offset1;
4896 ObjectOffset2 += Offset2;
4897 return ObjectOffset1 + 1 == ObjectOffset2;
4898 }
4899
4900 return FI1 == FI2;
4901}
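// Illustrative example: two LDRXui accesses to fixed stack objects at object
// offsets 0 and 8, each with instruction offset 0, scale to object offsets 0
// and 1; 0 + 1 == 1, so they are considered adjacent. For non-fixed objects
// the check simply requires the same frame index.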
4902
4903/// Detect opportunities for ldp/stp formation.
4904///
4905/// Only called for LdSt for which getMemOperandWithOffset returns true.
 4906 bool AArch64InstrInfo::shouldClusterMemOps(
 4907     ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4908 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4909 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4910 unsigned NumBytes) const {
4911 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4912 const MachineOperand &BaseOp1 = *BaseOps1.front();
4913 const MachineOperand &BaseOp2 = *BaseOps2.front();
4914 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4915 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4916 if (BaseOp1.getType() != BaseOp2.getType())
4917 return false;
4918
4919 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4920 "Only base registers and frame indices are supported.");
4921
4922 // Check for both base regs and base FI.
4923 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4924 return false;
4925
4926 // Only cluster up to a single pair.
4927 if (ClusterSize > 2)
4928 return false;
4929
4930 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4931 return false;
4932
4933 // Can we pair these instructions based on their opcodes?
4934 unsigned FirstOpc = FirstLdSt.getOpcode();
4935 unsigned SecondOpc = SecondLdSt.getOpcode();
4936 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4937 return false;
4938
4939 // Can't merge volatiles or load/stores that have a hint to avoid pair
4940 // formation, for example.
4941 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4942 !isCandidateToMergeOrPair(SecondLdSt))
4943 return false;
4944
4945 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4946 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4947 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4948 return false;
4949
4950 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4951 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4952 return false;
4953
4954 // Pairwise instructions have a 7-bit signed offset field.
4955 if (Offset1 > 63 || Offset1 < -64)
4956 return false;
4957
4958 // The caller should already have ordered First/SecondLdSt by offset.
4959 // Note: except for non-equal frame index bases
4960 if (BaseOp1.isFI()) {
4961 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4962 "Caller should have ordered offsets.");
4963
4964 const MachineFrameInfo &MFI =
4965 FirstLdSt.getParent()->getParent()->getFrameInfo();
4966 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4967 BaseOp2.getIndex(), Offset2, SecondOpc);
4968 }
4969
4970 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4971
4972 return Offset1 + 1 == Offset2;
4973}
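// Putting it together (a sketch): `ldr x0, [x8, #8]` and `ldr x1, [x8, #16]`
// have element offsets 1 and 2 once scaled, and 1 + 1 == 2, so the scheduler
// is told clustering them is worthwhile (a potential LDPXi later on).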
4974
 4975 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
 4976                                             MCRegister Reg, unsigned SubIdx,
4977 unsigned State,
4978 const TargetRegisterInfo *TRI) {
4979 if (!SubIdx)
4980 return MIB.addReg(Reg, State);
4981
4982 if (Reg.isPhysical())
4983 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4984 return MIB.addReg(Reg, State, SubIdx);
4985}
4986
4987static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4988 unsigned NumRegs) {
 4989   // We really want the positive remainder mod 32 here; that happens to be
 4990   // easily obtainable with a mask.
4991 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4992}
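// Worked example (illustrative): copying the D-register pair {d0, d1} into
// {d1, d2} gives (1 - 0) & 0x1f == 1, which is < 2, so a forward copy would
// overwrite d1 before it has been read as a source; the tuple copy below then
// iterates over the sub-registers in reverse order instead.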
4993
 4994 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
 4995                                         MachineBasicBlock::iterator I,
 4996                                         const DebugLoc &DL, MCRegister DestReg,
4997 MCRegister SrcReg, bool KillSrc,
4998 unsigned Opcode,
4999 ArrayRef<unsigned> Indices) const {
5000 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5002 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5003 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5004 unsigned NumRegs = Indices.size();
5005
5006 int SubReg = 0, End = NumRegs, Incr = 1;
5007 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5008 SubReg = NumRegs - 1;
5009 End = -1;
5010 Incr = -1;
5011 }
5012
5013 for (; SubReg != End; SubReg += Incr) {
5014 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5015 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5016 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5017 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5018 }
5019}
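// E.g. (illustrative) a QQ pair copy q1_q2 <- q3_q4 with Opcode ORRv16i8
// expands to
//   orr v1.16b, v3.16b, v3.16b
//   orr v2.16b, v4.16b, v4.16b
// with the loop above running backwards whenever the two tuples overlap.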
5020
 5021 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
 5022                                        MachineBasicBlock::iterator I,
 5023                                        const DebugLoc &DL, MCRegister DestReg,
5024 MCRegister SrcReg, bool KillSrc,
5025 unsigned Opcode, unsigned ZeroReg,
5026 llvm::ArrayRef<unsigned> Indices) const {
5028 unsigned NumRegs = Indices.size();
5029
5030#ifndef NDEBUG
5031 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5032 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5033 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5034 "GPR reg sequences should not be able to overlap");
5035#endif
5036
5037 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5038 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5039 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5040 MIB.addReg(ZeroReg);
5041 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5042 MIB.addImm(0);
5043 }
5044}
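// E.g. (illustrative) an XSeqPairs copy x4_x5 <- x6_x7 with Opcode ORRXrs and
// ZeroReg XZR becomes
//   orr x4, xzr, x6
//   orr x5, xzr, x7
// i.e. two plain register moves.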
5045
 5046 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 5047                                    MachineBasicBlock::iterator I,
 5048                                    const DebugLoc &DL, Register DestReg,
5049 Register SrcReg, bool KillSrc,
5050 bool RenamableDest,
5051 bool RenamableSrc) const {
5052 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5053 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
5055
5056 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5057 // If either operand is WSP, expand to ADD #0.
5058 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5059 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5060 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5061 MCRegister DestRegX = TRI->getMatchingSuperReg(
5062 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5063 MCRegister SrcRegX = TRI->getMatchingSuperReg(
5064 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5065 // This instruction is reading and writing X registers. This may upset
5066 // the register scavenger and machine verifier, so we need to indicate
5067 // that we are reading an undefined value from SrcRegX, but a proper
5068 // value from SrcReg.
5069 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5070 .addReg(SrcRegX, RegState::Undef)
5071 .addImm(0)
5073 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5074 } else {
5075 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5076 .addReg(SrcReg, getKillRegState(KillSrc))
5077 .addImm(0)
5079 }
5080 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
5081 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5082 .addImm(0)
5084 } else {
5085 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5086 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5087 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5088 MCRegister DestRegX = TRI->getMatchingSuperReg(
5089 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5090 assert(DestRegX.isValid() && "Destination super-reg not valid");
5091 MCRegister SrcRegX =
5092 SrcReg == AArch64::WZR
5093 ? AArch64::XZR
5094 : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
5095 &AArch64::GPR64spRegClass);
5096 assert(SrcRegX.isValid() && "Source super-reg not valid");
5097 // This instruction is reading and writing X registers. This may upset
5098 // the register scavenger and machine verifier, so we need to indicate
5099 // that we are reading an undefined value from SrcRegX, but a proper
5100 // value from SrcReg.
5101 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5102 .addReg(AArch64::XZR)
5103 .addReg(SrcRegX, RegState::Undef)
5104 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5105 } else {
5106 // Otherwise, expand to ORR WZR.
5107 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5108 .addReg(AArch64::WZR)
5109 .addReg(SrcReg, getKillRegState(KillSrc));
5110 }
5111 }
5112 return;
5113 }
5114
5115 // Copy a Predicate register by ORRing with itself.
5116 if (AArch64::PPRRegClass.contains(DestReg) &&
5117 AArch64::PPRRegClass.contains(SrcReg)) {
5118 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5119 "Unexpected SVE register.");
5120 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5121 .addReg(SrcReg) // Pg
5122 .addReg(SrcReg)
5123 .addReg(SrcReg, getKillRegState(KillSrc));
5124 return;
5125 }
5126
5127 // Copy a predicate-as-counter register by ORRing with itself as if it
5128 // were a regular predicate (mask) register.
5129 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5130 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5131 if (DestIsPNR || SrcIsPNR) {
5132 auto ToPPR = [](MCRegister R) -> MCRegister {
5133 return (R - AArch64::PN0) + AArch64::P0;
5134 };
5135 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5136 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5137
5138 if (PPRSrcReg != PPRDestReg) {
5139 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5140 .addReg(PPRSrcReg) // Pg
5141 .addReg(PPRSrcReg)
5142 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5143 if (DestIsPNR)
5144 NewMI.addDef(DestReg, RegState::Implicit);
5145 }
5146 return;
5147 }
5148
5149 // Copy a Z register by ORRing with itself.
5150 if (AArch64::ZPRRegClass.contains(DestReg) &&
5151 AArch64::ZPRRegClass.contains(SrcReg)) {
5152 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5153 "Unexpected SVE register.");
5154 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5155 .addReg(SrcReg)
5156 .addReg(SrcReg, getKillRegState(KillSrc));
5157 return;
5158 }
5159
5160 // Copy a Z register pair by copying the individual sub-registers.
5161 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5162 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5163 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5164 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5165 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5166 "Unexpected SVE register.");
5167 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5168 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5169 Indices);
5170 return;
5171 }
5172
5173 // Copy a Z register triple by copying the individual sub-registers.
5174 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5175 AArch64::ZPR3RegClass.contains(SrcReg)) {
5176 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5177 "Unexpected SVE register.");
5178 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5179 AArch64::zsub2};
5180 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5181 Indices);
5182 return;
5183 }
5184
5185 // Copy a Z register quad by copying the individual sub-registers.
5186 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5187 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5188 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5189 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5190 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5191 "Unexpected SVE register.");
5192 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5193 AArch64::zsub2, AArch64::zsub3};
5194 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5195 Indices);
5196 return;
5197 }
5198
5199 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5200 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5201 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5202 // If either operand is SP, expand to ADD #0.
5203 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5204 .addReg(SrcReg, getKillRegState(KillSrc))
5205 .addImm(0)
5207 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
5208 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5209 .addImm(0)
5211 } else {
5212 // Otherwise, expand to ORR XZR.
5213 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5214 .addReg(AArch64::XZR)
5215 .addReg(SrcReg, getKillRegState(KillSrc));
5216 }
5217 return;
5218 }
5219
5220 // Copy a DDDD register quad by copying the individual sub-registers.
5221 if (AArch64::DDDDRegClass.contains(DestReg) &&
5222 AArch64::DDDDRegClass.contains(SrcReg)) {
5223 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5224 AArch64::dsub2, AArch64::dsub3};
5225 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5226 Indices);
5227 return;
5228 }
5229
5230 // Copy a DDD register triple by copying the individual sub-registers.
5231 if (AArch64::DDDRegClass.contains(DestReg) &&
5232 AArch64::DDDRegClass.contains(SrcReg)) {
5233 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5234 AArch64::dsub2};
5235 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5236 Indices);
5237 return;
5238 }
5239
5240 // Copy a DD register pair by copying the individual sub-registers.
5241 if (AArch64::DDRegClass.contains(DestReg) &&
5242 AArch64::DDRegClass.contains(SrcReg)) {
5243 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5244 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5245 Indices);
5246 return;
5247 }
5248
5249 // Copy a QQQQ register quad by copying the individual sub-registers.
5250 if (AArch64::QQQQRegClass.contains(DestReg) &&
5251 AArch64::QQQQRegClass.contains(SrcReg)) {
5252 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5253 AArch64::qsub2, AArch64::qsub3};
5254 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5255 Indices);
5256 return;
5257 }
5258
5259 // Copy a QQQ register triple by copying the individual sub-registers.
5260 if (AArch64::QQQRegClass.contains(DestReg) &&
5261 AArch64::QQQRegClass.contains(SrcReg)) {
5262 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5263 AArch64::qsub2};
5264 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5265 Indices);
5266 return;
5267 }
5268
5269 // Copy a QQ register pair by copying the individual sub-registers.
5270 if (AArch64::QQRegClass.contains(DestReg) &&
5271 AArch64::QQRegClass.contains(SrcReg)) {
5272 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5273 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5274 Indices);
5275 return;
5276 }
5277
5278 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5279 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5280 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5281 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5282 AArch64::XZR, Indices);
5283 return;
5284 }
5285
5286 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5287 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5288 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5289 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5290 AArch64::WZR, Indices);
5291 return;
5292 }
5293
5294 if (AArch64::FPR128RegClass.contains(DestReg) &&
5295 AArch64::FPR128RegClass.contains(SrcReg)) {
5296 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5297 !Subtarget.isNeonAvailable())
5298 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5299 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5300 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5301 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5302 else if (Subtarget.isNeonAvailable())
5303 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5304 .addReg(SrcReg)
5305 .addReg(SrcReg, getKillRegState(KillSrc));
5306 else {
5307 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5308 .addReg(AArch64::SP, RegState::Define)
5309 .addReg(SrcReg, getKillRegState(KillSrc))
5310 .addReg(AArch64::SP)
5311 .addImm(-16);
5312 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5313 .addReg(AArch64::SP, RegState::Define)
5314 .addReg(DestReg, RegState::Define)
5315 .addReg(AArch64::SP)
5316 .addImm(16);
5317 }
5318 return;
5319 }
5320
5321 if (AArch64::FPR64RegClass.contains(DestReg) &&
5322 AArch64::FPR64RegClass.contains(SrcReg)) {
5323 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5324 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5325 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5327 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub,
5328 &AArch64::FPR128RegClass);
5329 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub,
5330 &AArch64::FPR128RegClass);
5331 // This instruction is reading and writing Q registers. This may upset
5332 // the register scavenger and machine verifier, so we need to indicate
5333 // that we are reading an undefined value from SrcRegQ, but a proper
5334 // value from SrcReg.
5335 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5336 .addReg(SrcRegQ, RegState::Undef)
5337 .addReg(SrcRegQ, RegState::Undef)
5338 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5339 } else {
5340 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5341 .addReg(SrcReg, getKillRegState(KillSrc));
5342 }
5343 return;
5344 }
5345
5346 if (AArch64::FPR32RegClass.contains(DestReg) &&
5347 AArch64::FPR32RegClass.contains(SrcReg)) {
5348 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5349 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5350 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5352 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5353 &AArch64::FPR128RegClass);
5354 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5355 &AArch64::FPR128RegClass);
5356 // This instruction is reading and writing Q registers. This may upset
5357 // the register scavenger and machine verifier, so we need to indicate
5358 // that we are reading an undefined value from SrcRegQ, but a proper
5359 // value from SrcReg.
5360 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5361 .addReg(SrcRegQ, RegState::Undef)
5362 .addReg(SrcRegQ, RegState::Undef)
5363 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5364 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5365 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5367 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5368 &AArch64::FPR64RegClass);
5369 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5370 &AArch64::FPR64RegClass);
5371 // This instruction is reading and writing D registers. This may upset
5372 // the register scavenger and machine verifier, so we need to indicate
5373 // that we are reading an undefined value from SrcRegD, but a proper
5374 // value from SrcReg.
5375 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5376 .addReg(SrcRegD, RegState::Undef)
5377 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5378 } else {
5379 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5380 .addReg(SrcReg, getKillRegState(KillSrc));
5381 }
5382 return;
5383 }
5384
5385 if (AArch64::FPR16RegClass.contains(DestReg) &&
5386 AArch64::FPR16RegClass.contains(SrcReg)) {
5387 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5388 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5389 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5391 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5392 &AArch64::FPR128RegClass);
5393 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5394 &AArch64::FPR128RegClass);
5395 // This instruction is reading and writing Q registers. This may upset
5396 // the register scavenger and machine verifier, so we need to indicate
5397 // that we are reading an undefined value from SrcRegQ, but a proper
5398 // value from SrcReg.
5399 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5400 .addReg(SrcRegQ, RegState::Undef)
5401 .addReg(SrcRegQ, RegState::Undef)
5402 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5403 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5404 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5406 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5407 &AArch64::FPR64RegClass);
5408 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5409 &AArch64::FPR64RegClass);
5410 // This instruction is reading and writing D registers. This may upset
5411 // the register scavenger and machine verifier, so we need to indicate
5412 // that we are reading an undefined value from SrcRegD, but a proper
5413 // value from SrcReg.
5414 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5415 .addReg(SrcRegD, RegState::Undef)
5416 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5417 } else {
5418 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5419 &AArch64::FPR32RegClass);
5420 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5421 &AArch64::FPR32RegClass);
5422 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5423 .addReg(SrcReg, getKillRegState(KillSrc));
5424 }
5425 return;
5426 }
5427
5428 if (AArch64::FPR8RegClass.contains(DestReg) &&
5429 AArch64::FPR8RegClass.contains(SrcReg)) {
5430 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5431 !Subtarget.hasZeroCycleRegMoveFPR64() &&
 5432         !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5434 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5435 &AArch64::FPR128RegClass);
5436 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5437 &AArch64::FPR128RegClass);
5438 // This instruction is reading and writing Q registers. This may upset
5439 // the register scavenger and machine verifier, so we need to indicate
5440 // that we are reading an undefined value from SrcRegQ, but a proper
5441 // value from SrcReg.
5442 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5443 .addReg(SrcRegQ, RegState::Undef)
5444 .addReg(SrcRegQ, RegState::Undef)
5445 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5446 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5447 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5449 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5450 &AArch64::FPR64RegClass);
5451 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5452 &AArch64::FPR64RegClass);
5453 // This instruction is reading and writing D registers. This may upset
5454 // the register scavenger and machine verifier, so we need to indicate
5455 // that we are reading an undefined value from SrcRegD, but a proper
5456 // value from SrcReg.
5457 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5458 .addReg(SrcRegD, RegState::Undef)
5459 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5460 } else {
5461 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5462 &AArch64::FPR32RegClass);
5463 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5464 &AArch64::FPR32RegClass);
5465 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5466 .addReg(SrcReg, getKillRegState(KillSrc));
5467 }
5468 return;
5469 }
5470
5471 // Copies between GPR64 and FPR64.
5472 if (AArch64::FPR64RegClass.contains(DestReg) &&
5473 AArch64::GPR64RegClass.contains(SrcReg)) {
5474 if (AArch64::XZR == SrcReg) {
5475 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5476 } else {
5477 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5478 .addReg(SrcReg, getKillRegState(KillSrc));
5479 }
5480 return;
5481 }
5482 if (AArch64::GPR64RegClass.contains(DestReg) &&
5483 AArch64::FPR64RegClass.contains(SrcReg)) {
5484 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5485 .addReg(SrcReg, getKillRegState(KillSrc));
5486 return;
5487 }
5488 // Copies between GPR32 and FPR32.
5489 if (AArch64::FPR32RegClass.contains(DestReg) &&
5490 AArch64::GPR32RegClass.contains(SrcReg)) {
5491 if (AArch64::WZR == SrcReg) {
5492 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5493 } else {
5494 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5495 .addReg(SrcReg, getKillRegState(KillSrc));
5496 }
5497 return;
5498 }
5499 if (AArch64::GPR32RegClass.contains(DestReg) &&
5500 AArch64::FPR32RegClass.contains(SrcReg)) {
5501 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5502 .addReg(SrcReg, getKillRegState(KillSrc));
5503 return;
5504 }
5505
5506 if (DestReg == AArch64::NZCV) {
5507 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5508 BuildMI(MBB, I, DL, get(AArch64::MSR))
5509 .addImm(AArch64SysReg::NZCV)
5510 .addReg(SrcReg, getKillRegState(KillSrc))
5511 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5512 return;
5513 }
5514
5515 if (SrcReg == AArch64::NZCV) {
5516 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5517 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5518 .addImm(AArch64SysReg::NZCV)
5519 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5520 return;
5521 }
5522
5523#ifndef NDEBUG
5525 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5526 << TRI.getRegAsmName(SrcReg) << "\n";
5527#endif
5528 llvm_unreachable("unimplemented reg-to-reg copy");
5529}
5530
5533 MachineBasicBlock::iterator InsertBefore,
5534 const MCInstrDesc &MCID,
5535 Register SrcReg, bool IsKill,
5536 unsigned SubIdx0, unsigned SubIdx1, int FI,
5537 MachineMemOperand *MMO) {
5538 Register SrcReg0 = SrcReg;
5539 Register SrcReg1 = SrcReg;
5540 if (SrcReg.isPhysical()) {
5541 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5542 SubIdx0 = 0;
5543 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5544 SubIdx1 = 0;
5545 }
5546 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5547 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5548 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5549 .addFrameIndex(FI)
5550 .addImm(0)
5551 .addMemOperand(MMO);
5552}
5553
5556 Register SrcReg, bool isKill, int FI,
5557 const TargetRegisterClass *RC,
5558 const TargetRegisterInfo *TRI,
5559 Register VReg,
5560 MachineInstr::MIFlag Flags) const {
5561 MachineFunction &MF = *MBB.getParent();
5562 MachineFrameInfo &MFI = MF.getFrameInfo();
5563
5565 MachineMemOperand *MMO =
5567 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5568 unsigned Opc = 0;
5569 bool Offset = true;
5571 unsigned StackID = TargetStackID::Default;
5572 switch (TRI->getSpillSize(*RC)) {
5573 case 1:
5574 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5575 Opc = AArch64::STRBui;
5576 break;
5577 case 2: {
5578 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5579 Opc = AArch64::STRHui;
5580 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5581 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5582 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5583 "Unexpected register store without SVE store instructions");
5584 Opc = AArch64::STR_PXI;
5586 }
5587 break;
5588 }
5589 case 4:
5590 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5591 Opc = AArch64::STRWui;
5592 if (SrcReg.isVirtual())
5593 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5594 else
5595 assert(SrcReg != AArch64::WSP);
5596 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5597 Opc = AArch64::STRSui;
5598 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5599 Opc = AArch64::STR_PPXI;
5601 }
5602 break;
5603 case 8:
5604 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5605 Opc = AArch64::STRXui;
5606 if (SrcReg.isVirtual())
5607 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5608 else
5609 assert(SrcReg != AArch64::SP);
5610 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5611 Opc = AArch64::STRDui;
5612 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5614 get(AArch64::STPWi), SrcReg, isKill,
5615 AArch64::sube32, AArch64::subo32, FI, MMO);
5616 return;
5617 }
5618 break;
5619 case 16:
5620 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5621 Opc = AArch64::STRQui;
5622 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5623 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5624 Opc = AArch64::ST1Twov1d;
5625 Offset = false;
5626 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5628 get(AArch64::STPXi), SrcReg, isKill,
5629 AArch64::sube64, AArch64::subo64, FI, MMO);
5630 return;
5631 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5632 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5633 "Unexpected register store without SVE store instructions");
5634 Opc = AArch64::STR_ZXI;
5636 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5637 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5638 "Unexpected predicate store without SVE store instructions");
5639 Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
5641 }
5642 break;
5643 case 24:
5644 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5645 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5646 Opc = AArch64::ST1Threev1d;
5647 Offset = false;
5648 }
5649 break;
5650 case 32:
5651 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5652 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5653 Opc = AArch64::ST1Fourv1d;
5654 Offset = false;
5655 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5656 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5657 Opc = AArch64::ST1Twov2d;
5658 Offset = false;
5659 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5660 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5661 "Unexpected register store without SVE store instructions");
5662 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5664 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5665 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5666 "Unexpected register store without SVE store instructions");
5667 Opc = AArch64::STR_ZZXI;
5669 }
5670 break;
5671 case 48:
5672 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5673 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5674 Opc = AArch64::ST1Threev2d;
5675 Offset = false;
5676 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5677 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5678 "Unexpected register store without SVE store instructions");
5679 Opc = AArch64::STR_ZZZXI;
5681 }
5682 break;
5683 case 64:
5684 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5685 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5686 Opc = AArch64::ST1Fourv2d;
5687 Offset = false;
5688 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5689 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5690 "Unexpected register store without SVE store instructions");
5691 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5693 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5694 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5695 "Unexpected register store without SVE store instructions");
5696 Opc = AArch64::STR_ZZZZXI;
5698 }
5699 break;
5700 }
5701 assert(Opc && "Unknown register class");
5702 MFI.setStackID(FI, StackID);
5703
5705 .addReg(SrcReg, getKillRegState(isKill))
5706 .addFrameIndex(FI);
5707
5708 if (Offset)
5709 MI.addImm(0);
5710 if (PNRReg.isValid())
5711 MI.addDef(PNRReg, RegState::Implicit);
5712 MI.addMemOperand(MMO);
5713}
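// Editor's note: an illustrative sketch, not part of the original source. For
// a spill of a virtual GPR64 register %0 into frame index 0, the switch above
// picks STRXui (spill size 8, Offset stays true), so the emitted MIR is
// roughly:
//
//   STRXui killed %0, %stack.0, 0 :: (store (s64) into %stack.0)
//
// The trailing 0 is the immediate added because Offset is true for this
// opcode.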
5714
5717 MachineBasicBlock::iterator InsertBefore,
5718 const MCInstrDesc &MCID,
5719 Register DestReg, unsigned SubIdx0,
5720 unsigned SubIdx1, int FI,
5721 MachineMemOperand *MMO) {
5722 Register DestReg0 = DestReg;
5723 Register DestReg1 = DestReg;
5724 bool IsUndef = true;
5725 if (DestReg.isPhysical()) {
5726 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5727 SubIdx0 = 0;
5728 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5729 SubIdx1 = 0;
5730 IsUndef = false;
5731 }
5732 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5733 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5734 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5735 .addFrameIndex(FI)
5736 .addImm(0)
5737 .addMemOperand(MMO);
5738}
5739
5742 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5743 Register VReg, MachineInstr::MIFlag Flags) const {
5744 MachineFunction &MF = *MBB.getParent();
5745 MachineFrameInfo &MFI = MF.getFrameInfo();
5747 MachineMemOperand *MMO =
5749 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5750
5751 unsigned Opc = 0;
5752 bool Offset = true;
5753 unsigned StackID = TargetStackID::Default;
5755 switch (TRI->getSpillSize(*RC)) {
5756 case 1:
5757 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5758 Opc = AArch64::LDRBui;
5759 break;
5760 case 2: {
5761 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5762 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5763 Opc = AArch64::LDRHui;
5764 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5765 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5766 "Unexpected register load without SVE load instructions");
5767 if (IsPNR)
5768 PNRReg = DestReg;
5769 Opc = AArch64::LDR_PXI;
5771 }
5772 break;
5773 }
5774 case 4:
5775 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5776 Opc = AArch64::LDRWui;
5777 if (DestReg.isVirtual())
5778 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5779 else
5780 assert(DestReg != AArch64::WSP);
5781 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5782 Opc = AArch64::LDRSui;
5783 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5784 Opc = AArch64::LDR_PPXI;
5786 }
5787 break;
5788 case 8:
5789 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5790 Opc = AArch64::LDRXui;
5791 if (DestReg.isVirtual())
5792 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5793 else
5794 assert(DestReg != AArch64::SP);
5795 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5796 Opc = AArch64::LDRDui;
5797 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5799 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5800 AArch64::subo32, FI, MMO);
5801 return;
5802 }
5803 break;
5804 case 16:
5805 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5806 Opc = AArch64::LDRQui;
5807 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5808 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5809 Opc = AArch64::LD1Twov1d;
5810 Offset = false;
5811 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5813 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5814 AArch64::subo64, FI, MMO);
5815 return;
5816 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5817 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5818 "Unexpected register load without SVE load instructions");
5819 Opc = AArch64::LDR_ZXI;
5821 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5822 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5823 "Unexpected predicate load without SVE load instructions");
5824 Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
5826 }
5827 break;
5828 case 24:
5829 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5830 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5831 Opc = AArch64::LD1Threev1d;
5832 Offset = false;
5833 }
5834 break;
5835 case 32:
5836 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5837 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5838 Opc = AArch64::LD1Fourv1d;
5839 Offset = false;
5840 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5841 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5842 Opc = AArch64::LD1Twov2d;
5843 Offset = false;
5844 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5845 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5846 "Unexpected register load without SVE load instructions");
5847 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5849 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5850 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5851 "Unexpected register load without SVE load instructions");
5852 Opc = AArch64::LDR_ZZXI;
5854 }
5855 break;
5856 case 48:
5857 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5858 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5859 Opc = AArch64::LD1Threev2d;
5860 Offset = false;
5861 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5862 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5863 "Unexpected register load without SVE load instructions");
5864 Opc = AArch64::LDR_ZZZXI;
5866 }
5867 break;
5868 case 64:
5869 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5870 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5871 Opc = AArch64::LD1Fourv2d;
5872 Offset = false;
5873 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5874 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5875 "Unexpected register load without SVE load instructions");
5876 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5878 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5879 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5880 "Unexpected register load without SVE load instructions");
5881 Opc = AArch64::LDR_ZZZZXI;
5883 }
5884 break;
5885 }
5886
5887 assert(Opc && "Unknown register class");
5888 MFI.setStackID(FI, StackID);
5889
5891 .addReg(DestReg, getDefRegState(true))
5892 .addFrameIndex(FI);
5893 if (Offset)
5894 MI.addImm(0);
5895 if (PNRReg.isValid() && !PNRReg.isVirtual())
5896 MI.addDef(PNRReg, RegState::Implicit);
5897 MI.addMemOperand(MMO);
5898}
5899
5901 const MachineInstr &UseMI,
5902 const TargetRegisterInfo *TRI) {
5903 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5904 UseMI.getIterator()),
5905 [TRI](const MachineInstr &I) {
5906 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5907 I.readsRegister(AArch64::NZCV, TRI);
5908 });
5909}
5910
5911void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5912 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5913 // The smallest scalable element supported by scaled SVE addressing
5914 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
5915 // byte offset must always be a multiple of 2.
5916 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5917
5918 // VGSized offsets are divided by '2', because the VG register is the
5919 // number of 64-bit granules as opposed to 128-bit vector chunks,
5920 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5921 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5922 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5923 ByteSized = Offset.getFixed();
5924 VGSized = Offset.getScalable() / 2;
5925}
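// Editor's note: a worked example of the decomposition above, assuming the
// offset was built as StackOffset::get(/*Fixed=*/32, /*Scalable=*/16):
//
//   ByteSized = 32           // fixed part passes through unchanged
//   VGSized   = 16 / 2 = 8   // 16 scalable bytes == 8 x VG, since VG counts
//                            // 64-bit granules
//
// so a DWARF consumer would describe the offset as 32 + 8 * VG.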
5926
5927/// Returns the offset in parts to which this frame offset can be
5928/// decomposed for the purpose of describing a frame offset.
5929/// For non-scalable offsets this is simply its byte size.
5930void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5931 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5932 int64_t &NumDataVectors) {
5933 // The smallest scalable element supported by scaled SVE addressing
5934 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
5935 // byte offset must always be a multiple of 2.
5936 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5937
5938 NumBytes = Offset.getFixed();
5939 NumDataVectors = 0;
5940 NumPredicateVectors = Offset.getScalable() / 2;
5941 // This method is used to get the offsets to adjust the frame offset.
5942 // If the function requires ADDPL to be used and needs more than two ADDPL
5943 // instructions, part of the offset is folded into NumDataVectors so that it
5944 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5945 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5946 NumPredicateVectors > 62) {
5947 NumDataVectors = NumPredicateVectors / 8;
5948 NumPredicateVectors -= NumDataVectors * 8;
5949 }
5950}
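// Editor's note: worked examples of the folding above, assuming the scalable
// part of the incoming offset is as shown:
//
//   +34 scalable bytes  -> 17 PL units; 17 is within [-64, 62] and not a
//                          multiple of 8, so NumPredicateVectors stays 17
//                          (a single ADDPL #17).
//   +144 scalable bytes -> 72 PL units; 72 % 8 == 0, so this folds into
//                          NumDataVectors = 9 and NumPredicateVectors = 0
//                          (a single ADDVL #9 instead of several ADDPLs).
//   +200 scalable bytes -> 100 PL units; 100 > 62, so NumDataVectors = 12 and
//                          NumPredicateVectors = 4 (ADDVL #12 + ADDPL #4).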
5951
5952// Convenience function to create a DWARF expression for: Constant `Operation`.
5953 // This helper emits compact sequences for common cases. For example, for `-15
5954// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
5957 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
5958 // -Constant (1 to 31)
5959 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
5960 Operation = dwarf::DW_OP_minus;
5961 } else if (Constant >= 0 && Constant <= 31) {
5962 // Literal value 0 to 31
5963 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
5964 } else {
5965 // Signed constant
5966 Expr.push_back(dwarf::DW_OP_consts);
5968 }
5969 return Expr.push_back(Operation);
5970}
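// Editor's note: an illustrative sketch of the byte sequences the helper above
// is expected to append (the large-constant case assumes the usual SLEB128
// operand of DW_OP_consts):
//
//   appendConstantExpr(Expr,   7, DW_OP_plus) -> DW_OP_lit7,  DW_OP_plus
//   appendConstantExpr(Expr, -15, DW_OP_plus) -> DW_OP_lit15, DW_OP_minus
//   appendConstantExpr(Expr, 100, DW_OP_mul)  -> DW_OP_consts, SLEB128(100),
//                                                DW_OP_mul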
5971
5972// Convenience function to create a DWARF expression for a register.
5973static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
5974 Expr.push_back((char)dwarf::DW_OP_bregx);
5976 Expr.push_back(0);
5977}
5978
5979// Convenience function to create a DWARF expression for loading a register from
5980// a CFA offset.
5982 int64_t OffsetFromDefCFA) {
5983 // This assumes the top of the DWARF stack contains the CFA.
5984 Expr.push_back(dwarf::DW_OP_dup);
5985 // Add the offset to the register.
5986 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
5987 // Dereference the address (loads a 64-bit value).
5988 Expr.push_back(dwarf::DW_OP_deref);
5989}
5990
5991// Convenience function to create a comment for
5992// (+/-) NumBytes (* RegScale)?
5993static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
5994 StringRef RegScale = {}) {
5995 if (NumBytes) {
5996 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5997 if (!RegScale.empty())
5998 Comment << ' ' << RegScale;
5999 }
6000}
6001
6002// Creates an MCCFIInstruction:
6003// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6005 unsigned Reg,
6006 const StackOffset &Offset) {
6007 int64_t NumBytes, NumVGScaledBytes;
6008 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6009 NumVGScaledBytes);
6010 std::string CommentBuffer;
6011 llvm::raw_string_ostream Comment(CommentBuffer);
6012
6013 if (Reg == AArch64::SP)
6014 Comment << "sp";
6015 else if (Reg == AArch64::FP)
6016 Comment << "fp";
6017 else
6018 Comment << printReg(Reg, &TRI);
6019
6020 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6021 SmallString<64> Expr;
6022 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6023 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6024 // Reg + NumBytes
6025 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6026 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6027 appendOffsetComment(NumBytes, Comment);
6028 if (NumVGScaledBytes) {
6029 // + VG * NumVGScaledBytes
6030 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6031 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6032 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6033 Expr.push_back(dwarf::DW_OP_plus);
6034 }
6035
6036 // Wrap this into DW_CFA_def_cfa.
6037 SmallString<64> DefCfaExpr;
6038 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6039 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6040 DefCfaExpr.append(Expr.str());
6041 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6042 Comment.str());
6043}
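// Editor's note: a rough sketch of the escape built above, assuming Reg == SP
// and a StackOffset of 16 fixed plus 32 scalable bytes (NumBytes = 16,
// NumVGScaledBytes = 16):
//
//   DW_CFA_def_cfa_expression, ULEB128(length),
//     DW_OP_breg31 +16,                 // sp + 16
//     DW_OP_bregx <VG dwarf reg> 0,     // read VG
//     DW_OP_lit16, DW_OP_mul,           // * 16
//     DW_OP_plus                        // sp + 16 + 16 * VG
//
// with the asm comment rendered as "sp + 16 + 16 * VG".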
6044
6046 unsigned FrameReg, unsigned Reg,
6047 const StackOffset &Offset,
6048 bool LastAdjustmentWasScalable) {
6049 if (Offset.getScalable())
6050 return createDefCFAExpression(TRI, Reg, Offset);
6051
6052 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6053 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6054
6055 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6056 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6057}
6058
6061 const StackOffset &OffsetFromDefCFA,
6062 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6063 int64_t NumBytes, NumVGScaledBytes;
6064 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6065 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6066
6067 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6068
6069 // Non-scalable offsets can use DW_CFA_offset directly.
6070 if (!NumVGScaledBytes)
6071 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6072
6073 std::string CommentBuffer;
6074 llvm::raw_string_ostream Comment(CommentBuffer);
6075 Comment << printReg(Reg, &TRI) << " @ cfa";
6076
6077 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6078 assert(NumVGScaledBytes && "Expected scalable offset");
6079 SmallString<64> OffsetExpr;
6080 // + VG * NumVGScaledBytes
6081 StringRef VGRegScale;
6082 if (IncomingVGOffsetFromDefCFA) {
6083 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6084 VGRegScale = "* IncomingVG";
6085 } else {
6086 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6087 VGRegScale = "* VG";
6088 }
6089 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6090 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6091 OffsetExpr.push_back(dwarf::DW_OP_plus);
6092 if (NumBytes) {
6093 // + NumBytes
6094 appendOffsetComment(NumBytes, Comment);
6095 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6096 }
6097
6098 // Wrap this into DW_CFA_expression
6099 SmallString<64> CfaExpr;
6100 CfaExpr.push_back(dwarf::DW_CFA_expression);
6101 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6102 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6103 CfaExpr.append(OffsetExpr.str());
6104
6105 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6106 Comment.str());
6107}
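// Editor's note: a rough sketch of the DW_CFA_expression built above, assuming
// a callee-saved SVE register stored at CFA - 16 - 8 * VG (NumBytes = -16,
// NumVGScaledBytes = -8) and no incoming-VG slot:
//
//   DW_CFA_expression, ULEB128(reg), ULEB128(length),
//     DW_OP_bregx <VG dwarf reg> 0,     // read VG
//     DW_OP_consts -8, DW_OP_mul,       // * -8
//     DW_OP_plus,                       // CFA - 8 * VG
//     DW_OP_lit16, DW_OP_minus          // - 16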
6108
6109// Helper function to emit a frame offset adjustment from a given
6110// pointer (SrcReg), stored into DestReg. This function is explicit
6111// in that it requires the opcode.
6114 const DebugLoc &DL, unsigned DestReg,
6115 unsigned SrcReg, int64_t Offset, unsigned Opc,
6116 const TargetInstrInfo *TII,
6117 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6118 bool *HasWinCFI, bool EmitCFAOffset,
6119 StackOffset CFAOffset, unsigned FrameReg) {
6120 int Sign = 1;
6121 unsigned MaxEncoding, ShiftSize;
6122 switch (Opc) {
6123 case AArch64::ADDXri:
6124 case AArch64::ADDSXri:
6125 case AArch64::SUBXri:
6126 case AArch64::SUBSXri:
6127 MaxEncoding = 0xfff;
6128 ShiftSize = 12;
6129 break;
6130 case AArch64::ADDVL_XXI:
6131 case AArch64::ADDPL_XXI:
6132 case AArch64::ADDSVL_XXI:
6133 case AArch64::ADDSPL_XXI:
6134 MaxEncoding = 31;
6135 ShiftSize = 0;
6136 if (Offset < 0) {
6137 MaxEncoding = 32;
6138 Sign = -1;
6139 Offset = -Offset;
6140 }
6141 break;
6142 default:
6143 llvm_unreachable("Unsupported opcode");
6144 }
6145
6146 // `Offset` can be in bytes or in "scalable bytes".
6147 int VScale = 1;
6148 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6149 VScale = 16;
6150 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6151 VScale = 2;
6152
6153 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6154 // scratch register. If DestReg is a virtual register, use it as the
6155 // scratch register; otherwise, create a new virtual register (to be
6156 // replaced by the scavenger at the end of PEI). That case can be optimized
6157 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6158 // register can be loaded with offset%8 and the add/sub can use an extending
6159 // instruction with LSL#3.
6160 // Currently the function handles any offsets but generates a poor sequence
6161 // of code.
6162 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6163
6164 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6165 Register TmpReg = DestReg;
6166 if (TmpReg == AArch64::XZR)
6167 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6168 &AArch64::GPR64RegClass);
6169 do {
6170 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6171 unsigned LocalShiftSize = 0;
6172 if (ThisVal > MaxEncoding) {
6173 ThisVal = ThisVal >> ShiftSize;
6174 LocalShiftSize = ShiftSize;
6175 }
6176 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6177 "Encoding cannot handle value that big");
6178
6179 Offset -= ThisVal << LocalShiftSize;
6180 if (Offset == 0)
6181 TmpReg = DestReg;
6182 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6183 .addReg(SrcReg)
6184 .addImm(Sign * (int)ThisVal);
6185 if (ShiftSize)
6186 MBI = MBI.addImm(
6188 MBI = MBI.setMIFlag(Flag);
6189
6190 auto Change =
6191 VScale == 1
6192 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6193 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6194 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6195 CFAOffset += Change;
6196 else
6197 CFAOffset -= Change;
6198 if (EmitCFAOffset && DestReg == TmpReg) {
6199 MachineFunction &MF = *MBB.getParent();
6200 const TargetSubtargetInfo &STI = MF.getSubtarget();
6201 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6202
6203 unsigned CFIIndex = MF.addFrameInst(
6204 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6205 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6206 .addCFIIndex(CFIIndex)
6207 .setMIFlags(Flag);
6208 }
6209
6210 if (NeedsWinCFI) {
6211 int Imm = (int)(ThisVal << LocalShiftSize);
6212 if (VScale != 1 && DestReg == AArch64::SP) {
6213 if (HasWinCFI)
6214 *HasWinCFI = true;
6215 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6216 .addImm(ThisVal)
6217 .setMIFlag(Flag);
6218 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6219 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6220 assert(VScale == 1 && "Expected non-scalable operation");
6221 if (HasWinCFI)
6222 *HasWinCFI = true;
6223 if (Imm == 0)
6224 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6225 else
6226 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6227 .addImm(Imm)
6228 .setMIFlag(Flag);
6229 assert(Offset == 0 && "Expected remaining offset to be zero to "
6230 "emit a single SEH directive");
6231 } else if (DestReg == AArch64::SP) {
6232 assert(VScale == 1 && "Expected non-scalable operation");
6233 if (HasWinCFI)
6234 *HasWinCFI = true;
6235 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6236 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6237 .addImm(Imm)
6238 .setMIFlag(Flag);
6239 }
6240 }
6241
6242 SrcReg = TmpReg;
6243 } while (Offset);
6244}
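// Editor's note: a worked pass through the loop above, assuming
// Opc == AArch64::ADDXri, SrcReg == DestReg == SP and Offset == 4100:
//
//   MaxEncodableValue = 0xfff << 12
//   iteration 1: ThisVal = 4100 > 0xfff, so ThisVal >>= 12 gives 1 with
//                LocalShiftSize = 12; emits "add sp, sp, #1, lsl #12" and
//                Offset becomes 4100 - 4096 = 4
//   iteration 2: ThisVal = 4; emits "add sp, sp, #4"; Offset is 0, loop ends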
6245
6248 unsigned DestReg, unsigned SrcReg,
6250 MachineInstr::MIFlag Flag, bool SetNZCV,
6251 bool NeedsWinCFI, bool *HasWinCFI,
6252 bool EmitCFAOffset, StackOffset CFAOffset,
6253 unsigned FrameReg) {
6254 // If a function is marked as arm_locally_streaming, then the runtime value of
6255 // vscale in the prologue/epilogue is different from the runtime value of vscale
6256 // in the function's body. To avoid having to consider multiple vscales,
6257 // we can use `addsvl` to allocate any scalable stack-slots, which under
6258 // most circumstances will be only locals, not callee-save slots.
6259 const Function &F = MBB.getParent()->getFunction();
6260 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6261
6262 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6263 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6264 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6265
6266 // First emit non-scalable frame offsets, or a simple 'mov'.
6267 if (Bytes || (!Offset && SrcReg != DestReg)) {
6268 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6269 "SP increment/decrement not 8-byte aligned");
6270 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6271 if (Bytes < 0) {
6272 Bytes = -Bytes;
6273 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6274 }
6275 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6276 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6277 FrameReg);
6278 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6279 ? StackOffset::getFixed(-Bytes)
6280 : StackOffset::getFixed(Bytes);
6281 SrcReg = DestReg;
6282 FrameReg = DestReg;
6283 }
6284
6285 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
6286 "SetNZCV not supported with SVE vectors");
6287 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6288 "WinCFI can't allocate fractions of an SVE data vector");
6289
6290 if (NumDataVectors) {
6291 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6292 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6293 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6294 FrameReg);
6295 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6296 SrcReg = DestReg;
6297 }
6298
6299 if (NumPredicateVectors) {
6300 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6301 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6302 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6303 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6304 FrameReg);
6305 }
6306}
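// Editor's note: a rough sketch of what the helper above emits for
// "sp -= 32 bytes + 9 SVE data vectors" in a non-streaming function, i.e.
// Offset = fixed -32 plus scalable -144 bytes:
//
//   decompose: Bytes = -32, NumDataVectors = -9, NumPredicateVectors = 0
//   emits:     sub   sp, sp, #32
//              addvl sp, sp, #-9
//
// With the "aarch64_pstate_sm_body" attribute the ADDVL would become ADDSVL.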
6307
6310 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6311 LiveIntervals *LIS, VirtRegMap *VRM) const {
6312 // This is a bit of a hack. Consider this instruction:
6313 //
6314 // %0 = COPY %sp; GPR64all:%0
6315 //
6316 // We explicitly chose GPR64all for the virtual register so such a copy might
6317 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6318 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6319 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6320 //
6321 // To prevent that, we are going to constrain the %0 register class here.
6322 if (MI.isFullCopy()) {
6323 Register DstReg = MI.getOperand(0).getReg();
6324 Register SrcReg = MI.getOperand(1).getReg();
6325 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6326 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6327 return nullptr;
6328 }
6329 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6330 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6331 return nullptr;
6332 }
6333 // Nothing can be folded with a copy from/to NZCV.
6334 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6335 return nullptr;
6336 }
6337
6338 // Handle the case where a copy is being spilled or filled but the source
6339 // and destination register class don't match. For example:
6340 //
6341 // %0 = COPY %xzr; GPR64common:%0
6342 //
6343 // In this case we can still safely fold away the COPY and generate the
6344 // following spill code:
6345 //
6346 // STRXui %xzr, %stack.0
6347 //
6348 // This also eliminates spilled cross register class COPYs (e.g. between x and
6349 // d regs) of the same size. For example:
6350 //
6351 // %0 = COPY %1; GPR64:%0, FPR64:%1
6352 //
6353 // will be filled as
6354 //
6355 // LDRDui %0, fi<#0>
6356 //
6357 // instead of
6358 //
6359 // LDRXui %Temp, fi<#0>
6360 // %0 = FMOV %Temp
6361 //
6362 if (MI.isCopy() && Ops.size() == 1 &&
6363 // Make sure we're only folding the explicit COPY defs/uses.
6364 (Ops[0] == 0 || Ops[0] == 1)) {
6365 bool IsSpill = Ops[0] == 0;
6366 bool IsFill = !IsSpill;
6368 const MachineRegisterInfo &MRI = MF.getRegInfo();
6369 MachineBasicBlock &MBB = *MI.getParent();
6370 const MachineOperand &DstMO = MI.getOperand(0);
6371 const MachineOperand &SrcMO = MI.getOperand(1);
6372 Register DstReg = DstMO.getReg();
6373 Register SrcReg = SrcMO.getReg();
6374 // This is slightly expensive to compute for physical regs since
6375 // getMinimalPhysRegClass is slow.
6376 auto getRegClass = [&](unsigned Reg) {
6377 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6378 : TRI.getMinimalPhysRegClass(Reg);
6379 };
6380
6381 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6382 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6383 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6384 "Mismatched register size in non subreg COPY");
6385 if (IsSpill)
6386 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6387 getRegClass(SrcReg), &TRI, Register());
6388 else
6389 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6390 getRegClass(DstReg), &TRI, Register());
6391 return &*--InsertPt;
6392 }
6393
6394 // Handle cases like spilling def of:
6395 //
6396 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6397 //
6398 // where the physical register source can be widened and stored to the full
6399 // virtual reg destination stack slot, in this case producing:
6400 //
6401 // STRXui %xzr, %stack.0
6402 //
6403 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6404 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6405 assert(SrcMO.getSubReg() == 0 &&
6406 "Unexpected subreg on physical register");
6407 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6408 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6409 Register());
6410 return &*--InsertPt;
6411 }
6412
6413 // Handle cases like filling use of:
6414 //
6415 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6416 //
6417 // where we can load the full virtual reg source stack slot, into the subreg
6418 // destination, in this case producing:
6419 //
6420 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6421 //
6422 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6423 const TargetRegisterClass *FillRC = nullptr;
6424 switch (DstMO.getSubReg()) {
6425 default:
6426 break;
6427 case AArch64::sub_32:
6428 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6429 FillRC = &AArch64::GPR32RegClass;
6430 break;
6431 case AArch64::ssub:
6432 FillRC = &AArch64::FPR32RegClass;
6433 break;
6434 case AArch64::dsub:
6435 FillRC = &AArch64::FPR64RegClass;
6436 break;
6437 }
6438
6439 if (FillRC) {
6440 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6441 TRI.getRegSizeInBits(*FillRC) &&
6442 "Mismatched regclass size on folded subreg COPY");
6443 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6444 Register());
6445 MachineInstr &LoadMI = *--InsertPt;
6446 MachineOperand &LoadDst = LoadMI.getOperand(0);
6447 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6448 LoadDst.setSubReg(DstMO.getSubReg());
6449 LoadDst.setIsUndef();
6450 return &LoadMI;
6451 }
6452 }
6453 }
6454
6455 // Cannot fold.
6456 return nullptr;
6457}
6458
6460 StackOffset &SOffset,
6461 bool *OutUseUnscaledOp,
6462 unsigned *OutUnscaledOp,
6463 int64_t *EmittableOffset) {
6464 // Set output values in case of early exit.
6465 if (EmittableOffset)
6466 *EmittableOffset = 0;
6467 if (OutUseUnscaledOp)
6468 *OutUseUnscaledOp = false;
6469 if (OutUnscaledOp)
6470 *OutUnscaledOp = 0;
6471
6472 // Exit early for structured vector spills/fills as they can't take an
6473 // immediate offset.
6474 switch (MI.getOpcode()) {
6475 default:
6476 break;
6477 case AArch64::LD1Rv1d:
6478 case AArch64::LD1Rv2s:
6479 case AArch64::LD1Rv2d:
6480 case AArch64::LD1Rv4h:
6481 case AArch64::LD1Rv4s:
6482 case AArch64::LD1Rv8b:
6483 case AArch64::LD1Rv8h:
6484 case AArch64::LD1Rv16b:
6485 case AArch64::LD1Twov2d:
6486 case AArch64::LD1Threev2d:
6487 case AArch64::LD1Fourv2d:
6488 case AArch64::LD1Twov1d:
6489 case AArch64::LD1Threev1d:
6490 case AArch64::LD1Fourv1d:
6491 case AArch64::ST1Twov2d:
6492 case AArch64::ST1Threev2d:
6493 case AArch64::ST1Fourv2d:
6494 case AArch64::ST1Twov1d:
6495 case AArch64::ST1Threev1d:
6496 case AArch64::ST1Fourv1d:
6497 case AArch64::ST1i8:
6498 case AArch64::ST1i16:
6499 case AArch64::ST1i32:
6500 case AArch64::ST1i64:
6501 case AArch64::IRG:
6502 case AArch64::IRGstack:
6503 case AArch64::STGloop:
6504 case AArch64::STZGloop:
6506 }
6507
6508 // Get the min/max offset and the scale.
6509 TypeSize ScaleValue(0U, false), Width(0U, false);
6510 int64_t MinOff, MaxOff;
6511 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6512 MaxOff))
6513 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6514
6515 // Construct the complete offset.
6516 bool IsMulVL = ScaleValue.isScalable();
6517 unsigned Scale = ScaleValue.getKnownMinValue();
6518 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6519
6520 const MachineOperand &ImmOpnd =
6521 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6522 Offset += ImmOpnd.getImm() * Scale;
6523
6524 // If the offset doesn't match the scale, we rewrite the instruction to
6525 // use the unscaled instruction instead. Likewise, if we have a negative
6526 // offset and there is an unscaled op to use.
6527 std::optional<unsigned> UnscaledOp =
6529 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6530 if (useUnscaledOp &&
6531 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6532 MaxOff))
6533 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6534
6535 Scale = ScaleValue.getKnownMinValue();
6536 assert(IsMulVL == ScaleValue.isScalable() &&
6537 "Unscaled opcode has different value for scalable");
6538
6539 int64_t Remainder = Offset % Scale;
6540 assert(!(Remainder && useUnscaledOp) &&
6541 "Cannot have remainder when using unscaled op");
6542
6543 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6544 int64_t NewOffset = Offset / Scale;
6545 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6546 Offset = Remainder;
6547 else {
6548 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6549 Offset = Offset - (NewOffset * Scale);
6550 }
6551
6552 if (EmittableOffset)
6553 *EmittableOffset = NewOffset;
6554 if (OutUseUnscaledOp)
6555 *OutUseUnscaledOp = useUnscaledOp;
6556 if (OutUnscaledOp && UnscaledOp)
6557 *OutUnscaledOp = *UnscaledOp;
6558
6559 if (IsMulVL)
6560 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6561 else
6562 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6564 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6565}
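// Editor's note: an illustrative case for the legality check above. Assume the
// instruction is an LDRXui (scale 8) and the combined byte offset works out to
// 20. Since 20 is not a multiple of 8 and an unscaled variant exists (LDURXi),
// UseUnscaledOp is reported as true; with scale 1 the whole 20 fits the
// emittable immediate, SOffset is left at zero, and the offset is both legal
// and updatable in place.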
6566
6568 unsigned FrameReg, StackOffset &Offset,
6569 const AArch64InstrInfo *TII) {
6570 unsigned Opcode = MI.getOpcode();
6571 unsigned ImmIdx = FrameRegIdx + 1;
6572
6573 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6574 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6575 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6576 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6577 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6578 MI.eraseFromParent();
6579 Offset = StackOffset();
6580 return true;
6581 }
6582
6583 int64_t NewOffset;
6584 unsigned UnscaledOp;
6585 bool UseUnscaledOp;
6586 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6587 &UnscaledOp, &NewOffset);
6590 // Replace the FrameIndex with FrameReg.
6591 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6592 if (UseUnscaledOp)
6593 MI.setDesc(TII->get(UnscaledOp));
6594
6595 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6596 return !Offset;
6597 }
6598
6599 return false;
6600}
6601
6607
6609 return MCInstBuilder(AArch64::HINT).addImm(0);
6610}
6611
6612// AArch64 supports MachineCombiner.
6613bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6614
6615 // True when Opc sets flags
6616static bool isCombineInstrSettingFlag(unsigned Opc) {
6617 switch (Opc) {
6618 case AArch64::ADDSWrr:
6619 case AArch64::ADDSWri:
6620 case AArch64::ADDSXrr:
6621 case AArch64::ADDSXri:
6622 case AArch64::SUBSWrr:
6623 case AArch64::SUBSXrr:
6624 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6625 case AArch64::SUBSWri:
6626 case AArch64::SUBSXri:
6627 return true;
6628 default:
6629 break;
6630 }
6631 return false;
6632}
6633
6634// 32b Opcodes that can be combined with a MUL
6635static bool isCombineInstrCandidate32(unsigned Opc) {
6636 switch (Opc) {
6637 case AArch64::ADDWrr:
6638 case AArch64::ADDWri:
6639 case AArch64::SUBWrr:
6640 case AArch64::ADDSWrr:
6641 case AArch64::ADDSWri:
6642 case AArch64::SUBSWrr:
6643 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6644 case AArch64::SUBWri:
6645 case AArch64::SUBSWri:
6646 return true;
6647 default:
6648 break;
6649 }
6650 return false;
6651}
6652
6653// 64b Opcodes that can be combined with a MUL
6654static bool isCombineInstrCandidate64(unsigned Opc) {
6655 switch (Opc) {
6656 case AArch64::ADDXrr:
6657 case AArch64::ADDXri:
6658 case AArch64::SUBXrr:
6659 case AArch64::ADDSXrr:
6660 case AArch64::ADDSXri:
6661 case AArch64::SUBSXrr:
6662 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6663 case AArch64::SUBXri:
6664 case AArch64::SUBSXri:
6665 case AArch64::ADDv8i8:
6666 case AArch64::ADDv16i8:
6667 case AArch64::ADDv4i16:
6668 case AArch64::ADDv8i16:
6669 case AArch64::ADDv2i32:
6670 case AArch64::ADDv4i32:
6671 case AArch64::SUBv8i8:
6672 case AArch64::SUBv16i8:
6673 case AArch64::SUBv4i16:
6674 case AArch64::SUBv8i16:
6675 case AArch64::SUBv2i32:
6676 case AArch64::SUBv4i32:
6677 return true;
6678 default:
6679 break;
6680 }
6681 return false;
6682}
6683
6684// FP Opcodes that can be combined with a FMUL.
6685static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6686 switch (Inst.getOpcode()) {
6687 default:
6688 break;
6689 case AArch64::FADDHrr:
6690 case AArch64::FADDSrr:
6691 case AArch64::FADDDrr:
6692 case AArch64::FADDv4f16:
6693 case AArch64::FADDv8f16:
6694 case AArch64::FADDv2f32:
6695 case AArch64::FADDv2f64:
6696 case AArch64::FADDv4f32:
6697 case AArch64::FSUBHrr:
6698 case AArch64::FSUBSrr:
6699 case AArch64::FSUBDrr:
6700 case AArch64::FSUBv4f16:
6701 case AArch64::FSUBv8f16:
6702 case AArch64::FSUBv2f32:
6703 case AArch64::FSUBv2f64:
6704 case AArch64::FSUBv4f32:
6706 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6707 // the target options or if FADD/FSUB has the contract fast-math flag.
6708 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
6710 }
6711 return false;
6712}
6713
6714// Opcodes that can be combined with a MUL
6718
6719//
6720// Utility routine that checks if \param MO is defined by an
6721// \param CombineOpc instruction in the basic block \param MBB
6723 unsigned CombineOpc, unsigned ZeroReg = 0,
6724 bool CheckZeroReg = false) {
6725 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6726 MachineInstr *MI = nullptr;
6727
6728 if (MO.isReg() && MO.getReg().isVirtual())
6729 MI = MRI.getUniqueVRegDef(MO.getReg());
6730 // And it needs to be in the trace (otherwise, it won't have a depth).
6731 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
6732 return false;
6733 // Must only be used by the user we combine with.
6734 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6735 return false;
6736
6737 if (CheckZeroReg) {
6738 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6739 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6740 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6741 // The third input reg must be zero.
6742 if (MI->getOperand(3).getReg() != ZeroReg)
6743 return false;
6744 }
6745
6746 if (isCombineInstrSettingFlag(CombineOpc) &&
6747 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6748 return false;
6749
6750 return true;
6751}
6752
6753//
6754// Is \param MO defined by an integer multiply and can be combined?
6756 unsigned MulOpc, unsigned ZeroReg) {
6757 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6758}
6759
6760//
6761// Is \param MO defined by a floating-point multiply and can be combined?
6763 unsigned MulOpc) {
6764 return canCombine(MBB, MO, MulOpc);
6765}
6766
6767// TODO: There are many more machine instruction opcodes to match:
6768// 1. Other data types (integer, vectors)
6769// 2. Other math / logic operations (xor, or)
6770// 3. Other forms of the same operation (intrinsics and other variants)
6771bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6772 bool Invert) const {
6773 if (Invert)
6774 return false;
6775 switch (Inst.getOpcode()) {
6776 // == Floating-point types ==
6777 // -- Floating-point instructions --
6778 case AArch64::FADDHrr:
6779 case AArch64::FADDSrr:
6780 case AArch64::FADDDrr:
6781 case AArch64::FMULHrr:
6782 case AArch64::FMULSrr:
6783 case AArch64::FMULDrr:
6784 case AArch64::FMULX16:
6785 case AArch64::FMULX32:
6786 case AArch64::FMULX64:
6787 // -- Advanced SIMD instructions --
6788 case AArch64::FADDv4f16:
6789 case AArch64::FADDv8f16:
6790 case AArch64::FADDv2f32:
6791 case AArch64::FADDv4f32:
6792 case AArch64::FADDv2f64:
6793 case AArch64::FMULv4f16:
6794 case AArch64::FMULv8f16:
6795 case AArch64::FMULv2f32:
6796 case AArch64::FMULv4f32:
6797 case AArch64::FMULv2f64:
6798 case AArch64::FMULXv4f16:
6799 case AArch64::FMULXv8f16:
6800 case AArch64::FMULXv2f32:
6801 case AArch64::FMULXv4f32:
6802 case AArch64::FMULXv2f64:
6803 // -- SVE instructions --
6804 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6805 // in the SVE instruction set (though there are predicated ones).
6806 case AArch64::FADD_ZZZ_H:
6807 case AArch64::FADD_ZZZ_S:
6808 case AArch64::FADD_ZZZ_D:
6809 case AArch64::FMUL_ZZZ_H:
6810 case AArch64::FMUL_ZZZ_S:
6811 case AArch64::FMUL_ZZZ_D:
6814
6815 // == Integer types ==
6816 // -- Base instructions --
6817 // Opcodes MULWrr and MULXrr don't exist because
6818 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6819 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6820 // The machine-combiner does not support three-source-operand machine
6821 // instructions, so we cannot reassociate MULs.
6822 case AArch64::ADDWrr:
6823 case AArch64::ADDXrr:
6824 case AArch64::ANDWrr:
6825 case AArch64::ANDXrr:
6826 case AArch64::ORRWrr:
6827 case AArch64::ORRXrr:
6828 case AArch64::EORWrr:
6829 case AArch64::EORXrr:
6830 case AArch64::EONWrr:
6831 case AArch64::EONXrr:
6832 // -- Advanced SIMD instructions --
6833 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6834 // in the Advanced SIMD instruction set.
6835 case AArch64::ADDv8i8:
6836 case AArch64::ADDv16i8:
6837 case AArch64::ADDv4i16:
6838 case AArch64::ADDv8i16:
6839 case AArch64::ADDv2i32:
6840 case AArch64::ADDv4i32:
6841 case AArch64::ADDv1i64:
6842 case AArch64::ADDv2i64:
6843 case AArch64::MULv8i8:
6844 case AArch64::MULv16i8:
6845 case AArch64::MULv4i16:
6846 case AArch64::MULv8i16:
6847 case AArch64::MULv2i32:
6848 case AArch64::MULv4i32:
6849 case AArch64::ANDv8i8:
6850 case AArch64::ANDv16i8:
6851 case AArch64::ORRv8i8:
6852 case AArch64::ORRv16i8:
6853 case AArch64::EORv8i8:
6854 case AArch64::EORv16i8:
6855 // -- SVE instructions --
6856 case AArch64::ADD_ZZZ_B:
6857 case AArch64::ADD_ZZZ_H:
6858 case AArch64::ADD_ZZZ_S:
6859 case AArch64::ADD_ZZZ_D:
6860 case AArch64::MUL_ZZZ_B:
6861 case AArch64::MUL_ZZZ_H:
6862 case AArch64::MUL_ZZZ_S:
6863 case AArch64::MUL_ZZZ_D:
6864 case AArch64::AND_ZZZ:
6865 case AArch64::ORR_ZZZ:
6866 case AArch64::EOR_ZZZ:
6867 return true;
6868
6869 default:
6870 return false;
6871 }
6872}
6873
6874/// Find instructions that can be turned into madd.
6876 SmallVectorImpl<unsigned> &Patterns) {
6877 unsigned Opc = Root.getOpcode();
6878 MachineBasicBlock &MBB = *Root.getParent();
6879 bool Found = false;
6880
6882 return false;
6884 int Cmp_NZCV =
6885 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6886 // When NZCV is live, bail out.
6887 if (Cmp_NZCV == -1)
6888 return false;
6889 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6890 // When the opcode can't change, bail out.
6891 // CHECKME: do we miss any cases for opcode conversion?
6892 if (NewOpc == Opc)
6893 return false;
6894 Opc = NewOpc;
6895 }
6896
6897 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6898 unsigned Pattern) {
6899 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6900 Patterns.push_back(Pattern);
6901 Found = true;
6902 }
6903 };
6904
6905 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6906 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6907 Patterns.push_back(Pattern);
6908 Found = true;
6909 }
6910 };
6911
6913
6914 switch (Opc) {
6915 default:
6916 break;
6917 case AArch64::ADDWrr:
6918 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6919 "ADDWrr does not have register operands");
6920 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6921 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6922 break;
6923 case AArch64::ADDXrr:
6924 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6925 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6926 break;
6927 case AArch64::SUBWrr:
6928 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6929 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6930 break;
6931 case AArch64::SUBXrr:
6932 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6933 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6934 break;
6935 case AArch64::ADDWri:
6936 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6937 break;
6938 case AArch64::ADDXri:
6939 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6940 break;
6941 case AArch64::SUBWri:
6942 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6943 break;
6944 case AArch64::SUBXri:
6945 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6946 break;
6947 case AArch64::ADDv8i8:
6948 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6949 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6950 break;
6951 case AArch64::ADDv16i8:
6952 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6953 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6954 break;
6955 case AArch64::ADDv4i16:
6956 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6957 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6958 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6959 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6960 break;
6961 case AArch64::ADDv8i16:
6962 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6963 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6964 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6965 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6966 break;
6967 case AArch64::ADDv2i32:
6968 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6969 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6970 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6971 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6972 break;
6973 case AArch64::ADDv4i32:
6974 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6975 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6976 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6977 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6978 break;
6979 case AArch64::SUBv8i8:
6980 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6981 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6982 break;
6983 case AArch64::SUBv16i8:
6984 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6985 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6986 break;
6987 case AArch64::SUBv4i16:
6988 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6989 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6990 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6991 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6992 break;
6993 case AArch64::SUBv8i16:
6994 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6995 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6996 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6997 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6998 break;
6999 case AArch64::SUBv2i32:
7000 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7001 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7002 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7003 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7004 break;
7005 case AArch64::SUBv4i32:
7006 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7007 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7008 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7009 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7010 break;
7011 }
7012 return Found;
7013}
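// Editor's note: a rough sketch of the MIR shape matched by the MULADDW_OP2
// pattern above and what the machine combiner can turn it into (virtual
// register numbers are made up):
//
//   %3 = MADDWrrr %1, %2, %wzr     ; i.e. %3 = %1 * %2
//   %4 = ADDWrr   %0, %3
// -->
//   %4 = MADDWrrr %1, %2, %0       ; %4 = %1 * %2 + %0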
7014
7015bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7016 switch (Opcode) {
7017 default:
7018 break;
7019 case AArch64::UABALB_ZZZ_D:
7020 case AArch64::UABALB_ZZZ_H:
7021 case AArch64::UABALB_ZZZ_S:
7022 case AArch64::UABALT_ZZZ_D:
7023 case AArch64::UABALT_ZZZ_H:
7024 case AArch64::UABALT_ZZZ_S:
7025 case AArch64::SABALB_ZZZ_D:
7026 case AArch64::SABALB_ZZZ_S:
7027 case AArch64::SABALB_ZZZ_H:
7028 case AArch64::SABALT_ZZZ_D:
7029 case AArch64::SABALT_ZZZ_S:
7030 case AArch64::SABALT_ZZZ_H:
7031 case AArch64::UABALv16i8_v8i16:
7032 case AArch64::UABALv2i32_v2i64:
7033 case AArch64::UABALv4i16_v4i32:
7034 case AArch64::UABALv4i32_v2i64:
7035 case AArch64::UABALv8i16_v4i32:
7036 case AArch64::UABALv8i8_v8i16:
7037 case AArch64::UABAv16i8:
7038 case AArch64::UABAv2i32:
7039 case AArch64::UABAv4i16:
7040 case AArch64::UABAv4i32:
7041 case AArch64::UABAv8i16:
7042 case AArch64::UABAv8i8:
7043 case AArch64::SABALv16i8_v8i16:
7044 case AArch64::SABALv2i32_v2i64:
7045 case AArch64::SABALv4i16_v4i32:
7046 case AArch64::SABALv4i32_v2i64:
7047 case AArch64::SABALv8i16_v4i32:
7048 case AArch64::SABALv8i8_v8i16:
7049 case AArch64::SABAv16i8:
7050 case AArch64::SABAv2i32:
7051 case AArch64::SABAv4i16:
7052 case AArch64::SABAv4i32:
7053 case AArch64::SABAv8i16:
7054 case AArch64::SABAv8i8:
7055 return true;
7056 }
7057
7058 return false;
7059}
7060
7061unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7062 unsigned AccumulationOpcode) const {
7063 switch (AccumulationOpcode) {
7064 default:
7065 llvm_unreachable("Unsupported accumulation Opcode!");
7066 case AArch64::UABALB_ZZZ_D:
7067 return AArch64::UABDLB_ZZZ_D;
7068 case AArch64::UABALB_ZZZ_H:
7069 return AArch64::UABDLB_ZZZ_H;
7070 case AArch64::UABALB_ZZZ_S:
7071 return AArch64::UABDLB_ZZZ_S;
7072 case AArch64::UABALT_ZZZ_D:
7073 return AArch64::UABDLT_ZZZ_D;
7074 case AArch64::UABALT_ZZZ_H:
7075 return AArch64::UABDLT_ZZZ_H;
7076 case AArch64::UABALT_ZZZ_S:
7077 return AArch64::UABDLT_ZZZ_S;
7078 case AArch64::UABALv16i8_v8i16:
7079 return AArch64::UABDLv16i8_v8i16;
7080 case AArch64::UABALv2i32_v2i64:
7081 return AArch64::UABDLv2i32_v2i64;
7082 case AArch64::UABALv4i16_v4i32:
7083 return AArch64::UABDLv4i16_v4i32;
7084 case AArch64::UABALv4i32_v2i64:
7085 return AArch64::UABDLv4i32_v2i64;
7086 case AArch64::UABALv8i16_v4i32:
7087 return AArch64::UABDLv8i16_v4i32;
7088 case AArch64::UABALv8i8_v8i16:
7089 return AArch64::UABDLv8i8_v8i16;
7090 case AArch64::UABAv16i8:
7091 return AArch64::UABDv16i8;
7092 case AArch64::UABAv2i32:
7093 return AArch64::UABDv2i32;
7094 case AArch64::UABAv4i16:
7095 return AArch64::UABDv4i16;
7096 case AArch64::UABAv4i32:
7097 return AArch64::UABDv4i32;
7098 case AArch64::UABAv8i16:
7099 return AArch64::UABDv8i16;
7100 case AArch64::UABAv8i8:
7101 return AArch64::UABDv8i8;
7102 case AArch64::SABALB_ZZZ_D:
7103 return AArch64::SABDLB_ZZZ_D;
7104 case AArch64::SABALB_ZZZ_S:
7105 return AArch64::SABDLB_ZZZ_S;
7106 case AArch64::SABALB_ZZZ_H:
7107 return AArch64::SABDLB_ZZZ_H;
7108 case AArch64::SABALT_ZZZ_D:
7109 return AArch64::SABDLT_ZZZ_D;
7110 case AArch64::SABALT_ZZZ_S:
7111 return AArch64::SABDLT_ZZZ_S;
7112 case AArch64::SABALT_ZZZ_H:
7113 return AArch64::SABDLT_ZZZ_H;
7114 case AArch64::SABALv16i8_v8i16:
7115 return AArch64::SABDLv16i8_v8i16;
7116 case AArch64::SABALv2i32_v2i64:
7117 return AArch64::SABDLv2i32_v2i64;
7118 case AArch64::SABALv4i16_v4i32:
7119 return AArch64::SABDLv4i16_v4i32;
7120 case AArch64::SABALv4i32_v2i64:
7121 return AArch64::SABDLv4i32_v2i64;
7122 case AArch64::SABALv8i16_v4i32:
7123 return AArch64::SABDLv8i16_v4i32;
7124 case AArch64::SABALv8i8_v8i16:
7125 return AArch64::SABDLv8i8_v8i16;
7126 case AArch64::SABAv16i8:
7127 return AArch64::SABDv16i8;
7128 case AArch64::SABAv2i32:
7129 return AArch64::SABDv2i32;
7130 case AArch64::SABAv4i16:
7131 return AArch64::SABDv4i16;
7132 case AArch64::SABAv4i32:
7133 return AArch64::SABDv4i32;
7134 case AArch64::SABAv8i16:
7135 return AArch64::SABDv8i16;
7136 case AArch64::SABAv8i8:
7137 return AArch64::SABDv8i8;
7138 }
7139}
7140
7141/// Floating-Point Support
7142
7143/// Find instructions that can be turned into madd.
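/// A minimal illustration (register names are only an example): a sequence
/// such as
///   fmul s1, s2, s3
///   fadd s0, s1, s4
/// can be contracted into
///   fmadd s0, s2, s3, s4
/// when the intermediate product has no other users and the fast-math flags
/// required by isCombineInstrCandidateFP are present.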
7145 SmallVectorImpl<unsigned> &Patterns) {
7146
7147 if (!isCombineInstrCandidateFP(Root))
7148 return false;
7149
7150 MachineBasicBlock &MBB = *Root.getParent();
7151 bool Found = false;
7152
7153 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7154 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7155 Patterns.push_back(Pattern);
7156 return true;
7157 }
7158 return false;
7159 };
7160
7162
7163 switch (Root.getOpcode()) {
7164 default:
7165 assert(false && "Unsupported FP instruction in combiner\n");
7166 break;
7167 case AArch64::FADDHrr:
7168 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7169 "FADDHrr does not have register operands");
7170
7171 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7172 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7173 break;
7174 case AArch64::FADDSrr:
7175 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7176 "FADDSrr does not have register operands");
7177
7178 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7179 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7180
7181 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7182 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7183 break;
7184 case AArch64::FADDDrr:
7185 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7186 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7187
7188 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7189 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7190 break;
7191 case AArch64::FADDv4f16:
7192 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7193 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7194
7195 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7196 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7197 break;
7198 case AArch64::FADDv8f16:
7199 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7200 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7201
7202 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7203 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7204 break;
7205 case AArch64::FADDv2f32:
7206 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7207 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7208
7209 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7210 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7211 break;
7212 case AArch64::FADDv2f64:
7213 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7214 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7215
7216 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7217 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7218 break;
7219 case AArch64::FADDv4f32:
7220 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7221 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7222
7223 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7224 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7225 break;
7226 case AArch64::FSUBHrr:
7227 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7228 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7229 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7230 break;
7231 case AArch64::FSUBSrr:
7232 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7233
7234 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7235 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7236
7237 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7238 break;
7239 case AArch64::FSUBDrr:
7240 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7241
7242 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7243 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7244
7245 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7246 break;
7247 case AArch64::FSUBv4f16:
7248 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7249 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7250
7251 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7252 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7253 break;
7254 case AArch64::FSUBv8f16:
7255 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7256 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7257
7258 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7259 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7260 break;
7261 case AArch64::FSUBv2f32:
7262 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7263 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7264
7265 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7266 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7267 break;
7268 case AArch64::FSUBv2f64:
7269 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7270 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7271
7272 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7273 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7274 break;
7275 case AArch64::FSUBv4f32:
7276 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7277 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7278
7279 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7280 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7281 break;
7282 }
7283 return Found;
7284}
7285
7287 SmallVectorImpl<unsigned> &Patterns) {
7288 MachineBasicBlock &MBB = *Root.getParent();
7289 bool Found = false;
7290
7291 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7292 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7293 MachineOperand &MO = Root.getOperand(Operand);
7294 MachineInstr *MI = nullptr;
7295 if (MO.isReg() && MO.getReg().isVirtual())
7296 MI = MRI.getUniqueVRegDef(MO.getReg());
7297 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7298 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7299 MI->getOperand(1).getReg().isVirtual())
7300 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7301 if (MI && MI->getOpcode() == Opcode) {
7302 Patterns.push_back(Pattern);
7303 return true;
7304 }
7305 return false;
7306 };
7307
7309
7310 switch (Root.getOpcode()) {
7311 default:
7312 return false;
7313 case AArch64::FMULv2f32:
7314 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7315 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7316 break;
7317 case AArch64::FMULv2f64:
7318 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7319 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7320 break;
7321 case AArch64::FMULv4f16:
7322 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7323 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7324 break;
7325 case AArch64::FMULv4f32:
7326 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7327 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7328 break;
7329 case AArch64::FMULv8f16:
7330 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7331 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7332 break;
7333 }
7334
7335 return Found;
7336}
7337
7339 SmallVectorImpl<unsigned> &Patterns) {
7340 unsigned Opc = Root.getOpcode();
7341 MachineBasicBlock &MBB = *Root.getParent();
7342 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7343
7344 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7345 MachineOperand &MO = Root.getOperand(1);
7346 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7347 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7348 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7352 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7353 Patterns.push_back(Pattern);
7354 return true;
7355 }
7356 return false;
7357 };
7358
7359 switch (Opc) {
7360 default:
7361 break;
7362 case AArch64::FNEGDr:
7363 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7364 case AArch64::FNEGSr:
7365 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7366 }
7367
7368 return false;
7369}
7370
7371/// Return true when a code sequence can improve throughput. It
7372/// should be called only for instructions in loops.
7373/// \param Pattern - combiner pattern
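/// For example, replacing a vector FMUL feeding an FADD with a single FMLA
/// issues one instruction per loop iteration instead of two, so such
/// multiply-accumulate rewrites are the typical throughput-improving cases
/// recognised here (an illustrative example, not an exhaustive list).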
7375 switch (Pattern) {
7376 default:
7377 break;
7483 return true;
7484 } // end switch (Pattern)
7485 return false;
7486}
7487
7488/// Find other MI combine patterns.
7490 SmallVectorImpl<unsigned> &Patterns) {
7491 // A - (B + C) ==> (A - B) - C or (A - C) - B
7492 unsigned Opc = Root.getOpcode();
7493 MachineBasicBlock &MBB = *Root.getParent();
7494
7495 switch (Opc) {
7496 case AArch64::SUBWrr:
7497 case AArch64::SUBSWrr:
7498 case AArch64::SUBXrr:
7499 case AArch64::SUBSXrr:
7500 // Found candidate root.
7501 break;
7502 default:
7503 return false;
7504 }
7505
7507 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7508 -1)
7509 return false;
7510
7511 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7512 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7513 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7514 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7517 return true;
7518 }
7519
7520 return false;
7521}
7522
7523/// Check if the given instruction forms a gather load pattern that can be
7524/// optimized for better Memory-Level Parallelism (MLP). This function
7525/// identifies chains of NEON lane load instructions that load data from
7526/// different memory addresses into individual lanes of a 128-bit vector
7527/// register, then attempts to split the pattern into parallel loads to break
7528/// the serial dependency between instructions.
7529///
7530/// Pattern Matched:
7531/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7532/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7533///
7534/// Transformed Into:
7535/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7536/// to combine the results, enabling better memory-level parallelism.
7537///
7538/// Supported Element Types:
7539/// - 32-bit elements (LD1i32, 4 lanes total)
7540/// - 16-bit elements (LD1i16, 8 lanes total)
7541/// - 8-bit elements (LD1i8, 16 lanes total)
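///
/// A sketch of a matched chain for 32-bit elements (registers are
/// illustrative, not the exact MIR):
///   ldr  s0, [x0]              // initial scalar load
///   (SUBREG_TO_REG)            // pseudo: the scalar becomes lane 0
///   ld1  { v0.s }[1], [x1]     // LD1i32 into lane 1
///   ld1  { v0.s }[2], [x2]     // LD1i32 into lane 2
///   ld1  { v0.s }[3], [x3]     // LD1i32 into lane 3  (Root)
/// Each LD1i* reads the vector produced by the previous load, so the chain is
/// strictly serial; the transformation described above splits it in two.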
7543 SmallVectorImpl<unsigned> &Patterns,
7544 unsigned LoadLaneOpCode, unsigned NumLanes) {
7545 const MachineFunction *MF = Root.getMF();
7546
7547 // Early exit if optimizing for size.
7548 if (MF->getFunction().hasMinSize())
7549 return false;
7550
7551 const MachineRegisterInfo &MRI = MF->getRegInfo();
7553
7554 // The root of the pattern must load into the last lane of the vector.
7555 if (Root.getOperand(2).getImm() != NumLanes - 1)
7556 return false;
7557
7558 // Check that we have loads into all lanes except lane 0.
7559 // For each load we also want to check that:
7560 // 1. It has a single non-debug use (since we will be replacing the virtual
7561 // register)
7562 // 2. That the addressing mode only uses a single pointer operand
7563 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7564 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7565 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7567 while (!RemainingLanes.empty() && CurrInstr &&
7568 CurrInstr->getOpcode() == LoadLaneOpCode &&
7569 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7570 CurrInstr->getNumOperands() == 4) {
7571 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7572 LoadInstrs.push_back(CurrInstr);
7573 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7574 }
7575
7576 // Check that we have found a match for lanes N-1..1.
7577 if (!RemainingLanes.empty())
7578 return false;
7579
7580 // Match the SUBREG_TO_REG sequence.
7581 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7582 return false;
7583
7584 // Verify that the subreg to reg loads an integer into the first lane.
7585 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7586 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7587 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7588 return false;
7589
7590 // Verify that it also has a single non-debug use.
7591 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7592 return false;
7593
7594 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7595
7596 // If there is any chance of aliasing, do not apply the pattern.
7597 // Walk backward through the MBB starting from Root.
7598 // Exit early if we've encountered all load instructions or hit the search
7599 // limit.
7600 auto MBBItr = Root.getIterator();
7601 unsigned RemainingSteps = GatherOptSearchLimit;
7602 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7603 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7604 const MachineBasicBlock *MBB = Root.getParent();
7605
7606 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7607 !RemainingLoadInstrs.empty();
7608 --MBBItr, --RemainingSteps) {
7609 const MachineInstr &CurrInstr = *MBBItr;
7610
7611 // Remove this instruction from remaining loads if it's one we're tracking.
7612 RemainingLoadInstrs.erase(&CurrInstr);
7613
7614 // Check for potential aliasing with any of the load instructions to
7615 // optimize.
7616 if (CurrInstr.isLoadFoldBarrier())
7617 return false;
7618 }
7619
7620 // If we hit the search limit without finding all load instructions,
7621 // don't match the pattern.
7622 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7623 return false;
7624
7625 switch (NumLanes) {
7626 case 4:
7628 break;
7629 case 8:
7631 break;
7632 case 16:
7634 break;
7635 default:
7636 llvm_unreachable("Got bad number of lanes for gather pattern.");
7637 }
7638
7639 return true;
7640}
7641
7642/// Search for patterns of LD instructions we can optimize.
7644 SmallVectorImpl<unsigned> &Patterns) {
7645
7646 // The pattern searches for loads into single lanes.
7647 switch (Root.getOpcode()) {
7648 case AArch64::LD1i32:
7649 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7650 case AArch64::LD1i16:
7651 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7652 case AArch64::LD1i8:
7653 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7654 default:
7655 return false;
7656 }
7657}
7658
7659/// Generate optimized instruction sequence for gather load patterns to improve
7660/// Memory-Level Parallelism (MLP). This function transforms a chain of
7661/// sequential NEON lane loads into parallel vector loads that can execute
7662/// concurrently.
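///
/// Continuing the 32-bit sketch from getGatherLanePattern (illustrative
/// registers, not the exact MIR produced):
///   ldr  s0, [x0]              ldr  s1, [x2]
///   ld1  { v0.s }[1], [x1]     ld1  { v1.s }[1], [x3]
///   zip1 v0.2d, v0.2d, v1.2d
/// The two half-width chains are independent, so their loads can issue in
/// parallel before ZIP1 recombines the lower 64 bits of each register.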
7663static void
7667 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7668 unsigned Pattern, unsigned NumLanes) {
7669 MachineFunction &MF = *Root.getParent()->getParent();
7672
7673 // Gather the initial load instructions to build the pattern.
7674 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7675 MachineInstr *CurrInstr = &Root;
7676 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7677 LoadToLaneInstrs.push_back(CurrInstr);
7678 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7679 }
7680
7681 // Sort the load instructions according to the lane.
7682 llvm::sort(LoadToLaneInstrs,
7683 [](const MachineInstr *A, const MachineInstr *B) {
7684 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7685 });
7686
7687 MachineInstr *SubregToReg = CurrInstr;
7688 LoadToLaneInstrs.push_back(
7689 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7690 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7691
7692 const TargetRegisterClass *FPR128RegClass =
7693 MRI.getRegClass(Root.getOperand(0).getReg());
7694
7695 // Helper lambda to create a LD1 instruction.
7696 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7697 Register SrcRegister, unsigned Lane,
7698 Register OffsetRegister,
7699 bool OffsetRegisterKillState) {
7700 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7701 MachineInstrBuilder LoadIndexIntoRegister =
7702 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7703 NewRegister)
7704 .addReg(SrcRegister)
7705 .addImm(Lane)
7706 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
7707 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7708 InsInstrs.push_back(LoadIndexIntoRegister);
7709 return NewRegister;
7710 };
7711
7712 // Helper to create a load instruction based on the NumLanes in the NEON
7713 // register we are rewriting.
7714 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
7715 Register OffsetReg,
7716 bool KillState) -> MachineInstrBuilder {
7717 unsigned Opcode;
7718 switch (NumLanes) {
7719 case 4:
7720 Opcode = AArch64::LDRSui;
7721 break;
7722 case 8:
7723 Opcode = AArch64::LDRHui;
7724 break;
7725 case 16:
7726 Opcode = AArch64::LDRBui;
7727 break;
7728 default:
7730 "Got unsupported number of lanes in machine-combiner gather pattern");
7731 }
7732 // Immediate offset load
7733 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7734 .addReg(OffsetReg)
7735 .addImm(0);
7736 };
7737
7738 // Load the remaining lanes into register 0.
7739 auto LanesToLoadToReg0 =
7740 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7741 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7742 Register PrevReg = SubregToReg->getOperand(0).getReg();
7743 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7744 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7745 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7746 OffsetRegOperand.getReg(),
7747 OffsetRegOperand.isKill());
7748 DelInstrs.push_back(LoadInstr);
7749 }
7750 Register LastLoadReg0 = PrevReg;
7751
7752 // First load into register 1. Perform an integer load to zero out the upper
7753 // lanes in a single instruction.
7754 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7755 MachineInstr *OriginalSplitLoad =
7756 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7757 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
7758 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7759
7760 const MachineOperand &OriginalSplitToLoadOffsetOperand =
7761 OriginalSplitLoad->getOperand(3);
7762 MachineInstrBuilder MiddleIndexLoadInstr =
7763 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
7764 OriginalSplitToLoadOffsetOperand.getReg(),
7765 OriginalSplitToLoadOffsetOperand.isKill());
7766
7767 InstrIdxForVirtReg.insert(
7768 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7769 InsInstrs.push_back(MiddleIndexLoadInstr);
7770 DelInstrs.push_back(OriginalSplitLoad);
7771
7772 // Subreg To Reg instruction for register 1.
7773 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7774 unsigned SubregType;
7775 switch (NumLanes) {
7776 case 4:
7777 SubregType = AArch64::ssub;
7778 break;
7779 case 8:
7780 SubregType = AArch64::hsub;
7781 break;
7782 case 16:
7783 SubregType = AArch64::bsub;
7784 break;
7785 default:
7787 "Got invalid NumLanes for machine-combiner gather pattern");
7788 }
7789
7790 auto SubRegToRegInstr =
7791 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7792 DestRegForSubregToReg)
7793 .addImm(0)
7794 .addReg(DestRegForMiddleIndex, getKillRegState(true))
7795 .addImm(SubregType);
7796 InstrIdxForVirtReg.insert(
7797 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7798 InsInstrs.push_back(SubRegToRegInstr);
7799
7800 // Load remaining lanes into register 1.
7801 auto LanesToLoadToReg1 =
7802 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7803 LoadToLaneInstrsAscending.end());
7804 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7805 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7806 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7807 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7808 OffsetRegOperand.getReg(),
7809 OffsetRegOperand.isKill());
7810
7811 // Do not add the last reg to DelInstrs - it will be removed later.
7812 if (Index == NumLanes / 2 - 2) {
7813 break;
7814 }
7815 DelInstrs.push_back(LoadInstr);
7816 }
7817 Register LastLoadReg1 = PrevReg;
7818
7819 // Create the final zip instruction to combine the results.
7820 MachineInstrBuilder ZipInstr =
7821 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7822 Root.getOperand(0).getReg())
7823 .addReg(LastLoadReg0)
7824 .addReg(LastLoadReg1);
7825 InsInstrs.push_back(ZipInstr);
7826}
7827
7841
7842/// Return true when there is potentially a faster code sequence for an
7843/// instruction chain ending in \p Root. All potential patterns are listed in
7844/// the \p Pattern vector. Pattern should be sorted in priority order since the
7845/// pattern evaluator stops checking as soon as it finds a faster sequence.
7846
7847bool AArch64InstrInfo::getMachineCombinerPatterns(
7848 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7849 bool DoRegPressureReduce) const {
7850 // Integer patterns
7851 if (getMaddPatterns(Root, Patterns))
7852 return true;
7853 // Floating point patterns
7854 if (getFMULPatterns(Root, Patterns))
7855 return true;
7856 if (getFMAPatterns(Root, Patterns))
7857 return true;
7858 if (getFNEGPatterns(Root, Patterns))
7859 return true;
7860
7861 // Other patterns
7862 if (getMiscPatterns(Root, Patterns))
7863 return true;
7864
7865 // Load patterns
7866 if (getLoadPatterns(Root, Patterns))
7867 return true;
7868
7869 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7870 DoRegPressureReduce);
7871}
7872
7874/// genFusedMultiply - Generate fused multiply instructions.
7875/// This function supports both integer and floating point instructions.
7876/// A typical example:
7877/// F|MUL I=A,B,0
7878/// F|ADD R,I,C
7879/// ==> F|MADD R,A,B,C
7880/// \param MF Containing MachineFunction
7881/// \param MRI Register information
7882/// \param TII Target information
7883/// \param Root is the F|ADD instruction
7884/// \param [out] InsInstrs is a vector of machine instructions and will
7885/// contain the generated madd instruction
7886/// \param IdxMulOpd is index of operand in Root that is the result of
7887/// the F|MUL. In the example above IdxMulOpd is 1.
7888 /// \param MaddOpc the opcode of the f|madd instruction
7889/// \param RC Register class of operands
7890 /// \param kind the kind of FMA instruction (addressing mode) to be generated
7891/// \param ReplacedAddend is the result register from the instruction
7892/// replacing the non-combined operand, if any.
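/// Schematically, the three FMAInstKind forms built below differ only in
/// operand order and in whether the lane immediate is copied from the F|MUL:
///   Default:     MaddOpc Result, MulSrc0, MulSrc1, Addend
///   Indexed:     MaddOpc Result, Addend, MulSrc0, MulSrc1, LaneImm
///   Accumulator: MaddOpc Result, Addend, MulSrc0, MulSrc1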
7893static MachineInstr *
7895 const TargetInstrInfo *TII, MachineInstr &Root,
7896 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7897 unsigned MaddOpc, const TargetRegisterClass *RC,
7899 const Register *ReplacedAddend = nullptr) {
7900 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7901
7902 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7903 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7904 Register ResultReg = Root.getOperand(0).getReg();
7905 Register SrcReg0 = MUL->getOperand(1).getReg();
7906 bool Src0IsKill = MUL->getOperand(1).isKill();
7907 Register SrcReg1 = MUL->getOperand(2).getReg();
7908 bool Src1IsKill = MUL->getOperand(2).isKill();
7909
7910 Register SrcReg2;
7911 bool Src2IsKill;
7912 if (ReplacedAddend) {
7913 // If we just generated a new addend, we must be its only use.
7914 SrcReg2 = *ReplacedAddend;
7915 Src2IsKill = true;
7916 } else {
7917 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7918 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7919 }
7920
7921 if (ResultReg.isVirtual())
7922 MRI.constrainRegClass(ResultReg, RC);
7923 if (SrcReg0.isVirtual())
7924 MRI.constrainRegClass(SrcReg0, RC);
7925 if (SrcReg1.isVirtual())
7926 MRI.constrainRegClass(SrcReg1, RC);
7927 if (SrcReg2.isVirtual())
7928 MRI.constrainRegClass(SrcReg2, RC);
7929
7931 if (kind == FMAInstKind::Default)
7932 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7933 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7934 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7935 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7936 else if (kind == FMAInstKind::Indexed)
7937 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7938 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7939 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7940 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7941 .addImm(MUL->getOperand(3).getImm());
7942 else if (kind == FMAInstKind::Accumulator)
7943 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7944 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7945 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7946 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7947 else
7948 assert(false && "Invalid FMA instruction kind \n");
7949 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
7950 InsInstrs.push_back(MIB);
7951 return MUL;
7952}
7953
7954static MachineInstr *
7956 const TargetInstrInfo *TII, MachineInstr &Root,
7958 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7959
7960 unsigned Opc = 0;
7961 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
7962 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7963 Opc = AArch64::FNMADDSrrr;
7964 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7965 Opc = AArch64::FNMADDDrrr;
7966 else
7967 return nullptr;
7968
7969 Register ResultReg = Root.getOperand(0).getReg();
7970 Register SrcReg0 = MAD->getOperand(1).getReg();
7971 Register SrcReg1 = MAD->getOperand(2).getReg();
7972 Register SrcReg2 = MAD->getOperand(3).getReg();
7973 bool Src0IsKill = MAD->getOperand(1).isKill();
7974 bool Src1IsKill = MAD->getOperand(2).isKill();
7975 bool Src2IsKill = MAD->getOperand(3).isKill();
7976 if (ResultReg.isVirtual())
7977 MRI.constrainRegClass(ResultReg, RC);
7978 if (SrcReg0.isVirtual())
7979 MRI.constrainRegClass(SrcReg0, RC);
7980 if (SrcReg1.isVirtual())
7981 MRI.constrainRegClass(SrcReg1, RC);
7982 if (SrcReg2.isVirtual())
7983 MRI.constrainRegClass(SrcReg2, RC);
7984
7986 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
7987 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7988 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7989 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7990 InsInstrs.push_back(MIB);
7991
7992 return MAD;
7993}
7994
7995/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
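/// For example (illustrative registers):
///   dup  v1.2s, v2.s[1]
///   fmul v0.2s, v3.2s, v1.2s
/// becomes the single lane-indexed multiply
///   fmul v0.2s, v3.2s, v2.s[1]
/// which removes the DUP and the dependency it introduces.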
7996static MachineInstr *
7999 unsigned IdxDupOp, unsigned MulOpc,
8001 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8002 "Invalid index of FMUL operand");
8003
8004 MachineFunction &MF = *Root.getMF();
8006
8007 MachineInstr *Dup =
8008 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8009
8010 if (Dup->getOpcode() == TargetOpcode::COPY)
8011 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8012
8013 Register DupSrcReg = Dup->getOperand(1).getReg();
8014 MRI.clearKillFlags(DupSrcReg);
8015 MRI.constrainRegClass(DupSrcReg, RC);
8016
8017 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8018
8019 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8020 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8021
8022 Register ResultReg = Root.getOperand(0).getReg();
8023
8025 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8026 .add(MulOp)
8027 .addReg(DupSrcReg)
8028 .addImm(DupSrcLane);
8029
8030 InsInstrs.push_back(MIB);
8031 return &Root;
8032}
8033
8034/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8035/// instructions.
8036///
8037/// \see genFusedMultiply
8041 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8042 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8044}
8045
8046/// genNeg - Helper to generate an intermediate negation of the second operand
8047/// of Root
8049 const TargetInstrInfo *TII, MachineInstr &Root,
8051 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8052 unsigned MnegOpc, const TargetRegisterClass *RC) {
8053 Register NewVR = MRI.createVirtualRegister(RC);
8055 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8056 .add(Root.getOperand(2));
8057 InsInstrs.push_back(MIB);
8058
8059 assert(InstrIdxForVirtReg.empty());
8060 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8061
8062 return NewVR;
8063}
8064
8065/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8066/// instructions with an additional negation of the accumulator
8070 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8071 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8072 assert(IdxMulOpd == 1);
8073
8074 Register NewVR =
8075 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8076 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8077 FMAInstKind::Accumulator, &NewVR);
8078}
8079
8080/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8081/// instructions.
8082///
8083/// \see genFusedMultiply
8087 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8088 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8090}
8091
8092 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
8093/// instructions with an additional negation of the accumulator
8097 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8098 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8099 assert(IdxMulOpd == 1);
8100
8101 Register NewVR =
8102 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8103
8104 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8105 FMAInstKind::Indexed, &NewVR);
8106}
8107
8108/// genMaddR - Generate madd instruction and combine mul and add using
8109/// an extra virtual register
8110/// Example - an ADD intermediate needs to be stored in a register:
8111/// MUL I=A,B,0
8112/// ADD R,I,Imm
8113/// ==> ORR V, ZR, Imm
8114/// ==> MADD R,A,B,V
8115/// \param MF Containing MachineFunction
8116/// \param MRI Register information
8117/// \param TII Target information
8118/// \param Root is the ADD instruction
8119/// \param [out] InsInstrs is a vector of machine instructions and will
8120/// contain the generated madd instruction
8121/// \param IdxMulOpd is index of operand in Root that is the result of
8122/// the MUL. In the example above IdxMulOpd is 1.
8123 /// \param MaddOpc the opcode of the madd instruction
8124/// \param VR is a virtual register that holds the value of an ADD operand
8125/// (V in the example above).
8126/// \param RC Register class of operands
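/// A concrete 32-bit illustration (registers and the immediate are only an
/// example):
///   mul  w8, w0, w1
///   add  w2, w8, #16
/// becomes
///   mov  w9, #16
///   madd w2, w0, w1, w9
/// provided the immediate can be materialised by a single instruction.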
8128 const TargetInstrInfo *TII, MachineInstr &Root,
8130 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8131 const TargetRegisterClass *RC) {
8132 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8133
8134 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8135 Register ResultReg = Root.getOperand(0).getReg();
8136 Register SrcReg0 = MUL->getOperand(1).getReg();
8137 bool Src0IsKill = MUL->getOperand(1).isKill();
8138 Register SrcReg1 = MUL->getOperand(2).getReg();
8139 bool Src1IsKill = MUL->getOperand(2).isKill();
8140
8141 if (ResultReg.isVirtual())
8142 MRI.constrainRegClass(ResultReg, RC);
8143 if (SrcReg0.isVirtual())
8144 MRI.constrainRegClass(SrcReg0, RC);
8145 if (SrcReg1.isVirtual())
8146 MRI.constrainRegClass(SrcReg1, RC);
8148 MRI.constrainRegClass(VR, RC);
8149
8151 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8152 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8153 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8154 .addReg(VR);
8155 // Insert the MADD
8156 InsInstrs.push_back(MIB);
8157 return MUL;
8158}
8159
8160/// Do the following transformation
8161/// A - (B + C) ==> (A - B) - C
8162/// A - (B + C) ==> (A - C) - B
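/// For example (illustrative registers), with w9 defined by "add w9, w2, w3":
///   sub w0, w1, w9            // A - (B + C)
/// may be rewritten (IdxOpd1 == 1) as
///   sub w8, w1, w2            // A - B
///   sub w0, w8, w3            // (A - B) - C
/// The machine combiner then decides whether the reassociated form is better
/// for the surrounding dependency chain.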
8164 const TargetInstrInfo *TII, MachineInstr &Root,
8167 unsigned IdxOpd1,
8168 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8169 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8170 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8171 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8172
8173 Register ResultReg = Root.getOperand(0).getReg();
8174 Register RegA = Root.getOperand(1).getReg();
8175 bool RegAIsKill = Root.getOperand(1).isKill();
8176 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8177 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8178 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8179 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8180 Register NewVR =
8181 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8182
8183 unsigned Opcode = Root.getOpcode();
8184 if (Opcode == AArch64::SUBSWrr)
8185 Opcode = AArch64::SUBWrr;
8186 else if (Opcode == AArch64::SUBSXrr)
8187 Opcode = AArch64::SUBXrr;
8188 else
8189 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8190 "Unexpected instruction opcode.");
8191
8192 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8193 Flags &= ~MachineInstr::NoSWrap;
8194 Flags &= ~MachineInstr::NoUWrap;
8195
8196 MachineInstrBuilder MIB1 =
8197 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8198 .addReg(RegA, getKillRegState(RegAIsKill))
8199 .addReg(RegB, getKillRegState(RegBIsKill))
8200 .setMIFlags(Flags);
8201 MachineInstrBuilder MIB2 =
8202 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8203 .addReg(NewVR, getKillRegState(true))
8204 .addReg(RegC, getKillRegState(RegCIsKill))
8205 .setMIFlags(Flags);
8206
8207 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8208 InsInstrs.push_back(MIB1);
8209 InsInstrs.push_back(MIB2);
8210 DelInstrs.push_back(AddMI);
8211 DelInstrs.push_back(&Root);
8212}
8213
8214unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8215 unsigned int AccumulatorOpCode) const {
8216 switch (AccumulatorOpCode) {
8217 case AArch64::UABALB_ZZZ_D:
8218 case AArch64::SABALB_ZZZ_D:
8219 case AArch64::UABALT_ZZZ_D:
8220 case AArch64::SABALT_ZZZ_D:
8221 return AArch64::ADD_ZZZ_D;
8222 case AArch64::UABALB_ZZZ_H:
8223 case AArch64::SABALB_ZZZ_H:
8224 case AArch64::UABALT_ZZZ_H:
8225 case AArch64::SABALT_ZZZ_H:
8226 return AArch64::ADD_ZZZ_H;
8227 case AArch64::UABALB_ZZZ_S:
8228 case AArch64::SABALB_ZZZ_S:
8229 case AArch64::UABALT_ZZZ_S:
8230 case AArch64::SABALT_ZZZ_S:
8231 return AArch64::ADD_ZZZ_S;
8232 case AArch64::UABALv16i8_v8i16:
8233 case AArch64::SABALv8i8_v8i16:
8234 case AArch64::SABAv8i16:
8235 case AArch64::UABAv8i16:
8236 return AArch64::ADDv8i16;
8237 case AArch64::SABALv2i32_v2i64:
8238 case AArch64::UABALv2i32_v2i64:
8239 case AArch64::SABALv4i32_v2i64:
8240 return AArch64::ADDv2i64;
8241 case AArch64::UABALv4i16_v4i32:
8242 case AArch64::SABALv4i16_v4i32:
8243 case AArch64::SABALv8i16_v4i32:
8244 case AArch64::SABAv4i32:
8245 case AArch64::UABAv4i32:
8246 return AArch64::ADDv4i32;
8247 case AArch64::UABALv4i32_v2i64:
8248 return AArch64::ADDv2i64;
8249 case AArch64::UABALv8i16_v4i32:
8250 return AArch64::ADDv4i32;
8251 case AArch64::UABALv8i8_v8i16:
8252 case AArch64::SABALv16i8_v8i16:
8253 return AArch64::ADDv8i16;
8254 case AArch64::UABAv16i8:
8255 case AArch64::SABAv16i8:
8256 return AArch64::ADDv16i8;
8257 case AArch64::UABAv4i16:
8258 case AArch64::SABAv4i16:
8259 return AArch64::ADDv4i16;
8260 case AArch64::UABAv2i32:
8261 case AArch64::SABAv2i32:
8262 return AArch64::ADDv2i32;
8263 case AArch64::UABAv8i8:
8264 case AArch64::SABAv8i8:
8265 return AArch64::ADDv8i8;
8266 default:
8267 llvm_unreachable("Unknown accumulator opcode");
8268 }
8269}
8270
8271/// When getMachineCombinerPatterns() finds potential patterns,
8272/// this function generates the instructions that could replace the
8273 /// original code sequence.
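/// As a simple illustration (registers are arbitrary), for the integer
/// multiply-add pattern the sequence
///   mul  w8, w0, w1
///   add  w2, w8, w3
/// is replaced by
///   madd w2, w0, w1, w3
/// with the new instruction recorded in InsInstrs and the instructions it
/// makes dead recorded in DelInstrs.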
8274void AArch64InstrInfo::genAlternativeCodeSequence(
8275 MachineInstr &Root, unsigned Pattern,
8278 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8279 MachineBasicBlock &MBB = *Root.getParent();
8280 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8281 MachineFunction &MF = *MBB.getParent();
8282 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8283
8284 MachineInstr *MUL = nullptr;
8285 const TargetRegisterClass *RC;
8286 unsigned Opc;
8287 switch (Pattern) {
8288 default:
8289 // Reassociate instructions.
8290 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8291 DelInstrs, InstrIdxForVirtReg);
8292 return;
8294 // A - (B + C)
8295 // ==> (A - B) - C
8296 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8297 InstrIdxForVirtReg);
8298 return;
8300 // A - (B + C)
8301 // ==> (A - C) - B
8302 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8303 InstrIdxForVirtReg);
8304 return;
8307 // MUL I=A,B,0
8308 // ADD R,I,C
8309 // ==> MADD R,A,B,C
8310 // --- Create(MADD);
8312 Opc = AArch64::MADDWrrr;
8313 RC = &AArch64::GPR32RegClass;
8314 } else {
8315 Opc = AArch64::MADDXrrr;
8316 RC = &AArch64::GPR64RegClass;
8317 }
8318 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8319 break;
8322 // MUL I=A,B,0
8323 // ADD R,C,I
8324 // ==> MADD R,A,B,C
8325 // --- Create(MADD);
8327 Opc = AArch64::MADDWrrr;
8328 RC = &AArch64::GPR32RegClass;
8329 } else {
8330 Opc = AArch64::MADDXrrr;
8331 RC = &AArch64::GPR64RegClass;
8332 }
8333 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8334 break;
8339 // MUL I=A,B,0
8340 // ADD/SUB R,I,Imm
8341 // ==> MOV V, Imm/-Imm
8342 // ==> MADD R,A,B,V
8343 // --- Create(MADD);
8344 const TargetRegisterClass *RC;
8345 unsigned BitSize, MovImm;
8348 MovImm = AArch64::MOVi32imm;
8349 RC = &AArch64::GPR32spRegClass;
8350 BitSize = 32;
8351 Opc = AArch64::MADDWrrr;
8352 RC = &AArch64::GPR32RegClass;
8353 } else {
8354 MovImm = AArch64::MOVi64imm;
8355 RC = &AArch64::GPR64spRegClass;
8356 BitSize = 64;
8357 Opc = AArch64::MADDXrrr;
8358 RC = &AArch64::GPR64RegClass;
8359 }
8360 Register NewVR = MRI.createVirtualRegister(RC);
8361 uint64_t Imm = Root.getOperand(2).getImm();
8362
8363 if (Root.getOperand(3).isImm()) {
8364 unsigned Val = Root.getOperand(3).getImm();
8365 Imm = Imm << Val;
8366 }
8367 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8369 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8370 // Check that the immediate can be composed via a single instruction.
8372 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8373 if (Insn.size() != 1)
8374 return;
8375 MachineInstrBuilder MIB1 =
8376 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8377 .addImm(IsSub ? -Imm : Imm);
8378 InsInstrs.push_back(MIB1);
8379 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8380 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8381 break;
8382 }
8385 // MUL I=A,B,0
8386 // SUB R,I, C
8387 // ==> SUB V, 0, C
8388 // ==> MADD R,A,B,V // = -C + A*B
8389 // --- Create(MADD);
8390 const TargetRegisterClass *SubRC;
8391 unsigned SubOpc, ZeroReg;
8393 SubOpc = AArch64::SUBWrr;
8394 SubRC = &AArch64::GPR32spRegClass;
8395 ZeroReg = AArch64::WZR;
8396 Opc = AArch64::MADDWrrr;
8397 RC = &AArch64::GPR32RegClass;
8398 } else {
8399 SubOpc = AArch64::SUBXrr;
8400 SubRC = &AArch64::GPR64spRegClass;
8401 ZeroReg = AArch64::XZR;
8402 Opc = AArch64::MADDXrrr;
8403 RC = &AArch64::GPR64RegClass;
8404 }
8405 Register NewVR = MRI.createVirtualRegister(SubRC);
8406 // SUB NewVR, 0, C
8407 MachineInstrBuilder MIB1 =
8408 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8409 .addReg(ZeroReg)
8410 .add(Root.getOperand(2));
8411 InsInstrs.push_back(MIB1);
8412 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8413 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8414 break;
8415 }
8418 // MUL I=A,B,0
8419 // SUB R,C,I
8420 // ==> MSUB R,A,B,C (computes C - A*B)
8421 // --- Create(MSUB);
8423 Opc = AArch64::MSUBWrrr;
8424 RC = &AArch64::GPR32RegClass;
8425 } else {
8426 Opc = AArch64::MSUBXrrr;
8427 RC = &AArch64::GPR64RegClass;
8428 }
8429 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8430 break;
8432 Opc = AArch64::MLAv8i8;
8433 RC = &AArch64::FPR64RegClass;
8434 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8435 break;
8437 Opc = AArch64::MLAv8i8;
8438 RC = &AArch64::FPR64RegClass;
8439 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8440 break;
8442 Opc = AArch64::MLAv16i8;
8443 RC = &AArch64::FPR128RegClass;
8444 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8445 break;
8447 Opc = AArch64::MLAv16i8;
8448 RC = &AArch64::FPR128RegClass;
8449 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8450 break;
8452 Opc = AArch64::MLAv4i16;
8453 RC = &AArch64::FPR64RegClass;
8454 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8455 break;
8457 Opc = AArch64::MLAv4i16;
8458 RC = &AArch64::FPR64RegClass;
8459 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8460 break;
8462 Opc = AArch64::MLAv8i16;
8463 RC = &AArch64::FPR128RegClass;
8464 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8465 break;
8467 Opc = AArch64::MLAv8i16;
8468 RC = &AArch64::FPR128RegClass;
8469 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8470 break;
8472 Opc = AArch64::MLAv2i32;
8473 RC = &AArch64::FPR64RegClass;
8474 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8475 break;
8477 Opc = AArch64::MLAv2i32;
8478 RC = &AArch64::FPR64RegClass;
8479 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8480 break;
8482 Opc = AArch64::MLAv4i32;
8483 RC = &AArch64::FPR128RegClass;
8484 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8485 break;
8487 Opc = AArch64::MLAv4i32;
8488 RC = &AArch64::FPR128RegClass;
8489 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8490 break;
8491
8493 Opc = AArch64::MLAv8i8;
8494 RC = &AArch64::FPR64RegClass;
8495 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8496 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8497 RC);
8498 break;
8500 Opc = AArch64::MLSv8i8;
8501 RC = &AArch64::FPR64RegClass;
8502 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8503 break;
8505 Opc = AArch64::MLAv16i8;
8506 RC = &AArch64::FPR128RegClass;
8507 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8508 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8509 RC);
8510 break;
8512 Opc = AArch64::MLSv16i8;
8513 RC = &AArch64::FPR128RegClass;
8514 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8515 break;
8517 Opc = AArch64::MLAv4i16;
8518 RC = &AArch64::FPR64RegClass;
8519 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8520 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8521 RC);
8522 break;
8524 Opc = AArch64::MLSv4i16;
8525 RC = &AArch64::FPR64RegClass;
8526 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8527 break;
8529 Opc = AArch64::MLAv8i16;
8530 RC = &AArch64::FPR128RegClass;
8531 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8532 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8533 RC);
8534 break;
8536 Opc = AArch64::MLSv8i16;
8537 RC = &AArch64::FPR128RegClass;
8538 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8539 break;
8541 Opc = AArch64::MLAv2i32;
8542 RC = &AArch64::FPR64RegClass;
8543 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8544 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8545 RC);
8546 break;
8548 Opc = AArch64::MLSv2i32;
8549 RC = &AArch64::FPR64RegClass;
8550 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8551 break;
8553 Opc = AArch64::MLAv4i32;
8554 RC = &AArch64::FPR128RegClass;
8555 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8556 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8557 RC);
8558 break;
8560 Opc = AArch64::MLSv4i32;
8561 RC = &AArch64::FPR128RegClass;
8562 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8563 break;
8564
8566 Opc = AArch64::MLAv4i16_indexed;
8567 RC = &AArch64::FPR64RegClass;
8568 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8569 break;
8571 Opc = AArch64::MLAv4i16_indexed;
8572 RC = &AArch64::FPR64RegClass;
8573 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8574 break;
8576 Opc = AArch64::MLAv8i16_indexed;
8577 RC = &AArch64::FPR128RegClass;
8578 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8579 break;
8581 Opc = AArch64::MLAv8i16_indexed;
8582 RC = &AArch64::FPR128RegClass;
8583 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8584 break;
8586 Opc = AArch64::MLAv2i32_indexed;
8587 RC = &AArch64::FPR64RegClass;
8588 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8589 break;
8591 Opc = AArch64::MLAv2i32_indexed;
8592 RC = &AArch64::FPR64RegClass;
8593 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8594 break;
8596 Opc = AArch64::MLAv4i32_indexed;
8597 RC = &AArch64::FPR128RegClass;
8598 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8599 break;
8601 Opc = AArch64::MLAv4i32_indexed;
8602 RC = &AArch64::FPR128RegClass;
8603 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8604 break;
8605
8607 Opc = AArch64::MLAv4i16_indexed;
8608 RC = &AArch64::FPR64RegClass;
8609 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8610 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8611 RC);
8612 break;
8614 Opc = AArch64::MLSv4i16_indexed;
8615 RC = &AArch64::FPR64RegClass;
8616 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8617 break;
8619 Opc = AArch64::MLAv8i16_indexed;
8620 RC = &AArch64::FPR128RegClass;
8621 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8622 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8623 RC);
8624 break;
8626 Opc = AArch64::MLSv8i16_indexed;
8627 RC = &AArch64::FPR128RegClass;
8628 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8629 break;
8631 Opc = AArch64::MLAv2i32_indexed;
8632 RC = &AArch64::FPR64RegClass;
8633 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8634 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8635 RC);
8636 break;
8638 Opc = AArch64::MLSv2i32_indexed;
8639 RC = &AArch64::FPR64RegClass;
8640 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8641 break;
8643 Opc = AArch64::MLAv4i32_indexed;
8644 RC = &AArch64::FPR128RegClass;
8645 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8646 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8647 RC);
8648 break;
8650 Opc = AArch64::MLSv4i32_indexed;
8651 RC = &AArch64::FPR128RegClass;
8652 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8653 break;
8654
8655 // Floating Point Support
8657 Opc = AArch64::FMADDHrrr;
8658 RC = &AArch64::FPR16RegClass;
8659 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8660 break;
8662 Opc = AArch64::FMADDSrrr;
8663 RC = &AArch64::FPR32RegClass;
8664 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8665 break;
8667 Opc = AArch64::FMADDDrrr;
8668 RC = &AArch64::FPR64RegClass;
8669 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8670 break;
8671
8673 Opc = AArch64::FMADDHrrr;
8674 RC = &AArch64::FPR16RegClass;
8675 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8676 break;
8678 Opc = AArch64::FMADDSrrr;
8679 RC = &AArch64::FPR32RegClass;
8680 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8681 break;
8683 Opc = AArch64::FMADDDrrr;
8684 RC = &AArch64::FPR64RegClass;
8685 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8686 break;
8687
8689 Opc = AArch64::FMLAv1i32_indexed;
8690 RC = &AArch64::FPR32RegClass;
8691 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8693 break;
8695 Opc = AArch64::FMLAv1i32_indexed;
8696 RC = &AArch64::FPR32RegClass;
8697 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8699 break;
8700
8702 Opc = AArch64::FMLAv1i64_indexed;
8703 RC = &AArch64::FPR64RegClass;
8704 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8706 break;
8708 Opc = AArch64::FMLAv1i64_indexed;
8709 RC = &AArch64::FPR64RegClass;
8710 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8712 break;
8713
8715 RC = &AArch64::FPR64RegClass;
8716 Opc = AArch64::FMLAv4i16_indexed;
8717 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8719 break;
8721 RC = &AArch64::FPR64RegClass;
8722 Opc = AArch64::FMLAv4f16;
8723 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8725 break;
8727 RC = &AArch64::FPR64RegClass;
8728 Opc = AArch64::FMLAv4i16_indexed;
8729 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8731 break;
8733 RC = &AArch64::FPR64RegClass;
8734 Opc = AArch64::FMLAv4f16;
8735 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8737 break;
8738
8741 RC = &AArch64::FPR64RegClass;
8743 Opc = AArch64::FMLAv2i32_indexed;
8744 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8746 } else {
8747 Opc = AArch64::FMLAv2f32;
8748 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8750 }
8751 break;
8754 RC = &AArch64::FPR64RegClass;
8756 Opc = AArch64::FMLAv2i32_indexed;
8757 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8759 } else {
8760 Opc = AArch64::FMLAv2f32;
8761 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8763 }
8764 break;
8765
8767 RC = &AArch64::FPR128RegClass;
8768 Opc = AArch64::FMLAv8i16_indexed;
8769 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8771 break;
8773 RC = &AArch64::FPR128RegClass;
8774 Opc = AArch64::FMLAv8f16;
8775 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8777 break;
8779 RC = &AArch64::FPR128RegClass;
8780 Opc = AArch64::FMLAv8i16_indexed;
8781 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8783 break;
8785 RC = &AArch64::FPR128RegClass;
8786 Opc = AArch64::FMLAv8f16;
8787 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8789 break;
8790
8793 RC = &AArch64::FPR128RegClass;
8795 Opc = AArch64::FMLAv2i64_indexed;
8796 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8798 } else {
8799 Opc = AArch64::FMLAv2f64;
8800 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8802 }
8803 break;
8806 RC = &AArch64::FPR128RegClass;
8808 Opc = AArch64::FMLAv2i64_indexed;
8809 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8811 } else {
8812 Opc = AArch64::FMLAv2f64;
8813 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8815 }
8816 break;
8817
8820 RC = &AArch64::FPR128RegClass;
8822 Opc = AArch64::FMLAv4i32_indexed;
8823 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8825 } else {
8826 Opc = AArch64::FMLAv4f32;
8827 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8829 }
8830 break;
8831
8834 RC = &AArch64::FPR128RegClass;
8836 Opc = AArch64::FMLAv4i32_indexed;
8837 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8839 } else {
8840 Opc = AArch64::FMLAv4f32;
8841 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8843 }
8844 break;
8845
8847 Opc = AArch64::FNMSUBHrrr;
8848 RC = &AArch64::FPR16RegClass;
8849 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8850 break;
8852 Opc = AArch64::FNMSUBSrrr;
8853 RC = &AArch64::FPR32RegClass;
8854 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8855 break;
8857 Opc = AArch64::FNMSUBDrrr;
8858 RC = &AArch64::FPR64RegClass;
8859 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8860 break;
8861
8863 Opc = AArch64::FNMADDHrrr;
8864 RC = &AArch64::FPR16RegClass;
8865 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8866 break;
8868 Opc = AArch64::FNMADDSrrr;
8869 RC = &AArch64::FPR32RegClass;
8870 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8871 break;
8873 Opc = AArch64::FNMADDDrrr;
8874 RC = &AArch64::FPR64RegClass;
8875 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8876 break;
8877
8879 Opc = AArch64::FMSUBHrrr;
8880 RC = &AArch64::FPR16RegClass;
8881 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8882 break;
8884 Opc = AArch64::FMSUBSrrr;
8885 RC = &AArch64::FPR32RegClass;
8886 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8887 break;
8889 Opc = AArch64::FMSUBDrrr;
8890 RC = &AArch64::FPR64RegClass;
8891 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8892 break;
8893
8895 Opc = AArch64::FMLSv1i32_indexed;
8896 RC = &AArch64::FPR32RegClass;
8897 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8899 break;
8900
8902 Opc = AArch64::FMLSv1i64_indexed;
8903 RC = &AArch64::FPR64RegClass;
8904 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8906 break;
8907
8910 RC = &AArch64::FPR64RegClass;
8911 Register NewVR = MRI.createVirtualRegister(RC);
8912 MachineInstrBuilder MIB1 =
8913 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8914 .add(Root.getOperand(2));
8915 InsInstrs.push_back(MIB1);
8916 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8918 Opc = AArch64::FMLAv4f16;
8919 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8920 FMAInstKind::Accumulator, &NewVR);
8921 } else {
8922 Opc = AArch64::FMLAv4i16_indexed;
8923 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8924 FMAInstKind::Indexed, &NewVR);
8925 }
8926 break;
8927 }
8929 RC = &AArch64::FPR64RegClass;
8930 Opc = AArch64::FMLSv4f16;
8931 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8933 break;
8935 RC = &AArch64::FPR64RegClass;
8936 Opc = AArch64::FMLSv4i16_indexed;
8937 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8939 break;
8940
8943 RC = &AArch64::FPR64RegClass;
8945 Opc = AArch64::FMLSv2i32_indexed;
8946 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8948 } else {
8949 Opc = AArch64::FMLSv2f32;
8950 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8952 }
8953 break;
8954
8957 RC = &AArch64::FPR128RegClass;
8958 Register NewVR = MRI.createVirtualRegister(RC);
8959 MachineInstrBuilder MIB1 =
8960 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
8961 .add(Root.getOperand(2));
8962 InsInstrs.push_back(MIB1);
8963 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8965 Opc = AArch64::FMLAv8f16;
8966 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8967 FMAInstKind::Accumulator, &NewVR);
8968 } else {
8969 Opc = AArch64::FMLAv8i16_indexed;
8970 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8971 FMAInstKind::Indexed, &NewVR);
8972 }
8973 break;
8974 }
8976 RC = &AArch64::FPR128RegClass;
8977 Opc = AArch64::FMLSv8f16;
8978 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8980 break;
8982 RC = &AArch64::FPR128RegClass;
8983 Opc = AArch64::FMLSv8i16_indexed;
8984 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8986 break;
8987
8990 RC = &AArch64::FPR128RegClass;
8992 Opc = AArch64::FMLSv2i64_indexed;
8993 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8995 } else {
8996 Opc = AArch64::FMLSv2f64;
8997 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8999 }
9000 break;
9001
9004 RC = &AArch64::FPR128RegClass;
9006 Opc = AArch64::FMLSv4i32_indexed;
9007 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9009 } else {
9010 Opc = AArch64::FMLSv4f32;
9011 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9013 }
9014 break;
9017 RC = &AArch64::FPR64RegClass;
9018 Register NewVR = MRI.createVirtualRegister(RC);
9019 MachineInstrBuilder MIB1 =
9020 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9021 .add(Root.getOperand(2));
9022 InsInstrs.push_back(MIB1);
9023 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9025 Opc = AArch64::FMLAv2i32_indexed;
9026 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9027 FMAInstKind::Indexed, &NewVR);
9028 } else {
9029 Opc = AArch64::FMLAv2f32;
9030 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9031 FMAInstKind::Accumulator, &NewVR);
9032 }
9033 break;
9034 }
9037 RC = &AArch64::FPR128RegClass;
9038 Register NewVR = MRI.createVirtualRegister(RC);
9039 MachineInstrBuilder MIB1 =
9040 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9041 .add(Root.getOperand(2));
9042 InsInstrs.push_back(MIB1);
9043 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9045 Opc = AArch64::FMLAv4i32_indexed;
9046 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9047 FMAInstKind::Indexed, &NewVR);
9048 } else {
9049 Opc = AArch64::FMLAv4f32;
9050 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9051 FMAInstKind::Accumulator, &NewVR);
9052 }
9053 break;
9054 }
9057 RC = &AArch64::FPR128RegClass;
9058 Register NewVR = MRI.createVirtualRegister(RC);
9059 MachineInstrBuilder MIB1 =
9060 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9061 .add(Root.getOperand(2));
9062 InsInstrs.push_back(MIB1);
9063 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9065 Opc = AArch64::FMLAv2i64_indexed;
9066 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9067 FMAInstKind::Indexed, &NewVR);
9068 } else {
9069 Opc = AArch64::FMLAv2f64;
9070 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9071 FMAInstKind::Accumulator, &NewVR);
9072 }
9073 break;
9074 }
9077 unsigned IdxDupOp =
9079 : 2;
9080 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9081 &AArch64::FPR128RegClass, MRI);
9082 break;
9083 }
9086 unsigned IdxDupOp =
9088 : 2;
9089 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9090 &AArch64::FPR128RegClass, MRI);
9091 break;
9092 }
9095 unsigned IdxDupOp =
9097 : 2;
9098 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9099 &AArch64::FPR128_loRegClass, MRI);
9100 break;
9101 }
9104 unsigned IdxDupOp =
9106 : 2;
9107 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9108 &AArch64::FPR128RegClass, MRI);
9109 break;
9110 }
9113 unsigned IdxDupOp =
9115 : 2;
9116 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9117 &AArch64::FPR128_loRegClass, MRI);
9118 break;
9119 }
9121 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9122 break;
9123 }
9125 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9126 Pattern, 4);
9127 break;
9128 }
9130 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9131 Pattern, 8);
9132 break;
9133 }
9135 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9136 Pattern, 16);
9137 break;
9138 }
9139
9140 } // end switch (Pattern)
9141 // Record MUL and ADD/SUB for deletion
9142 if (MUL)
9143 DelInstrs.push_back(MUL);
9144 DelInstrs.push_back(&Root);
9145
9146 // Set the flags on the inserted instructions to be the merged flags of the
9147 // instructions that we have combined.
9148 uint32_t Flags = Root.getFlags();
9149 if (MUL)
9150 Flags = Root.mergeFlagsWith(*MUL);
9151 for (auto *MI : InsInstrs)
9152 MI->setFlags(Flags);
9153}
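// Rough illustration (not part of the source; registers are hypothetical) of
// the FNEG+FMLA rewrites built in several of the vector FMLS cases above: the
// subtrahend of the fsub is negated into a new vreg, which then becomes the
// accumulator of an FMLA.
//
//   fmul v3.2s, v1.2s, v2.2s           fneg v4.2s, v0.2s
//   fsub v5.2s, v3.2s, v0.2s    ==>    fmla v4.2s, v1.2s, v2.2s
//
// i.e. (v1 * v2) - v0 is recomputed as (-v0) + (v1 * v2), exposing a fused
// multiply-add.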
9154
9155/// Replace csincr-branch sequence by simple conditional branch
9156///
9157/// Examples:
9158/// 1. \code
9159/// csinc w9, wzr, wzr, <condition code>
9160/// tbnz w9, #0, 0x44
9161/// \endcode
9162/// to
9163/// \code
9164/// b.<inverted condition code>
9165/// \endcode
9166///
9167/// 2. \code
9168/// csinc w9, wzr, wzr, <condition code>
9169/// tbz w9, #0, 0x44
9170/// \endcode
9171/// to
9172/// \code
9173/// b.<condition code>
9174/// \endcode
9175///
9176/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9177/// compare's constant operand is power of 2.
9178///
9179/// Examples:
9180/// \code
9181/// and w8, w8, #0x400
9182/// cbnz w8, L1
9183/// \endcode
9184/// to
9185/// \code
9186/// tbnz w8, #10, L1
9187/// \endcode
9188///
9189/// \param MI Conditional Branch
9190/// \return True when the simple conditional branch is generated
9191///
9193 bool IsNegativeBranch = false;
9194 bool IsTestAndBranch = false;
9195 unsigned TargetBBInMI = 0;
9196 switch (MI.getOpcode()) {
9197 default:
9198 llvm_unreachable("Unknown branch instruction?");
9199 case AArch64::Bcc:
9200 case AArch64::CBWPri:
9201 case AArch64::CBXPri:
9202 case AArch64::CBWPrr:
9203 case AArch64::CBXPrr:
9204 return false;
9205 case AArch64::CBZW:
9206 case AArch64::CBZX:
9207 TargetBBInMI = 1;
9208 break;
9209 case AArch64::CBNZW:
9210 case AArch64::CBNZX:
9211 TargetBBInMI = 1;
9212 IsNegativeBranch = true;
9213 break;
9214 case AArch64::TBZW:
9215 case AArch64::TBZX:
9216 TargetBBInMI = 2;
9217 IsTestAndBranch = true;
9218 break;
9219 case AArch64::TBNZW:
9220 case AArch64::TBNZX:
9221 TargetBBInMI = 2;
9222 IsNegativeBranch = true;
9223 IsTestAndBranch = true;
9224 break;
9225 }
9226 // So we increment a zero register and test for bits other
9227 // than bit 0? Conservatively bail out in case the verifier
9228 // missed this case.
9229 if (IsTestAndBranch && MI.getOperand(1).getImm())
9230 return false;
9231
9232 // Find Definition.
9233 assert(MI.getParent() && "Incomplete machine instruction\n");
9234 MachineBasicBlock *MBB = MI.getParent();
9235 MachineFunction *MF = MBB->getParent();
9237 Register VReg = MI.getOperand(0).getReg();
9238 if (!VReg.isVirtual())
9239 return false;
9240
9241 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9242
9243 // Look through COPY instructions to find definition.
9244 while (DefMI->isCopy()) {
9245 Register CopyVReg = DefMI->getOperand(1).getReg();
9246 if (!MRI->hasOneNonDBGUse(CopyVReg))
9247 return false;
9248 if (!MRI->hasOneDef(CopyVReg))
9249 return false;
9250 DefMI = MRI->getVRegDef(CopyVReg);
9251 }
9252
9253 switch (DefMI->getOpcode()) {
9254 default:
9255 return false;
9256 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9257 case AArch64::ANDWri:
9258 case AArch64::ANDXri: {
9259 if (IsTestAndBranch)
9260 return false;
9261 if (DefMI->getParent() != MBB)
9262 return false;
9263 if (!MRI->hasOneNonDBGUse(VReg))
9264 return false;
9265
9266 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9268 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9269 if (!isPowerOf2_64(Mask))
9270 return false;
9271
9272 MachineOperand &MO = DefMI->getOperand(1);
9273 Register NewReg = MO.getReg();
9274 if (!NewReg.isVirtual())
9275 return false;
9276
9277 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9278
9279 MachineBasicBlock &RefToMBB = *MBB;
9280 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9281 DebugLoc DL = MI.getDebugLoc();
9282 unsigned Imm = Log2_64(Mask);
9283 unsigned Opc = (Imm < 32)
9284 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9285 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9286 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9287 .addReg(NewReg)
9288 .addImm(Imm)
9289 .addMBB(TBB);
9290    // Register lives on to the TBZ/TBNZ now.
9291 MO.setIsKill(false);
9292
9293    // For immediates smaller than 32, we need to use the 32-bit
9294    // variant (W) in all cases, since the 64-bit variant cannot
9295    // encode them.
9296    // Therefore, if the input register is 64-bit, we need to take its
9297    // 32-bit sub-register.
9298 if (!Is32Bit && Imm < 32)
9299 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9300 MI.eraseFromParent();
9301 return true;
9302 }
9303 // Look for CSINC
9304 case AArch64::CSINCWr:
9305 case AArch64::CSINCXr: {
9306 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9307 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9308 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9309 DefMI->getOperand(2).getReg() == AArch64::XZR))
9310 return false;
9311
9312 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9313 true) != -1)
9314 return false;
9315
9316 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9317 // Convert only when the condition code is not modified between
9318 // the CSINC and the branch. The CC may be used by other
9319 // instructions in between.
9321 return false;
9322 MachineBasicBlock &RefToMBB = *MBB;
9323 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9324 DebugLoc DL = MI.getDebugLoc();
9325 if (IsNegativeBranch)
9327 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9328 MI.eraseFromParent();
9329 return true;
9330 }
9331 }
9332}
9333
9334std::pair<unsigned, unsigned>
9335AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9336 const unsigned Mask = AArch64II::MO_FRAGMENT;
9337 return std::make_pair(TF & Mask, TF & ~Mask);
9338}
9339
9341AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9342 using namespace AArch64II;
9343
9344 static const std::pair<unsigned, const char *> TargetFlags[] = {
9345 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9346 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9347 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9348 {MO_HI12, "aarch64-hi12"}};
9349 return ArrayRef(TargetFlags);
9350}
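// Rough illustration (not part of the source): these are the names under which
// the direct operand flags are (de)serialized in MIR, e.g. for a hypothetical
// global @g:
//
//   $x8 = ADRP target-flags(aarch64-page) @g
//   $x8 = ADDXri $x8, target-flags(aarch64-pageoff) @g, 0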
9351
9353AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9354 using namespace AArch64II;
9355
9356 static const std::pair<unsigned, const char *> TargetFlags[] = {
9357 {MO_COFFSTUB, "aarch64-coffstub"},
9358 {MO_GOT, "aarch64-got"},
9359 {MO_NC, "aarch64-nc"},
9360 {MO_S, "aarch64-s"},
9361 {MO_TLS, "aarch64-tls"},
9362 {MO_DLLIMPORT, "aarch64-dllimport"},
9363 {MO_PREL, "aarch64-prel"},
9364 {MO_TAGGED, "aarch64-tagged"},
9365 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9366 };
9367 return ArrayRef(TargetFlags);
9368}
9369
9371AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9372 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9373 {{MOSuppressPair, "aarch64-suppress-pair"},
9374 {MOStridedAccess, "aarch64-strided-access"}};
9375 return ArrayRef(TargetFlags);
9376}
9377
9378/// Constants defining how certain sequences should be outlined.
9379/// This encompasses how an outlined function should be called, and what kind of
9380/// frame should be emitted for that outlined function.
9381///
9382/// \p MachineOutlinerDefault implies that the function should be called with
9383/// a save and restore of LR to the stack.
9384///
9385/// That is,
9386///
9387/// I1 Save LR OUTLINED_FUNCTION:
9388/// I2 --> BL OUTLINED_FUNCTION I1
9389/// I3 Restore LR I2
9390/// I3
9391/// RET
9392///
9393/// * Call construction overhead: 3 (save + BL + restore)
9394/// * Frame construction overhead: 1 (ret)
9395/// * Requires stack fixups? Yes
9396///
9397/// \p MachineOutlinerTailCall implies that the function is being created from
9398/// a sequence of instructions ending in a return.
9399///
9400/// That is,
9401///
9402/// I1 OUTLINED_FUNCTION:
9403/// I2 --> B OUTLINED_FUNCTION I1
9404/// RET I2
9405/// RET
9406///
9407/// * Call construction overhead: 1 (B)
9408/// * Frame construction overhead: 0 (Return included in sequence)
9409/// * Requires stack fixups? No
9410///
9411/// \p MachineOutlinerNoLRSave implies that the function should be called using
9412/// a BL instruction, but doesn't require LR to be saved and restored. This
9413/// happens when LR is known to be dead.
9414///
9415/// That is,
9416///
9417/// I1 OUTLINED_FUNCTION:
9418/// I2 --> BL OUTLINED_FUNCTION I1
9419/// I3 I2
9420/// I3
9421/// RET
9422///
9423/// * Call construction overhead: 1 (BL)
9424/// * Frame construction overhead: 1 (RET)
9425/// * Requires stack fixups? No
9426///
9427/// \p MachineOutlinerThunk implies that the function is being created from
9428/// a sequence of instructions ending in a call. The outlined function is
9429/// called with a BL instruction, and the outlined function tail-calls the
9430/// original call destination.
9431///
9432/// That is,
9433///
9434/// I1 OUTLINED_FUNCTION:
9435/// I2 --> BL OUTLINED_FUNCTION I1
9436/// BL f I2
9437/// B f
9438/// * Call construction overhead: 1 (BL)
9439/// * Frame construction overhead: 0
9440/// * Requires stack fixups? No
9441///
9442/// \p MachineOutlinerRegSave implies that the function should be called with a
9443/// save and restore of LR to an available register. This allows us to avoid
9444/// stack fixups. Note that this outlining variant is compatible with the
9445/// NoLRSave case.
9446///
9447/// That is,
9448///
9449/// I1 Save LR OUTLINED_FUNCTION:
9450/// I2 --> BL OUTLINED_FUNCTION I1
9451/// I3 Restore LR I2
9452/// I3
9453/// RET
9454///
9455/// * Call construction overhead: 3 (save + BL + restore)
9456/// * Frame construction overhead: 1 (ret)
9457/// * Requires stack fixups? No
9459 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9460 MachineOutlinerTailCall, /// Only emit a branch.
9461 MachineOutlinerNoLRSave, /// Emit a call and return.
9462 MachineOutlinerThunk, /// Emit a call and tail-call.
9463 MachineOutlinerRegSave /// Same as default, but save to a register.
9464};
9465
9471
9473AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9474 MachineFunction *MF = C.getMF();
9475 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9476 const AArch64RegisterInfo *ARI =
9477 static_cast<const AArch64RegisterInfo *>(&TRI);
9478 // Check if there is an available register across the sequence that we can
9479 // use.
9480 for (unsigned Reg : AArch64::GPR64RegClass) {
9481 if (!ARI->isReservedReg(*MF, Reg) &&
9482 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9483 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9484 Reg != AArch64::X17 && // Ditto for X17.
9485 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9486 C.isAvailableInsideSeq(Reg, TRI))
9487 return Reg;
9488 }
9489 return Register();
9490}
9491
9492static bool
9494 const outliner::Candidate &b) {
9495 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9496 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9497
9498 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
9499 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
9500}
9501
9502static bool
9504 const outliner::Candidate &b) {
9505 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9506 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9507
9508 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9509}
9510
9512 const outliner::Candidate &b) {
9513 const AArch64Subtarget &SubtargetA =
9515 const AArch64Subtarget &SubtargetB =
9516 b.getMF()->getSubtarget<AArch64Subtarget>();
9517 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9518}
9519
9520std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9521AArch64InstrInfo::getOutliningCandidateInfo(
9522 const MachineModuleInfo &MMI,
9523 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9524 unsigned MinRepeats) const {
9525 unsigned SequenceSize = 0;
9526 for (auto &MI : RepeatedSequenceLocs[0])
9527 SequenceSize += getInstSizeInBytes(MI);
9528
9529 unsigned NumBytesToCreateFrame = 0;
9530
9531 // We only allow outlining for functions having exactly matching return
9532 // address signing attributes, i.e., all share the same value for the
9533 // attribute "sign-return-address" and all share the same type of key they
9534 // are signed with.
9535 // Additionally we require all functions to simultaneously either support
9536 // v8.3a features or not. Otherwise an outlined function could get signed
9537 // using dedicated v8.3 instructions and a call from a function that doesn't
9538 // support v8.3 instructions would therefore be invalid.
9539 if (std::adjacent_find(
9540 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9541 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9542 // Return true if a and b are non-equal w.r.t. return address
9543 // signing or support of v8.3a features
9544 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9545 outliningCandidatesSigningKeyConsensus(a, b) &&
9546 outliningCandidatesV8_3OpsConsensus(a, b)) {
9547 return false;
9548 }
9549 return true;
9550 }) != RepeatedSequenceLocs.end()) {
9551 return std::nullopt;
9552 }
9553
9554  // Since at this point all candidates agree on their return address signing,
9555  // picking just one is fine. If the candidate functions potentially sign their
9556 // return addresses, the outlined function should do the same. Note that in
9557 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9558 // not certainly true that the outlined function will have to sign its return
9559 // address but this decision is made later, when the decision to outline
9560 // has already been made.
9561 // The same holds for the number of additional instructions we need: On
9562 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9563 // necessary. However, at this point we don't know if the outlined function
9564 // will have a RET instruction so we assume the worst.
9565 const TargetRegisterInfo &TRI = getRegisterInfo();
9566 // Performing a tail call may require extra checks when PAuth is enabled.
9567 // If PAuth is disabled, set it to zero for uniformity.
9568 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9569 if (RepeatedSequenceLocs[0]
9570 .getMF()
9571 ->getInfo<AArch64FunctionInfo>()
9572 ->shouldSignReturnAddress(true)) {
9573 // One PAC and one AUT instructions
9574 NumBytesToCreateFrame += 8;
9575
9576 // PAuth is enabled - set extra tail call cost, if any.
9577 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9578 *RepeatedSequenceLocs[0].getMF());
9579 NumBytesToCheckLRInTCEpilogue =
9581 // Checking the authenticated LR value may significantly impact
9582 // SequenceSize, so account for it for more precise results.
9583 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9584 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9585
9586 // We have to check if sp modifying instructions would get outlined.
9587    // If so, we only allow outlining if sp is unchanged overall, so matching
9588    // sub and add instructions are okay to outline; all other sp modifications
9589    // are not.
9590 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9591 int SPValue = 0;
9592 for (auto &MI : C) {
9593 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9594 switch (MI.getOpcode()) {
9595 case AArch64::ADDXri:
9596 case AArch64::ADDWri:
9597 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9598 assert(MI.getOperand(2).isImm() &&
9599 "Expected operand to be immediate");
9600 assert(MI.getOperand(1).isReg() &&
9601 "Expected operand to be a register");
9602 // Check if the add just increments sp. If so, we search for
9603 // matching sub instructions that decrement sp. If not, the
9604 // modification is illegal
9605 if (MI.getOperand(1).getReg() == AArch64::SP)
9606 SPValue += MI.getOperand(2).getImm();
9607 else
9608 return true;
9609 break;
9610 case AArch64::SUBXri:
9611 case AArch64::SUBWri:
9612 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9613 assert(MI.getOperand(2).isImm() &&
9614 "Expected operand to be immediate");
9615 assert(MI.getOperand(1).isReg() &&
9616 "Expected operand to be a register");
9617 // Check if the sub just decrements sp. If so, we search for
9618 // matching add instructions that increment sp. If not, the
9619 // modification is illegal
9620 if (MI.getOperand(1).getReg() == AArch64::SP)
9621 SPValue -= MI.getOperand(2).getImm();
9622 else
9623 return true;
9624 break;
9625 default:
9626 return true;
9627 }
9628 }
9629 }
9630 if (SPValue)
9631 return true;
9632 return false;
9633 };
9634 // Remove candidates with illegal stack modifying instructions
9635 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9636
9637 // If the sequence doesn't have enough candidates left, then we're done.
9638 if (RepeatedSequenceLocs.size() < MinRepeats)
9639 return std::nullopt;
9640 }
9641
9642 // Properties about candidate MBBs that hold for all of them.
9643 unsigned FlagsSetInAll = 0xF;
9644
9645 // Compute liveness information for each candidate, and set FlagsSetInAll.
9646 for (outliner::Candidate &C : RepeatedSequenceLocs)
9647 FlagsSetInAll &= C.Flags;
9648
9649 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9650
9651 // Helper lambda which sets call information for every candidate.
9652 auto SetCandidateCallInfo =
9653 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9654 for (outliner::Candidate &C : RepeatedSequenceLocs)
9655 C.setCallInfo(CallID, NumBytesForCall);
9656 };
9657
9658 unsigned FrameID = MachineOutlinerDefault;
9659 NumBytesToCreateFrame += 4;
9660
9661 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9662 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9663 });
9664
9665 // We check to see if CFI Instructions are present, and if they are
9666 // we find the number of CFI Instructions in the candidates.
9667 unsigned CFICount = 0;
9668 for (auto &I : RepeatedSequenceLocs[0]) {
9669 if (I.isCFIInstruction())
9670 CFICount++;
9671 }
9672
9673 // We compare the number of found CFI Instructions to the number of CFI
9674 // instructions in the parent function for each candidate. We must check this
9675 // since if we outline one of the CFI instructions in a function, we have to
9676 // outline them all for correctness. If we do not, the address offsets will be
9677 // incorrect between the two sections of the program.
9678 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9679 std::vector<MCCFIInstruction> CFIInstructions =
9680 C.getMF()->getFrameInstructions();
9681
9682 if (CFICount > 0 && CFICount != CFIInstructions.size())
9683 return std::nullopt;
9684 }
9685
9686  // Returns true if an instruction is safe to fix up, false otherwise.
9687 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9688 if (MI.isCall())
9689 return true;
9690
9691 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
9692 !MI.readsRegister(AArch64::SP, &TRI))
9693 return true;
9694
9695 // Any modification of SP will break our code to save/restore LR.
9696 // FIXME: We could handle some instructions which add a constant
9697 // offset to SP, with a bit more work.
9698 if (MI.modifiesRegister(AArch64::SP, &TRI))
9699 return false;
9700
9701 // At this point, we have a stack instruction that we might need to
9702 // fix up. We'll handle it if it's a load or store.
9703 if (MI.mayLoadOrStore()) {
9704 const MachineOperand *Base; // Filled with the base operand of MI.
9705 int64_t Offset; // Filled with the offset of MI.
9706 bool OffsetIsScalable;
9707
9708 // Does it allow us to offset the base operand and is the base the
9709 // register SP?
9710 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
9711 !Base->isReg() || Base->getReg() != AArch64::SP)
9712 return false;
9713
9714      // Fix-up code below assumes bytes.
9715 if (OffsetIsScalable)
9716 return false;
9717
9718 // Find the minimum/maximum offset for this instruction and check
9719 // if fixing it up would be in range.
9720 int64_t MinOffset,
9721 MaxOffset; // Unscaled offsets for the instruction.
9722 // The scale to multiply the offsets by.
9723 TypeSize Scale(0U, false), DummyWidth(0U, false);
9724 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
9725
9726 Offset += 16; // Update the offset to what it would be if we outlined.
9727 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9728 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9729 return false;
9730
9731 // It's in range, so we can outline it.
9732 return true;
9733 }
9734
9735 // FIXME: Add handling for instructions like "add x0, sp, #8".
9736
9737 // We can't fix it up, so don't outline it.
9738 return false;
9739 };
9740
9741 // True if it's possible to fix up each stack instruction in this sequence.
9742 // Important for frames/call variants that modify the stack.
9743 bool AllStackInstrsSafe =
9744 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
9745
9746 // If the last instruction in any candidate is a terminator, then we should
9747 // tail call all of the candidates.
9748 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9749 FrameID = MachineOutlinerTailCall;
9750 NumBytesToCreateFrame = 0;
9751 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9752 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9753 }
9754
9755 else if (LastInstrOpcode == AArch64::BL ||
9756 ((LastInstrOpcode == AArch64::BLR ||
9757 LastInstrOpcode == AArch64::BLRNoIP) &&
9758 !HasBTI)) {
9759 // FIXME: Do we need to check if the code after this uses the value of LR?
9760 FrameID = MachineOutlinerThunk;
9761 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9762 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9763 }
9764
9765 else {
9766 // We need to decide how to emit calls + frames. We can always emit the same
9767 // frame if we don't need to save to the stack. If we have to save to the
9768 // stack, then we need a different frame.
9769 unsigned NumBytesNoStackCalls = 0;
9770 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9771
9772 // Check if we have to save LR.
9773 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9774 bool LRAvailable =
9776 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9777 : true;
9778 // If we have a noreturn caller, then we're going to be conservative and
9779 // say that we have to save LR. If we don't have a ret at the end of the
9780 // block, then we can't reason about liveness accurately.
9781 //
9782 // FIXME: We can probably do better than always disabling this in
9783 // noreturn functions by fixing up the liveness info.
9784 bool IsNoReturn =
9785 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9786
9787 // Is LR available? If so, we don't need a save.
9788 if (LRAvailable && !IsNoReturn) {
9789 NumBytesNoStackCalls += 4;
9790 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9791 CandidatesWithoutStackFixups.push_back(C);
9792 }
9793
9794 // Is an unused register available? If so, we won't modify the stack, so
9795 // we can outline with the same frame type as those that don't save LR.
9796 else if (findRegisterToSaveLRTo(C)) {
9797 NumBytesNoStackCalls += 12;
9798 C.setCallInfo(MachineOutlinerRegSave, 12);
9799 CandidatesWithoutStackFixups.push_back(C);
9800 }
9801
9802 // Is SP used in the sequence at all? If not, we don't have to modify
9803 // the stack, so we are guaranteed to get the same frame.
9804 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9805 NumBytesNoStackCalls += 12;
9806 C.setCallInfo(MachineOutlinerDefault, 12);
9807 CandidatesWithoutStackFixups.push_back(C);
9808 }
9809
9810 // If we outline this, we need to modify the stack. Pretend we don't
9811 // outline this by saving all of its bytes.
9812 else {
9813 NumBytesNoStackCalls += SequenceSize;
9814 }
9815 }
9816
9817 // If there are no places where we have to save LR, then note that we
9818 // don't have to update the stack. Otherwise, give every candidate the
9819 // default call type, as long as it's safe to do so.
9820 if (!AllStackInstrsSafe ||
9821 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9822 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9823 FrameID = MachineOutlinerNoLRSave;
9824 if (RepeatedSequenceLocs.size() < MinRepeats)
9825 return std::nullopt;
9826 } else {
9827 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9828
9829 // Bugzilla ID: 46767
9830 // TODO: Check if fixing up the stack more than once is safe so we can
9831 // outline these.
9832 //
9833 // An outline resulting in a caller that requires stack fixups at the
9834 // callsite to a callee that also requires stack fixups can happen when
9835 // there are no available registers at the candidate callsite for a
9836 // candidate that itself also has calls.
9837 //
9838 // In other words if function_containing_sequence in the following pseudo
9839 // assembly requires that we save LR at the point of the call, but there
9840 // are no available registers: in this case we save using SP and as a
9841      // result the SP offsets require stack fixups by multiples of 16.
9842 //
9843 // function_containing_sequence:
9844 // ...
9845 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9846 // call OUTLINED_FUNCTION_N
9847 // restore LR from SP
9848 // ...
9849 //
9850 // OUTLINED_FUNCTION_N:
9851 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9852 // ...
9853 // bl foo
9854 // restore LR from SP
9855 // ret
9856 //
9857 // Because the code to handle more than one stack fixup does not
9858 // currently have the proper checks for legality, these cases will assert
9859 // in the AArch64 MachineOutliner. This is because the code to do this
9860 // needs more hardening, testing, better checks that generated code is
9861 // legal, etc and because it is only verified to handle a single pass of
9862 // stack fixup.
9863 //
9864 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9865 // these cases until they are known to be handled. Bugzilla 46767 is
9866 // referenced in comments at the assert site.
9867 //
9868 // To avoid asserting (or generating non-legal code on noassert builds)
9869 // we remove all candidates which would need more than one stack fixup by
9870 // pruning the cases where the candidate has calls while also having no
9871 // available LR and having no available general purpose registers to copy
9872 // LR to (ie one extra stack save/restore).
9873 //
9874 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9875 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9876 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9877 return (llvm::any_of(C, IsCall)) &&
9878 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9879 !findRegisterToSaveLRTo(C));
9880 });
9881 }
9882 }
9883
9884 // If we dropped all of the candidates, bail out here.
9885 if (RepeatedSequenceLocs.size() < MinRepeats)
9886 return std::nullopt;
9887 }
9888
9889 // Does every candidate's MBB contain a call? If so, then we might have a call
9890 // in the range.
9891 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9892 // Check if the range contains a call. These require a save + restore of the
9893 // link register.
9894 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9895 bool ModStackToSaveLR = false;
9896 if (any_of(drop_end(FirstCand),
9897 [](const MachineInstr &MI) { return MI.isCall(); }))
9898 ModStackToSaveLR = true;
9899
9900 // Handle the last instruction separately. If this is a tail call, then the
9901 // last instruction is a call. We don't want to save + restore in this case.
9902 // However, it could be possible that the last instruction is a call without
9903 // it being valid to tail call this sequence. We should consider this as
9904 // well.
9905 else if (FrameID != MachineOutlinerThunk &&
9906 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9907 ModStackToSaveLR = true;
9908
9909 if (ModStackToSaveLR) {
9910 // We can't fix up the stack. Bail out.
9911 if (!AllStackInstrsSafe)
9912 return std::nullopt;
9913
9914 // Save + restore LR.
9915 NumBytesToCreateFrame += 8;
9916 }
9917 }
9918
9919 // If we have CFI instructions, we can only outline if the outlined section
9920 // can be a tail call
9921 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9922 return std::nullopt;
9923
9924 return std::make_unique<outliner::OutlinedFunction>(
9925 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9926}
9927
9928void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9929 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9930 // If a bunch of candidates reach this point they must agree on their return
9931 // address signing. It is therefore enough to just consider the signing
9932 // behaviour of one of them
9933 const auto &CFn = Candidates.front().getMF()->getFunction();
9934
9935 if (CFn.hasFnAttribute("ptrauth-returns"))
9936 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9937 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9938 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9939 // Since all candidates belong to the same module, just copy the
9940 // function-level attributes of an arbitrary function.
9941 if (CFn.hasFnAttribute("sign-return-address"))
9942 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9943 if (CFn.hasFnAttribute("sign-return-address-key"))
9944 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9945
9946 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9947}
9948
9949bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
9950 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9951 const Function &F = MF.getFunction();
9952
9953 // Can F be deduplicated by the linker? If it can, don't outline from it.
9954 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9955 return false;
9956
9957 // Don't outline from functions with section markings; the program could
9958 // expect that all the code is in the named section.
9959 // FIXME: Allow outlining from multiple functions with the same section
9960 // marking.
9961 if (F.hasSection())
9962 return false;
9963
9964 // Outlining from functions with redzones is unsafe since the outliner may
9965 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9966 // outline from it.
9967 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
9968 if (!AFI || AFI->hasRedZone().value_or(true))
9969 return false;
9970
9971 // FIXME: Determine whether it is safe to outline from functions which contain
9972 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9973 // outlined together and ensure it is safe to outline with async unwind info,
9974 // required for saving & restoring VG around calls.
9975 if (AFI->hasStreamingModeChanges())
9976 return false;
9977
9978 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9980 return false;
9981
9982 // It's safe to outline from MF.
9983 return true;
9984}
9985
9987AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
9988 unsigned &Flags) const {
9990 "Must track liveness!");
9992 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9993 Ranges;
9994 // According to the AArch64 Procedure Call Standard, the following are
9995 // undefined on entry/exit from a function call:
9996 //
9997 // * Registers x16, x17, (and thus w16, w17)
9998 // * Condition codes (and thus the NZCV register)
9999 //
10000 // If any of these registers are used inside or live across an outlined
10001 // function, then they may be modified later, either by the compiler or
10002 // some other tool (like the linker).
10003 //
10004 // To avoid outlining in these situations, partition each block into ranges
10005 // where these registers are dead. We will only outline from those ranges.
10006 LiveRegUnits LRU(getRegisterInfo());
10007 auto AreAllUnsafeRegsDead = [&LRU]() {
10008 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10009 LRU.available(AArch64::NZCV);
10010 };
10011
10012 // We need to know if LR is live across an outlining boundary later on in
10013 // order to decide how we'll create the outlined call, frame, etc.
10014 //
10015 // It's pretty expensive to check this for *every candidate* within a block.
10016 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10017 // to compute liveness from the end of the block for O(n) candidates within
10018 // the block.
10019 //
10020 // So, to improve the average case, let's keep track of liveness from the end
10021 // of the block to the beginning of *every outlinable range*. If we know that
10022 // LR is available in every range we could outline from, then we know that
10023 // we don't need to check liveness for any candidate within that range.
10024 bool LRAvailableEverywhere = true;
10025 // Compute liveness bottom-up.
10026 LRU.addLiveOuts(MBB);
10027 // Update flags that require info about the entire MBB.
10028 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10029 if (MI.isCall() && !MI.isTerminator())
10031 };
10032 // Range: [RangeBegin, RangeEnd)
10033 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10034 unsigned RangeLen;
10035 auto CreateNewRangeStartingAt =
10036 [&RangeBegin, &RangeEnd,
10037 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10038 RangeBegin = NewBegin;
10039 RangeEnd = std::next(RangeBegin);
10040 RangeLen = 0;
10041 };
10042 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10043 // At least one unsafe register is not dead. We do not want to outline at
10044 // this point. If it is long enough to outline from and does not cross a
10045 // bundle boundary, save the range [RangeBegin, RangeEnd).
10046 if (RangeLen <= 1)
10047 return;
10048 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10049 return;
10050 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10051 return;
10052 Ranges.emplace_back(RangeBegin, RangeEnd);
10053 };
10054 // Find the first point where all unsafe registers are dead.
10055 // FIND: <safe instr> <-- end of first potential range
10056 // SKIP: <unsafe def>
10057 // SKIP: ... everything between ...
10058 // SKIP: <unsafe use>
10059 auto FirstPossibleEndPt = MBB.instr_rbegin();
10060 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10061 LRU.stepBackward(*FirstPossibleEndPt);
10062 // Update flags that impact how we outline across the entire block,
10063 // regardless of safety.
10064 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10065 if (AreAllUnsafeRegsDead())
10066 break;
10067 }
10068 // If we exhausted the entire block, we have no safe ranges to outline.
10069 if (FirstPossibleEndPt == MBB.instr_rend())
10070 return Ranges;
10071 // Current range.
10072 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10073  // FirstPossibleEndPt points to the first place (searching bottom-up) where
10074  // all unsafe registers are dead (if there is any such point). Begin
10075  // partitioning the MBB into ranges.
10076 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10077 LRU.stepBackward(MI);
10078 UpdateWholeMBBFlags(MI);
10079 if (!AreAllUnsafeRegsDead()) {
10080 SaveRangeIfNonEmpty();
10081 CreateNewRangeStartingAt(MI.getIterator());
10082 continue;
10083 }
10084 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10085 RangeBegin = MI.getIterator();
10086 ++RangeLen;
10087 }
10088 // Above loop misses the last (or only) range. If we are still safe, then
10089 // let's save the range.
10090 if (AreAllUnsafeRegsDead())
10091 SaveRangeIfNonEmpty();
10092 if (Ranges.empty())
10093 return Ranges;
10094  // We found the ranges bottom-up. The mapping expects them top-down, so
10095  // reverse the order.
10096 std::reverse(Ranges.begin(), Ranges.end());
10097 // If there is at least one outlinable range where LR is unavailable
10098 // somewhere, remember that.
10099 if (!LRAvailableEverywhere)
10101 return Ranges;
10102}
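// Rough illustration (not part of the source; code and registers are
// hypothetical): a block is partitioned around any region where x16/x17/NZCV
// are live, e.g.
//
//   ldr  x0, [x1]        ; \ first outlinable range
//   add  x0, x0, #1      ; /
//   mov  x16, x3         ; \ x16 is live here, so this region is skipped
//   blr  x16             ; /
//   str  x0, [x2]        ;   second outlinable range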
10103
10105AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10107 unsigned Flags) const {
10108 MachineInstr &MI = *MIT;
10109
10110 // Don't outline anything used for return address signing. The outlined
10111 // function will get signed later if needed
10112 switch (MI.getOpcode()) {
10113 case AArch64::PACM:
10114 case AArch64::PACIASP:
10115 case AArch64::PACIBSP:
10116 case AArch64::PACIASPPC:
10117 case AArch64::PACIBSPPC:
10118 case AArch64::AUTIASP:
10119 case AArch64::AUTIBSP:
10120 case AArch64::AUTIASPPCi:
10121 case AArch64::AUTIASPPCr:
10122 case AArch64::AUTIBSPPCi:
10123 case AArch64::AUTIBSPPCr:
10124 case AArch64::RETAA:
10125 case AArch64::RETAB:
10126 case AArch64::RETAASPPCi:
10127 case AArch64::RETAASPPCr:
10128 case AArch64::RETABSPPCi:
10129 case AArch64::RETABSPPCr:
10130 case AArch64::EMITBKEY:
10131 case AArch64::PAUTH_PROLOGUE:
10132 case AArch64::PAUTH_EPILOGUE:
10134 }
10135
10136 // We can only outline these if we will tail call the outlined function, or
10137 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10138 // in a tail call.
10139 //
10140 // FIXME: If the proper fixups for the offset are implemented, this should be
10141 // possible.
10142 if (MI.isCFIInstruction())
10144
10145 // Is this a terminator for a basic block?
10146 if (MI.isTerminator())
10147 // TargetInstrInfo::getOutliningType has already filtered out anything
10148 // that would break this, so we can allow it here.
10150
10151 // Make sure none of the operands are un-outlinable.
10152 for (const MachineOperand &MOP : MI.operands()) {
10153 // A check preventing CFI indices was here before, but only CFI
10154 // instructions should have those.
10155 assert(!MOP.isCFIIndex());
10156
10157 // If it uses LR or W30 explicitly, then don't touch it.
10158 if (MOP.isReg() && !MOP.isImplicit() &&
10159 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10161 }
10162
10163 // Special cases for instructions that can always be outlined, but will fail
10164  // the later tests, e.g. ADRPs, which are PC-relative, but can always
10165  // be outlined because they don't require a *specific* value to be in LR.
10166 if (MI.getOpcode() == AArch64::ADRP)
10168
10169 // If MI is a call we might be able to outline it. We don't want to outline
10170 // any calls that rely on the position of items on the stack. When we outline
10171 // something containing a call, we have to emit a save and restore of LR in
10172 // the outlined function. Currently, this always happens by saving LR to the
10173 // stack. Thus, if we outline, say, half the parameters for a function call
10174 // plus the call, then we'll break the callee's expectations for the layout
10175 // of the stack.
10176 //
10177 // FIXME: Allow calls to functions which construct a stack frame, as long
10178 // as they don't access arguments on the stack.
10179 // FIXME: Figure out some way to analyze functions defined in other modules.
10180 // We should be able to compute the memory usage based on the IR calling
10181 // convention, even if we can't see the definition.
10182 if (MI.isCall()) {
10183 // Get the function associated with the call. Look at each operand and find
10184 // the one that represents the callee and get its name.
10185 const Function *Callee = nullptr;
10186 for (const MachineOperand &MOP : MI.operands()) {
10187 if (MOP.isGlobal()) {
10188 Callee = dyn_cast<Function>(MOP.getGlobal());
10189 break;
10190 }
10191 }
10192
10193 // Never outline calls to mcount. There isn't any rule that would require
10194 // this, but the Linux kernel's "ftrace" feature depends on it.
10195 if (Callee && Callee->getName() == "\01_mcount")
10197
10198 // If we don't know anything about the callee, assume it depends on the
10199 // stack layout of the caller. In that case, it's only legal to outline
10200 // as a tail-call. Explicitly list the call instructions we know about so we
10201 // don't get unexpected results with call pseudo-instructions.
10202 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10203 if (MI.getOpcode() == AArch64::BLR ||
10204 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10205 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10206
10207 if (!Callee)
10208 return UnknownCallOutlineType;
10209
10210    // We have a function we have information about. Check if it's something
10211    // we can safely outline.
10212 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10213
10214 // We don't know what's going on with the callee at all. Don't touch it.
10215 if (!CalleeMF)
10216 return UnknownCallOutlineType;
10217
10218 // Check if we know anything about the callee saves on the function. If we
10219 // don't, then don't touch it, since that implies that we haven't
10220 // computed anything about its stack frame yet.
10221 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10222 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10223 MFI.getNumObjects() > 0)
10224 return UnknownCallOutlineType;
10225
10226 // At this point, we can say that CalleeMF ought to not pass anything on the
10227 // stack. Therefore, we can outline it.
10229 }
10230
10231 // Don't touch the link register or W30.
10232 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10233 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10235
10236 // Don't outline BTI instructions, because that will prevent the outlining
10237 // site from being indirectly callable.
10238 if (hasBTISemantics(MI))
10240
10242}
10243
10244void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10245 for (MachineInstr &MI : MBB) {
10246 const MachineOperand *Base;
10247 TypeSize Width(0, false);
10248 int64_t Offset;
10249 bool OffsetIsScalable;
10250
10251 // Is this a load or store with an immediate offset with SP as the base?
10252 if (!MI.mayLoadOrStore() ||
10253 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10254 &RI) ||
10255 (Base->isReg() && Base->getReg() != AArch64::SP))
10256 continue;
10257
10258 // It is, so we have to fix it up.
10259 TypeSize Scale(0U, false);
10260 int64_t Dummy1, Dummy2;
10261
10262 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10263 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10264 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10265 assert(Scale != 0 && "Unexpected opcode!");
10266 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10267
10268 // We've pushed the return address to the stack, so add 16 to the offset.
10269 // This is safe, since we already checked if it would overflow when we
10270 // checked if this instruction was legal to outline.
10271 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10272 StackOffsetOperand.setImm(NewImm);
10273 }
10274}
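// Rough illustration (not part of the source; operands are hypothetical): once
// the outlined function saves LR with "str x30, [sp, #-16]!", an SP-relative
// access copied into it is rebased by 16 bytes, e.g.
//
//   ldr x0, [sp, #8]   becomes   ldr x0, [sp, #24]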
10275
10277 const AArch64InstrInfo *TII,
10278 bool ShouldSignReturnAddr) {
10279 if (!ShouldSignReturnAddr)
10280 return;
10281
10282 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10284 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10285 TII->get(AArch64::PAUTH_EPILOGUE))
10287}
10288
10289void AArch64InstrInfo::buildOutlinedFrame(
10291 const outliner::OutlinedFunction &OF) const {
10292
10293 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10294
10295 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10296 FI->setOutliningStyle("Tail Call");
10297 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10298 // For thunk outlining, rewrite the last instruction from a call to a
10299 // tail-call.
10300 MachineInstr *Call = &*--MBB.instr_end();
10301 unsigned TailOpcode;
10302 if (Call->getOpcode() == AArch64::BL) {
10303 TailOpcode = AArch64::TCRETURNdi;
10304 } else {
10305 assert(Call->getOpcode() == AArch64::BLR ||
10306 Call->getOpcode() == AArch64::BLRNoIP);
10307 TailOpcode = AArch64::TCRETURNriALL;
10308 }
10309 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10310 .add(Call->getOperand(0))
10311 .addImm(0);
10312 MBB.insert(MBB.end(), TC);
10314
10315 FI->setOutliningStyle("Thunk");
10316 }
10317
10318 bool IsLeafFunction = true;
10319
10320 // Is there a call in the outlined range?
10321 auto IsNonTailCall = [](const MachineInstr &MI) {
10322 return MI.isCall() && !MI.isReturn();
10323 };
10324
10325 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10326 // Fix up the instructions in the range, since we're going to modify the
10327 // stack.
10328
10329 // Bugzilla ID: 46767
10330 // TODO: Check if fixing up twice is safe so we can outline these.
10331 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10332 "Can only fix up stack references once");
10333 fixupPostOutline(MBB);
10334
10335 IsLeafFunction = false;
10336
10337 // LR has to be a live in so that we can save it.
10338 if (!MBB.isLiveIn(AArch64::LR))
10339 MBB.addLiveIn(AArch64::LR);
10340
10343
10344 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10345 OF.FrameConstructionID == MachineOutlinerThunk)
10346 Et = std::prev(MBB.end());
10347
10348 // Insert a save before the outlined region
10349 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10350 .addReg(AArch64::SP, RegState::Define)
10351 .addReg(AArch64::LR)
10352 .addReg(AArch64::SP)
10353 .addImm(-16);
10354 It = MBB.insert(It, STRXpre);
10355
10356 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10357 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10358
10359 // Add a CFI saying the stack was moved 16 B down.
10360 CFIBuilder.buildDefCFAOffset(16);
10361
10362 // Add a CFI saying that the LR that we want to find is now 16 B higher
10363 // than before.
10364 CFIBuilder.buildOffset(AArch64::LR, -16);
10365 }
10366
10367 // Insert a restore before the terminator for the function.
10368 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10369 .addReg(AArch64::SP, RegState::Define)
10370 .addReg(AArch64::LR, RegState::Define)
10371 .addReg(AArch64::SP)
10372 .addImm(16);
10373 Et = MBB.insert(Et, LDRXpost);
10374 }
10375
10376 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
10377
10378 // If this is a tail call outlined function, then there's already a return.
10379 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10380 OF.FrameConstructionID == MachineOutlinerThunk) {
10381 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10382 return;
10383 }
10384
10385 // It's not a tail call, so we have to insert the return ourselves.
10386
10387 // LR has to be a live in so that we can return to it.
10388 if (!MBB.isLiveIn(AArch64::LR))
10389 MBB.addLiveIn(AArch64::LR);
10390
10391 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10392 .addReg(AArch64::LR);
10393 MBB.insert(MBB.end(), ret);
10394
10395 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10396
10397 FI->setOutliningStyle("Function");
10398
10399 // Did we have to modify the stack by saving the link register?
10400 if (OF.FrameConstructionID != MachineOutlinerDefault)
10401 return;
10402
10403 // We modified the stack.
10404 // Walk over the basic block and fix up all the stack accesses.
10405 fixupPostOutline(MBB);
10406}
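// Rough illustration (not part of the source) of the frame built above for a
// MachineOutlinerDefault function whose body contains a call:
//
// OUTLINED_FUNCTION_N:
//   str x30, [sp, #-16]!     ; STRXpre inserted before the outlined body
//   ...                      ; outlined instructions, SP offsets fixed up
//   ldr x30, [sp], #16       ; LDRXpost inserted before the terminator
//   ret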
10407
10408MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10411
10412 // Are we tail calling?
10413 if (C.CallConstructionID == MachineOutlinerTailCall) {
10414 // If yes, then we can just branch to the label.
10415 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10416 .addGlobalAddress(M.getNamedValue(MF.getName()))
10417 .addImm(0));
10418 return It;
10419 }
10420
10421 // Are we saving the link register?
10422 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10423 C.CallConstructionID == MachineOutlinerThunk) {
10424 // No, so just insert the call.
10425 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10426 .addGlobalAddress(M.getNamedValue(MF.getName())));
10427 return It;
10428 }
10429
10430 // We want to return the spot where we inserted the call.
10432
10433 // Instructions for saving and restoring LR around the call instruction we're
10434 // going to insert.
10435 MachineInstr *Save;
10436 MachineInstr *Restore;
10437 // Can we save to a register?
10438 if (C.CallConstructionID == MachineOutlinerRegSave) {
10439 // FIXME: This logic should be sunk into a target-specific interface so that
10440 // we don't have to recompute the register.
10441 Register Reg = findRegisterToSaveLRTo(C);
10442 assert(Reg && "No callee-saved register available?");
10443
10444 // LR has to be a live in so that we can save it.
10445 if (!MBB.isLiveIn(AArch64::LR))
10446 MBB.addLiveIn(AArch64::LR);
10447
10448 // Save and restore LR from Reg.
10449 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10450 .addReg(AArch64::XZR)
10451 .addReg(AArch64::LR)
10452 .addImm(0);
10453 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10454 .addReg(AArch64::XZR)
10455 .addReg(Reg)
10456 .addImm(0);
10457 } else {
10458 // We have the default case. Save and restore from SP.
10459 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10460 .addReg(AArch64::SP, RegState::Define)
10461 .addReg(AArch64::LR)
10462 .addReg(AArch64::SP)
10463 .addImm(-16);
10464 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10465 .addReg(AArch64::SP, RegState::Define)
10466 .addReg(AArch64::LR, RegState::Define)
10467 .addReg(AArch64::SP)
10468 .addImm(16);
10469 }
10470
10471 It = MBB.insert(It, Save);
10472 It++;
10473
10474 // Insert the call.
10475 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10476 .addGlobalAddress(M.getNamedValue(MF.getName())));
10477 CallPt = It;
10478 It++;
10479
10480 It = MBB.insert(It, Restore);
10481 return CallPt;
10482}
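// Rough illustration (not part of the source) of the call sequences built
// above, assuming x20 (hypothetical) is the register picked by
// findRegisterToSaveLRTo:
//
//   MachineOutlinerRegSave:          MachineOutlinerDefault:
//     mov x20, x30                     str x30, [sp, #-16]!
//     bl  OUTLINED_FUNCTION_N          bl  OUTLINED_FUNCTION_N
//     mov x30, x20                     ldr x30, [sp], #16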
10483
10484bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10485 MachineFunction &MF) const {
10486 return MF.getFunction().hasMinSize();
10487}
10488
10489void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10491 DebugLoc &DL,
10492 bool AllowSideEffects) const {
10493 const MachineFunction &MF = *MBB.getParent();
10494 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10495 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10496
10497 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10498 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10499 } else if (STI.isSVEorStreamingSVEAvailable()) {
10500 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10501 .addImm(0)
10502 .addImm(0);
10503 } else if (STI.isNeonAvailable()) {
10504 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10505 .addImm(0);
10506 } else {
10507 // This is a streaming-compatible function without SVE. We don't have full
10508 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10509    // Since `movi v..` would be illegal, use `fmov d..` instead.
10510 assert(STI.hasNEON() && "Expected to have NEON.");
10511 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10512 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10513 }
10514}
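// Rough illustration (not part of the source; registers are hypothetical) of
// the zeroing instruction chosen above for, respectively, a GPR, an
// SVE/streaming register, a NEON register, and the streaming-compatible
// FPR-only fallback:
//
//   mov  x0, #0
//   mov  z0.d, #0
//   movi v0.2d, #0
//   fmov d0, xzr             ; roughly what the FMOVD0 pseudo expands to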
10515
10516std::optional<DestSourcePair>
10518
10519  // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR register
10520  // and a zero immediate operand are used as an alias for the mov instruction.
10521 if (((MI.getOpcode() == AArch64::ORRWrs &&
10522 MI.getOperand(1).getReg() == AArch64::WZR &&
10523 MI.getOperand(3).getImm() == 0x0) ||
10524 (MI.getOpcode() == AArch64::ORRWrr &&
10525 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10526 // Check that the w->w move is not a zero-extending w->x mov.
10527 (!MI.getOperand(0).getReg().isVirtual() ||
10528 MI.getOperand(0).getSubReg() == 0) &&
10529 (!MI.getOperand(0).getReg().isPhysical() ||
10530 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10531 /*TRI=*/nullptr) == -1))
10532 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10533
10534 if (MI.getOpcode() == AArch64::ORRXrs &&
10535 MI.getOperand(1).getReg() == AArch64::XZR &&
10536 MI.getOperand(3).getImm() == 0x0)
10537 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10538
10539 return std::nullopt;
10540}
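// Illustrative MIR forms recognized as plain copies above (operand order:
// dst, src1, src2, shift):
//   $w0 = ORRWrs $wzr, $w1, 0    ; "mov w0, w1" -> copy of $w1 into $w0
//   $x0 = ORRXrs $xzr, $x1, 0    ; "mov x0, x1" -> copy of $x1 into $x0
// A non-zero shift amount, or a first source other than WZR/XZR, is not
// reported as a copy.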
10541
10542std::optional<DestSourcePair>
10543AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10544 if ((MI.getOpcode() == AArch64::ORRWrs &&
10545 MI.getOperand(1).getReg() == AArch64::WZR &&
10546 MI.getOperand(3).getImm() == 0x0) ||
10547 (MI.getOpcode() == AArch64::ORRWrr &&
10548 MI.getOperand(1).getReg() == AArch64::WZR))
10549 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10550 return std::nullopt;
10551}
10552
10553std::optional<RegImmPair>
10554AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10555 int Sign = 1;
10556 int64_t Offset = 0;
10557
10558 // TODO: Handle cases where Reg is a super- or sub-register of the
10559 // destination register.
10560 const MachineOperand &Op0 = MI.getOperand(0);
10561 if (!Op0.isReg() || Reg != Op0.getReg())
10562 return std::nullopt;
10563
10564 switch (MI.getOpcode()) {
10565 default:
10566 return std::nullopt;
10567 case AArch64::SUBWri:
10568 case AArch64::SUBXri:
10569 case AArch64::SUBSWri:
10570 case AArch64::SUBSXri:
10571 Sign *= -1;
10572 [[fallthrough]];
10573 case AArch64::ADDSWri:
10574 case AArch64::ADDSXri:
10575 case AArch64::ADDWri:
10576 case AArch64::ADDXri: {
10577 // TODO: Third operand can be global address (usually some string).
10578 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10579 !MI.getOperand(2).isImm())
10580 return std::nullopt;
10581 int Shift = MI.getOperand(3).getImm();
10582 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10583 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10584 }
10585 }
10586 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10587}
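// Worked example for the sign/shift handling above (illustrative):
//   $x0 = SUBXri $x1, 4, 12      ; imm = 4, shift = 12
// yields RegImmPair{ $x1, -(4 << 12) }, i.e. an offset of -16384 from $x1.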
10588
10589/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10590/// the destination register then, if possible, describe the value in terms of
10591/// the source register.
10592static std::optional<ParamLoadedValue>
10593describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10594 const TargetInstrInfo *TII,
10595 const TargetRegisterInfo *TRI) {
10596 auto DestSrc = TII->isCopyLikeInstr(MI);
10597 if (!DestSrc)
10598 return std::nullopt;
10599
10600 Register DestReg = DestSrc->Destination->getReg();
10601 Register SrcReg = DestSrc->Source->getReg();
10602
10603 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10604
10605 // If the described register is the destination, just return the source.
10606 if (DestReg == DescribedReg)
10607 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10608
10609 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10610 if (MI.getOpcode() == AArch64::ORRWrs &&
10611 TRI->isSuperRegister(DestReg, DescribedReg))
10612 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10613
10614 // We may need to describe the lower part of a ORRXrs move.
10615 if (MI.getOpcode() == AArch64::ORRXrs &&
10616 TRI->isSubRegister(DestReg, DescribedReg)) {
10617 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10618 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10619 }
10620
10621 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10622 "Unhandled ORR[XW]rs copy case");
10623
10624 return std::nullopt;
10625}
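// Illustrative cases for the sub-/super-register handling above:
//   $w0 = ORRWrs $wzr, $w1, 0  with DescribedReg == $x0
//     -> described as $w1 (the 32-bit move implicitly zeroes the top of $x0).
//   $x0 = ORRXrs $xzr, $x1, 0  with DescribedReg == $w0
//     -> described as $w1 (the sub_32 sub-register of the source).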
10626
10627bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10628 // Functions cannot be split to different sections on AArch64 if they have
10629 // a red zone. This is because relaxing a cross-section branch may require
10630 // incrementing the stack pointer to spill a register, which would overwrite
10631 // the red zone.
10632 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10633 return false;
10634
10635 return TargetInstrInfo::isFunctionSafeToSplit(MF);
10636}
10637
10638bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10639 const MachineBasicBlock &MBB) const {
10640 // Asm Goto blocks can contain conditional branches to goto labels, which can
10641 // get moved out of range of the branch instruction.
10642 auto isAsmGoto = [](const MachineInstr &MI) {
10643 return MI.getOpcode() == AArch64::INLINEASM_BR;
10644 };
10645 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10646 return false;
10647
10648 // Because jump tables are label-relative instead of table-relative, they all
10649 // must be in the same section or relocation fixup handling will fail.
10650
10651 // Check if MBB is a jump table target
10652 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10653 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10654 return llvm::is_contained(JTE.MBBs, &MBB);
10655 };
10656 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10657 return false;
10658
10659 // Check if MBB contains a jump table lookup
10660 for (const MachineInstr &MI : MBB) {
10661 switch (MI.getOpcode()) {
10662 case TargetOpcode::G_BRJT:
10663 case AArch64::JumpTableDest32:
10664 case AArch64::JumpTableDest16:
10665 case AArch64::JumpTableDest8:
10666 return false;
10667 default:
10668 continue;
10669 }
10670 }
10671
10672 // MBB isn't a special case, so it's safe to be split to the cold section.
10673 return true;
10674}
10675
10676std::optional<ParamLoadedValue>
10677AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10678 Register Reg) const {
10679 const MachineFunction *MF = MI.getMF();
10680 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10681 switch (MI.getOpcode()) {
10682 case AArch64::MOVZWi:
10683 case AArch64::MOVZXi: {
10684 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10685 // 64-bit parameters, so we need to consider super-registers.
10686 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10687 return std::nullopt;
10688
10689 if (!MI.getOperand(1).isImm())
10690 return std::nullopt;
10691 int64_t Immediate = MI.getOperand(1).getImm();
10692 int Shift = MI.getOperand(2).getImm();
10693 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
10694 nullptr);
10695 }
10696 case AArch64::ORRWrs:
10697 case AArch64::ORRXrs:
10698 return describeORRLoadedValue(MI, Reg, this, TRI);
10699 }
10700
10701 return TargetInstrInfo::describeLoadedValue(MI, Reg);
10702}
10703
10704bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10705 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10706 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10707 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10708 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10709
10710 // Anyexts are nops.
10711 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10712 return true;
10713
10714 Register DefReg = ExtMI.getOperand(0).getReg();
10715 if (!MRI.hasOneNonDBGUse(DefReg))
10716 return false;
10717
10718 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10719 // addressing mode.
10720 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
10721 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10722}
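// Illustrative GlobalISel shape where the extend is expected to fold
// (assumed MIR, %off having a single use):
//   %off:_(s64) = G_SEXT %idx:_(s32)
//   %addr:_(p0) = G_PTR_ADD %base, %off
// The extend is likely to become part of a [base, wN, sxtw] addressing mode,
// so it is treated as cheap.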
10723
10724uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10725 return get(Opc).TSFlags & AArch64::ElementSizeMask;
10726}
10727
10728bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10729 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10730}
10731
10732bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10733 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
10734}
10735
10736unsigned int
10737AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10738 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10739}
10740
10741bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10742 unsigned Scale) const {
10743 if (Offset && Scale)
10744 return false;
10745
10746 // Check Reg + Imm
10747 if (!Scale) {
10748 // 9-bit signed offset
10749 if (isInt<9>(Offset))
10750 return true;
10751
10752 // 12-bit unsigned offset
10753 unsigned Shift = Log2_64(NumBytes);
10754 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10755 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10756 (Offset >> Shift) << Shift == Offset)
10757 return true;
10758 return false;
10759 }
10760
10761 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10762 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10763}
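// Worked examples for the checks above (illustrative, NumBytes = 8):
//   Offset = -32,   Scale = 0  -> legal   (signed 9-bit unscaled form)
//   Offset = 32760, Scale = 0  -> legal   (4095 * 8, a multiple of NumBytes)
//   Offset = 32762, Scale = 0  -> illegal (not a multiple of 8, too large unscaled)
//   Offset = 0,     Scale = 8  -> legal   (register offset scaled by the access size)
//   Offset = 16,    Scale = 8  -> illegal (offset and scale cannot be combined)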
10764
10765unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10766 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10767 return AArch64::BLRNoIP;
10768 else
10769 return AArch64::BLR;
10770}
10771
10772MachineBasicBlock::iterator
10773AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10774 Register TargetReg, bool FrameSetup) const {
10775 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10776
10777 MachineBasicBlock &MBB = *MBBI->getParent();
10778 MachineFunction &MF = *MBB.getParent();
10779 const AArch64InstrInfo *TII =
10780 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10781 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10782 DebugLoc DL = MBB.findDebugLoc(MBBI);
10783
10784 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10785 MachineBasicBlock *LoopTestMBB =
10786 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10787 MF.insert(MBBInsertPoint, LoopTestMBB);
10788 MachineBasicBlock *LoopBodyMBB =
10789 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10790 MF.insert(MBBInsertPoint, LoopBodyMBB);
10791 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10792 MF.insert(MBBInsertPoint, ExitMBB);
10793 MachineInstr::MIFlag Flags =
10794 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
10795
10796 // LoopTest:
10797 // SUB SP, SP, #ProbeSize
10798 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10799 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10800
10801 // CMP SP, TargetReg
10802 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10803 AArch64::XZR)
10804 .addReg(AArch64::SP)
10805 .addReg(TargetReg)
10807 .setMIFlags(Flags);
10808
10809 // B.<Cond> LoopExit
10810 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10812 .addMBB(ExitMBB)
10813 .setMIFlags(Flags);
10814
10815 // STR XZR, [SP]
10816 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10817 .addReg(AArch64::XZR)
10818 .addReg(AArch64::SP)
10819 .addImm(0)
10820 .setMIFlags(Flags);
10821
10822 // B loop
10823 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10824 .addMBB(LoopTestMBB)
10825 .setMIFlags(Flags);
10826
10827 // LoopExit:
10828 // MOV SP, TargetReg
10829 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10830 .addReg(TargetReg)
10831 .addImm(0)
10833 .setMIFlags(Flags);
10834
10835 // LDR XZR, [SP]
10836 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10837 .addReg(AArch64::XZR, RegState::Define)
10838 .addReg(AArch64::SP)
10839 .addImm(0)
10840 .setMIFlags(Flags);
10841
10842 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10843 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
10844
10845 LoopTestMBB->addSuccessor(ExitMBB);
10846 LoopTestMBB->addSuccessor(LoopBodyMBB);
10847 LoopBodyMBB->addSuccessor(LoopTestMBB);
10848 MBB.addSuccessor(LoopTestMBB);
10849
10850 // Update liveins.
10851 if (MF.getRegInfo().reservedRegsFrozen())
10852 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10853
10854 return ExitMBB->begin();
10855}
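// Rough shape of the probing sequence emitted above (assembly sketch; the
// compare/branch operands are as built by the SUBSXrx64 and Bcc above):
//   LoopTest:
//     sub  sp, sp, #ProbeSize
//     cmp  sp, <TargetReg>
//     b.<cc> LoopExit
//   LoopBody:
//     str  xzr, [sp]
//     b    LoopTest
//   LoopExit:
//     mov  sp, <TargetReg>
//     ldr  xzr, [sp]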
10856
10857namespace {
10858class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10859 MachineFunction *MF;
10860 const TargetInstrInfo *TII;
10861 const TargetRegisterInfo *TRI;
10862 MachineRegisterInfo &MRI;
10863
10864 /// The block of the loop
10865 MachineBasicBlock *LoopBB;
10866 /// The conditional branch of the loop
10867 MachineInstr *CondBranch;
10868 /// The compare instruction for loop control
10869 MachineInstr *Comp;
10870 /// The number of the operand of the loop counter value in Comp
10871 unsigned CompCounterOprNum;
10872 /// The instruction that updates the loop counter value
10873 MachineInstr *Update;
10874 /// The number of the operand of the loop counter value in Update
10875 unsigned UpdateCounterOprNum;
10876 /// The initial value of the loop counter
10877 Register Init;
10878 /// True iff Update is a predecessor of Comp
10879 bool IsUpdatePriorComp;
10880
10881 /// The normalized condition used by createTripCountGreaterCondition()
10883
10884public:
10885 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10886 MachineInstr *Comp, unsigned CompCounterOprNum,
10887 MachineInstr *Update, unsigned UpdateCounterOprNum,
10888 Register Init, bool IsUpdatePriorComp,
10890 : MF(Comp->getParent()->getParent()),
10891 TII(MF->getSubtarget().getInstrInfo()),
10892 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10893 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10894 CompCounterOprNum(CompCounterOprNum), Update(Update),
10895 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10896 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10897
10898 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10899 // Make the instructions for loop control be placed in stage 0.
10900 // The predecessors of Comp are considered by the caller.
10901 return MI == Comp;
10902 }
10903
10904 std::optional<bool> createTripCountGreaterCondition(
10905 int TC, MachineBasicBlock &MBB,
10906 SmallVectorImpl<MachineOperand> &CondParam) override {
10907 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10908 // Cond is normalized for such use.
10909 // The predecessors of the branch are assumed to have already been inserted.
10910 CondParam = Cond;
10911 return {};
10912 }
10913
10914 void createRemainingIterationsGreaterCondition(
10915 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10916 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10917
10918 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10919
10920 void adjustTripCount(int TripCountAdjust) override {}
10921
10922 bool isMVEExpanderSupported() override { return true; }
10923};
10924} // namespace
10925
10926/// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
10927/// is replaced by ReplaceReg. The output register is newly created.
10928/// The other operands are unchanged from MI.
10929static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10930 Register ReplaceReg, MachineBasicBlock &MBB,
10931 MachineBasicBlock::iterator InsertTo) {
10932 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10933 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10934 const TargetRegisterInfo *TRI =
10935 MBB.getParent()->getSubtarget().getRegisterInfo();
10936 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
10937 Register Result = 0;
10938 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10939 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10940 Result = MRI.createVirtualRegister(
10941 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10942 NewMI->getOperand(I).setReg(Result);
10943 } else if (I == ReplaceOprNum) {
10944 MRI.constrainRegClass(
10945 ReplaceReg,
10946 TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
10947 NewMI->getOperand(I).setReg(ReplaceReg);
10948 }
10949 }
10950 MBB.insert(InsertTo, NewMI);
10951 return Result;
10952}
10953
10954void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10955 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10956 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
10957 // Create and accumulate conditions for next TC iterations.
10958 // Example:
10959 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10960 // # iteration of the kernel
10961 //
10962 // # insert the following instructions
10963 // cond = CSINCXr 0, 0, C, implicit $nzcv
10964 // counter = ADDXri counter, 1 # clone from this->Update
10965 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10966 // cond = CSINCXr cond, cond, C, implicit $nzcv
10967 // ... (repeat TC times)
10968 // SUBSXri cond, 0, implicit-def $nzcv
10969
10970 assert(CondBranch->getOpcode() == AArch64::Bcc);
10971 // CondCode to exit the loop
10972 AArch64CC::CondCode CC =
10973 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
10974 if (CondBranch->getOperand(1).getMBB() == LoopBB)
10975 CC = AArch64CC::getInvertedCondCode(CC);
10976
10977 // Accumulate conditions to exit the loop
10978 Register AccCond = AArch64::XZR;
10979
10980 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10981 auto AccumulateCond = [&](Register CurCond,
10982 AArch64CC::CondCode CC) {
10983 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
10984 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
10985 .addReg(NewCond, RegState::Define)
10986 .addReg(CurCond)
10987 .addReg(CurCond)
10988 .addImm(AArch64CC::getInvertedCondCode(CC));
10989 return NewCond;
10990 };
10991
10992 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
10993 // Update and Comp for I==0 already exist in MBB
10994 // (MBB is an unrolled kernel)
10995 Register Counter;
10996 for (int I = 0; I <= TC; ++I) {
10997 Register NextCounter;
10998 if (I != 0)
10999 NextCounter =
11000 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11001
11002 AccCond = AccumulateCond(AccCond, CC);
11003
11004 if (I != TC) {
11005 if (I == 0) {
11006 if (Update != Comp && IsUpdatePriorComp) {
11007 Counter =
11008 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11009 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11010 MBB.end());
11011 } else {
11012 // We can use the already-calculated value.
11013 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11014 }
11015 } else if (Update != Comp) {
11016 NextCounter =
11017 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11018 }
11019 }
11020 Counter = NextCounter;
11021 }
11022 } else {
11023 Register Counter;
11024 if (LastStage0Insts.empty()) {
11025 // Use the initial counter value (testing whether the trip count is large
11026 // enough to be executed by the pipelined code).
11027 Counter = Init;
11028 if (IsUpdatePriorComp)
11029 Counter =
11030 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11031 } else {
11032 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11033 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11034 }
11035
11036 for (int I = 0; I <= TC; ++I) {
11037 Register NextCounter;
11038 NextCounter =
11039 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11040 AccCond = AccumulateCond(AccCond, CC);
11041 if (I != TC && Update != Comp)
11042 NextCounter =
11043 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11044 Counter = NextCounter;
11045 }
11046 }
11047
11048 // If AccCond == 0, the remainder is greater than TC.
11049 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11050 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11051 .addReg(AccCond)
11052 .addImm(0)
11053 .addImm(0);
11054 Cond.clear();
11056}
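// Note on the accumulation above: CSINC dst, a, b, cc computes
//   dst = cc ? a : (b + 1),
// so feeding CurCond into both sources with the inverted exit condition yields
// CurCond + 1 exactly when the exit condition holds, as the lambda's comment
// states. The final SUBS against zero lets the caller branch on whether any of
// the accumulated exit tests fired.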
11057
11058static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11059 Register &RegMBB, Register &RegOther) {
11060 assert(Phi.getNumOperands() == 5);
11061 if (Phi.getOperand(2).getMBB() == MBB) {
11062 RegMBB = Phi.getOperand(1).getReg();
11063 RegOther = Phi.getOperand(3).getReg();
11064 } else {
11065 assert(Phi.getOperand(4).getMBB() == MBB);
11066 RegMBB = Phi.getOperand(3).getReg();
11067 RegOther = Phi.getOperand(1).getReg();
11068 }
11069}
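// Illustrative PHI operand layout assumed above (MIR):
//   %res = PHI %a, %bb.1, %b, %bb.2
// Operand 0 is the definition, operands 1/3 are incoming values and operands
// 2/4 the corresponding predecessor blocks, hence the 5-operand assert and the
// (1,2)/(3,4) pairing.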
11070
11071static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11072 if (!Reg.isVirtual())
11073 return false;
11074 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11075 return MRI.getVRegDef(Reg)->getParent() != BB;
11076}
11077
11078/// If Reg is an induction variable, return true and set some parameters
11079static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11080 MachineInstr *&UpdateInst,
11081 unsigned &UpdateCounterOprNum, Register &InitReg,
11082 bool &IsUpdatePriorComp) {
11083 // Example:
11084 //
11085 // Preheader:
11086 // InitReg = ...
11087 // LoopBB:
11088 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11089 // Reg = COPY Reg0 ; COPY is ignored.
11090 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11091 // ; Reg is the value calculated in the previous
11092 // ; iteration, so IsUpdatePriorComp == false.
11093
11094 if (LoopBB->pred_size() != 2)
11095 return false;
11096 if (!Reg.isVirtual())
11097 return false;
11098 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11099 UpdateInst = nullptr;
11100 UpdateCounterOprNum = 0;
11101 InitReg = 0;
11102 IsUpdatePriorComp = true;
11103 Register CurReg = Reg;
11104 while (true) {
11105 MachineInstr *Def = MRI.getVRegDef(CurReg);
11106 if (Def->getParent() != LoopBB)
11107 return false;
11108 if (Def->isCopy()) {
11109 // Ignore copy instructions unless they contain subregisters
11110 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11111 return false;
11112 CurReg = Def->getOperand(1).getReg();
11113 } else if (Def->isPHI()) {
11114 if (InitReg != 0)
11115 return false;
11116 if (!UpdateInst)
11117 IsUpdatePriorComp = false;
11118 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11119 } else {
11120 if (UpdateInst)
11121 return false;
11122 switch (Def->getOpcode()) {
11123 case AArch64::ADDSXri:
11124 case AArch64::ADDSWri:
11125 case AArch64::SUBSXri:
11126 case AArch64::SUBSWri:
11127 case AArch64::ADDXri:
11128 case AArch64::ADDWri:
11129 case AArch64::SUBXri:
11130 case AArch64::SUBWri:
11131 UpdateInst = Def;
11132 UpdateCounterOprNum = 1;
11133 break;
11134 case AArch64::ADDSXrr:
11135 case AArch64::ADDSWrr:
11136 case AArch64::SUBSXrr:
11137 case AArch64::SUBSWrr:
11138 case AArch64::ADDXrr:
11139 case AArch64::ADDWrr:
11140 case AArch64::SUBXrr:
11141 case AArch64::SUBWrr:
11142 UpdateInst = Def;
11143 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11144 UpdateCounterOprNum = 1;
11145 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11146 UpdateCounterOprNum = 2;
11147 else
11148 return false;
11149 break;
11150 default:
11151 return false;
11152 }
11153 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11154 }
11155
11156 if (!CurReg.isVirtual())
11157 return false;
11158 if (Reg == CurReg)
11159 break;
11160 }
11161
11162 if (!UpdateInst)
11163 return false;
11164
11165 return true;
11166}
11167
11168std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11169AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11170 // Accept loops that meet the following conditions
11171 // * The conditional branch is BCC
11172 // * The compare instruction is ADDS/SUBS/WHILEXX
11173 // * One operand of the compare is an induction variable and the other is a
11174 // loop invariant value
11175 // * The induction variable is incremented/decremented by a single instruction
11176 // * Does not contain CALL or instructions which have unmodeled side effects
11177
11178 for (MachineInstr &MI : *LoopBB)
11179 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11180 // This instruction may use NZCV, which interferes with the instruction to
11181 // be inserted for loop control.
11182 return nullptr;
11183
11184 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11186 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11187 return nullptr;
11188
11189 // Infinite loops are not supported
11190 if (TBB == LoopBB && FBB == LoopBB)
11191 return nullptr;
11192
11193 // Must be a conditional branch
11194 if (TBB != LoopBB && FBB == nullptr)
11195 return nullptr;
11196
11197 assert((TBB == LoopBB || FBB == LoopBB) &&
11198 "The Loop must be a single-basic-block loop");
11199
11200 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11202
11203 if (CondBranch->getOpcode() != AArch64::Bcc)
11204 return nullptr;
11205
11206 // Normalization for createTripCountGreaterCondition()
11207 if (TBB == LoopBB)
11208 reverseBranchCondition(Cond);
11209
11210 MachineInstr *Comp = nullptr;
11211 unsigned CompCounterOprNum = 0;
11212 for (MachineInstr &MI : reverse(*LoopBB)) {
11213 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11214 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11215 // operands is a loop invariant value
11216
11217 switch (MI.getOpcode()) {
11218 case AArch64::SUBSXri:
11219 case AArch64::SUBSWri:
11220 case AArch64::ADDSXri:
11221 case AArch64::ADDSWri:
11222 Comp = &MI;
11223 CompCounterOprNum = 1;
11224 break;
11225 case AArch64::ADDSWrr:
11226 case AArch64::ADDSXrr:
11227 case AArch64::SUBSWrr:
11228 case AArch64::SUBSXrr:
11229 Comp = &MI;
11230 break;
11231 default:
11232 if (isWhileOpcode(MI.getOpcode())) {
11233 Comp = &MI;
11234 break;
11235 }
11236 return nullptr;
11237 }
11238
11239 if (CompCounterOprNum == 0) {
11240 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11241 CompCounterOprNum = 2;
11242 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11243 CompCounterOprNum = 1;
11244 else
11245 return nullptr;
11246 }
11247 break;
11248 }
11249 }
11250 if (!Comp)
11251 return nullptr;
11252
11253 MachineInstr *Update = nullptr;
11254 Register Init;
11255 bool IsUpdatePriorComp;
11256 unsigned UpdateCounterOprNum;
11257 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11258 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11259 return nullptr;
11260
11261 return std::make_unique<AArch64PipelinerLoopInfo>(
11262 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11263 Init, IsUpdatePriorComp, Cond);
11264}
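// Illustrative single-block loop shape accepted above (pseudo-MIR; %n is
// loop-invariant, %iv is the induction variable):
//   bb.loop:
//     %iv = PHI %init, %bb.preheader, %iv.next, %bb.loop
//     ...
//     %iv.next = ADDXri %iv, 1, 0                      ; Update
//     %d = SUBSXrr %iv.next, %n, implicit-def $nzcv    ; Comp
//     Bcc <cc>, %bb.loop, implicit $nzcv               ; CondBranch
//     B %bb.exit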
11265
11266/// verifyInstruction - Perform target specific instruction verification.
11267bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11268 StringRef &ErrInfo) const {
11269
11270 // Verify that immediate offsets on load/store instructions are within range.
11271 // Stack objects with an FI operand are excluded as they can be fixed up
11272 // during PEI.
11273 TypeSize Scale(0U, false), Width(0U, false);
11274 int64_t MinOffset, MaxOffset;
11275 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11276 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11277 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11278 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11279 if (Imm < MinOffset || Imm > MaxOffset) {
11280 ErrInfo = "Unexpected immediate on load/store instruction";
11281 return false;
11282 }
11283 }
11284 }
11285 return true;
11286}
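// Illustrative failure caught above: LDRXui scales its immediate by 8 and
// accepts indices 0..4095, so an LDRXui whose immediate operand is 4096 (and
// whose base is not a frame index) is reported as "Unexpected immediate on
// load/store instruction".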
11287
11288#define GET_INSTRINFO_HELPERS
11289#define GET_INSTRMAP_INFO
11290#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod(const MachineFunction &MF) const
Choose a method of checking LR before performing a tail call.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:107
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
static constexpr unsigned NoRegister
Definition MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
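A minimal sketch (not taken from this file verbatim) of how the MachineFunction and MachineFrameInfo entries above are typically combined to create a memory operand for a spill slot; MF and the frame index FI are assumed to come from the caller, and the helper name is illustrative.
static MachineMemOperand *makeSpillMMO(MachineFunction &MF, int FI) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  // MOStore: the access writes the slot; size and alignment come from the
  // stack object itself.
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
}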
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
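A small sketch combining the MachineInstr predicates above, in the spirit of the safety checks this file performs before moving or pairing memory instructions; the helper name is illustrative.
static bool isSimpleMemAccess(const MachineInstr &MI) {
  return MI.mayLoadOrStore() &&        // touches memory at all
         !MI.isCall() &&               // calls clobber arbitrary state
         !MI.hasOrderedMemoryRef() &&  // no volatile/atomic ordering constraint
         !MI.hasUnmodeledSideEffects();
}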
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
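A hedged sketch of the MachineOperand helpers above: inspect an operand's kind, rewrite it in place, or build a detached operand with the static Create* helpers. MI, OpIdx, the helper name, and the choice of AArch64::XZR are illustrative assumptions.
static void exampleOperandRewrite(MachineInstr &MI, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  if (MO.isImm())
    MO.setImm(0);                        // rewrite the immediate in place
  else if (MO.isReg() && MO.getReg().isVirtual())
    MO.setReg(AArch64::XZR);             // retarget the register operand
  MachineOperand Detached = MachineOperand::CreateImm(42);
  (void)Detached;  // could be appended to an instruction via MachineInstrBuilder::add()
}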
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
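A minimal sketch of the scavenging pattern these entries support, e.g. when branch relaxation needs a scratch register: track liveness backwards from the block end and pick an unused X register. The helper name and the use of AArch64::GPR64RegClass are illustrative assumptions.
static Register findScratchGPR(RegScavenger &RS, MachineBasicBlock &MBB) {
  RS.enterBasicBlockEnd(MBB);                                  // start tracking at the block end
  Register Scratch = RS.FindUnusedReg(&AArch64::GPR64RegClass);
  if (Scratch.isValid())
    RS.setRegUsed(Scratch);                                    // reserve it for our own use
  return Scratch;
}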
Wrapper class representing virtual and physical registers.
Definition Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:102
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
Represents a location in source code.
Definition SMLoc.h:23
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:197
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:50
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:42
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
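A small sketch of the StackOffset accessors above; the scalable component is scaled by the runtime vscale (e.g. for SVE stack objects), and the helper name is illustrative.
static StackOffset exampleOffset() {
  StackOffset Off = StackOffset::get(/*Fixed=*/16, /*Scalable=*/32);
  // Off.getFixed() == 16; Off.getScalable() == 32 (multiplied by vscale at run time).
  // Components can also be built separately and added together:
  return Off + StackOffset::getFixed(8);
}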
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:134
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
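A hedged sketch of round-tripping a bitmask through the logical-immediate encoder and decoder above; AArch64_AM::isLogicalImmediate is assumed to be available from the same addressing-modes header, and the value checked is merely an example of an encodable repeating pattern.
static bool roundTripsLogicalImm(uint64_t Imm) {
  if (!AArch64_AM::isLogicalImmediate(Imm, 64))
    return false;                                        // not encodable as N:immr:imms
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64);
  return AArch64_AM::decodeLogicalImmediate(Enc, 64) == Imm;
}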
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
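A hedged sketch of querying the immediate expander above; the AArch64_IMM namespace and the ImmInsnModel element type are assumptions based on AArch64ExpandImm.h, and the helper name is illustrative.
static unsigned countMoveImmInsns(uint64_t Imm) {
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, /*BitSize=*/64, Insn);
  // Each entry describes one MOVZ/MOVK/ORR-style step needed to build Imm.
  return Insn.size();
}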
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
Definition MathExtras.h:47
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
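A hedged sketch of the BuildMI / MachineInstrBuilder pattern used throughout this file: emit an ADDXri (add immediate) before MBBI. The opcode choice, the immediate value, and the helper name are illustrative; MBB, MBBI, DL, and TII are assumed to come from the surrounding pass.
static MachineInstr *emitAddImm16(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const DebugLoc &DL,
                                  const TargetInstrInfo *TII,
                                  Register DstReg, Register SrcReg) {
  return BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DstReg)
      .addReg(SrcReg)
      .addImm(16)                                             // imm12 field
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))  // no shift
      .getInstr();
}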
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2474
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
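A hedged sketch of how the legality query and the status flags above combine when deciding whether a frame offset can be folded into an instruction; MI, Offset, and the helper name are assumed inputs.
static bool canFoldOffset(const MachineInstr &MI, StackOffset &Offset) {
  int Status = isAArch64FrameOffsetLegal(MI, Offset);
  // AArch64FrameOffsetCanUpdate: part of Offset can be folded into MI;
  // AArch64FrameOffsetIsLegal:   the (possibly adjusted) offset is encodable.
  return (Status & AArch64FrameOffsetIsLegal) != 0;
}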
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
auto reverse(ContainerTy &&C)
Definition STLExtras.h:420
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1652
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
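A hedged sketch of a typical emitFrameOffset call: adjust SP by a mixed fixed-plus-scalable amount ahead of MBBI and tag the emitted instructions as frame setup. The specific byte counts and the helper name are illustrative; MBB, MBBI, DL, and TII are assumed to come from the caller.
static void adjustSP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, const TargetInstrInfo *TII) {
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                  StackOffset::get(/*Fixed=*/-16, /*Scalable=*/-32), TII,
                  MachineInstr::FrameSetup);
}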
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:337
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2122
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:577
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:238
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.