LLVM 22.0.0git
AArch64InstrInfo.cpp
Go to the documentation of this file.
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
69 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
73 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
77 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
81 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
85 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
94 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
95 AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// GetInstSize - Return the number of bytes of code the specified
99/// instruction may be. This returns the maximum number of bytes.
101 const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134 // Size should be preferably set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
136 // Specific cases handle instructions of variable sizes
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
196 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
200 }
201 return Size;
202}
203
206 // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 }
245}
246
247static unsigned getBranchDisplacementBits(unsigned Opc) {
248 switch (Opc) {
249 default:
250 llvm_unreachable("unexpected opcode!");
251 case AArch64::B:
252 return BDisplacementBits;
253 case AArch64::TBNZW:
254 case AArch64::TBZW:
255 case AArch64::TBNZX:
256 case AArch64::TBZX:
257 return TBZDisplacementBits;
258 case AArch64::CBNZW:
259 case AArch64::CBZW:
260 case AArch64::CBNZX:
261 case AArch64::CBZX:
262 return CBZDisplacementBits;
263 case AArch64::Bcc:
264 return BCCDisplacementBits;
265 case AArch64::CBWPri:
266 case AArch64::CBXPri:
267 case AArch64::CBWPrr:
268 case AArch64::CBXPrr:
269 return CBDisplacementBits;
270 }
271}
272
274 int64_t BrOffset) const {
275 unsigned Bits = getBranchDisplacementBits(BranchOp);
276 assert(Bits >= 3 && "max branch displacement must be enough to jump"
277 "over conditional branch expansion");
278 return isIntN(Bits, BrOffset / 4);
279}
280
283 switch (MI.getOpcode()) {
284 default:
285 llvm_unreachable("unexpected opcode!");
286 case AArch64::B:
287 return MI.getOperand(0).getMBB();
288 case AArch64::TBZW:
289 case AArch64::TBNZW:
290 case AArch64::TBZX:
291 case AArch64::TBNZX:
292 return MI.getOperand(2).getMBB();
293 case AArch64::CBZW:
294 case AArch64::CBNZW:
295 case AArch64::CBZX:
296 case AArch64::CBNZX:
297 case AArch64::Bcc:
298 return MI.getOperand(1).getMBB();
299 case AArch64::CBWPri:
300 case AArch64::CBXPri:
301 case AArch64::CBWPrr:
302 case AArch64::CBXPrr:
303 return MI.getOperand(3).getMBB();
304 }
305}
306
308 MachineBasicBlock &NewDestBB,
309 MachineBasicBlock &RestoreBB,
310 const DebugLoc &DL,
311 int64_t BrOffset,
312 RegScavenger *RS) const {
313 assert(RS && "RegScavenger required for long branching");
314 assert(MBB.empty() &&
315 "new block should be inserted for expanding unconditional branch");
316 assert(MBB.pred_size() == 1);
317 assert(RestoreBB.empty() &&
318 "restore block should be inserted for restoring clobbered registers");
319
320 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
321 // Offsets outside of the signed 33-bit range are not supported for ADRP +
322 // ADD.
323 if (!isInt<33>(BrOffset))
325 "Branch offsets outside of the signed 33-bit range not supported");
326
327 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
328 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
330 .addReg(Reg)
331 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
332 .addImm(0);
333 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
334 };
335
337 // If X16 is unused, we can rely on the linker to insert a range extension
338 // thunk if NewDestBB is out of range of a single B instruction.
339 constexpr Register Reg = AArch64::X16;
340 if (!RS->isRegUsed(Reg)) {
341 insertUnconditionalBranch(MBB, &NewDestBB, DL);
342 RS->setRegUsed(Reg);
343 return;
344 }
345
346 // If there's a free register and it's worth inflating the code size,
347 // manually insert the indirect branch.
348 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
349 if (Scavenged != AArch64::NoRegister &&
351 buildIndirectBranch(Scavenged, NewDestBB);
352 RS->setRegUsed(Scavenged);
353 return;
354 }
355
356 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
357 // with red zones.
359 if (!AFI || AFI->hasRedZone().value_or(true))
361 "Unable to insert indirect branch inside function that has red zone");
362
363 // Otherwise, spill X16 and defer range extension to the linker.
364 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
365 .addReg(AArch64::SP, RegState::Define)
366 .addReg(Reg)
367 .addReg(AArch64::SP)
368 .addImm(-16);
369
370 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
371
372 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
373 .addReg(AArch64::SP, RegState::Define)
375 .addReg(AArch64::SP)
376 .addImm(16);
377}
378
379// Branch analysis.
382 MachineBasicBlock *&FBB,
384 bool AllowModify) const {
385 // If the block has no terminators, it just falls into the block after it.
387 if (I == MBB.end())
388 return false;
389
390 // Skip over SpeculationBarrierEndBB terminators
391 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
392 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
393 --I;
394 }
395
396 if (!isUnpredicatedTerminator(*I))
397 return false;
398
399 // Get the last instruction in the block.
400 MachineInstr *LastInst = &*I;
401
402 // If there is only one terminator instruction, process it.
403 unsigned LastOpc = LastInst->getOpcode();
404 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
405 if (isUncondBranchOpcode(LastOpc)) {
406 TBB = LastInst->getOperand(0).getMBB();
407 return false;
408 }
409 if (isCondBranchOpcode(LastOpc)) {
410 // Block ends with fall-through condbranch.
411 parseCondBranch(LastInst, TBB, Cond);
412 return false;
413 }
414 return true; // Can't handle indirect branch.
415 }
416
417 // Get the instruction before it if it is a terminator.
418 MachineInstr *SecondLastInst = &*I;
419 unsigned SecondLastOpc = SecondLastInst->getOpcode();
420
421 // If AllowModify is true and the block ends with two or more unconditional
422 // branches, delete all but the first unconditional branch.
423 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
424 while (isUncondBranchOpcode(SecondLastOpc)) {
425 LastInst->eraseFromParent();
426 LastInst = SecondLastInst;
427 LastOpc = LastInst->getOpcode();
428 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
429 // Return now the only terminator is an unconditional branch.
430 TBB = LastInst->getOperand(0).getMBB();
431 return false;
432 }
433 SecondLastInst = &*I;
434 SecondLastOpc = SecondLastInst->getOpcode();
435 }
436 }
437
438 // If we're allowed to modify and the block ends in a unconditional branch
439 // which could simply fallthrough, remove the branch. (Note: This case only
440 // matters when we can't understand the whole sequence, otherwise it's also
441 // handled by BranchFolding.cpp.)
442 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
444 LastInst->eraseFromParent();
445 LastInst = SecondLastInst;
446 LastOpc = LastInst->getOpcode();
447 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
448 assert(!isUncondBranchOpcode(LastOpc) &&
449 "unreachable unconditional branches removed above");
450
451 if (isCondBranchOpcode(LastOpc)) {
452 // Block ends with fall-through condbranch.
453 parseCondBranch(LastInst, TBB, Cond);
454 return false;
455 }
456 return true; // Can't handle indirect branch.
457 }
458 SecondLastInst = &*I;
459 SecondLastOpc = SecondLastInst->getOpcode();
460 }
461
462 // If there are three terminators, we don't know what sort of block this is.
463 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
464 return true;
465
466 // If the block ends with a B and a Bcc, handle it.
467 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
468 parseCondBranch(SecondLastInst, TBB, Cond);
469 FBB = LastInst->getOperand(0).getMBB();
470 return false;
471 }
472
473 // If the block ends with two unconditional branches, handle it. The second
474 // one is not executed, so remove it.
475 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
476 TBB = SecondLastInst->getOperand(0).getMBB();
477 I = LastInst;
478 if (AllowModify)
479 I->eraseFromParent();
480 return false;
481 }
482
483 // ...likewise if it ends with an indirect branch followed by an unconditional
484 // branch.
485 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
486 I = LastInst;
487 if (AllowModify)
488 I->eraseFromParent();
489 return true;
490 }
491
492 // Otherwise, can't handle this.
493 return true;
494}
495
497 MachineBranchPredicate &MBP,
498 bool AllowModify) const {
499 // For the moment, handle only a block which ends with a cb(n)zx followed by
500 // a fallthrough. Why this? Because it is a common form.
501 // TODO: Should we handle b.cc?
502
504 if (I == MBB.end())
505 return true;
506
507 // Skip over SpeculationBarrierEndBB terminators
508 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
509 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
510 --I;
511 }
512
513 if (!isUnpredicatedTerminator(*I))
514 return true;
515
516 // Get the last instruction in the block.
517 MachineInstr *LastInst = &*I;
518 unsigned LastOpc = LastInst->getOpcode();
519 if (!isCondBranchOpcode(LastOpc))
520 return true;
521
522 switch (LastOpc) {
523 default:
524 return true;
525 case AArch64::CBZW:
526 case AArch64::CBZX:
527 case AArch64::CBNZW:
528 case AArch64::CBNZX:
529 break;
530 };
531
532 MBP.TrueDest = LastInst->getOperand(1).getMBB();
533 assert(MBP.TrueDest && "expected!");
534 MBP.FalseDest = MBB.getNextNode();
535
536 MBP.ConditionDef = nullptr;
537 MBP.SingleUseCondition = false;
538
539 MBP.LHS = LastInst->getOperand(0);
540 MBP.RHS = MachineOperand::CreateImm(0);
541 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
542 ? MachineBranchPredicate::PRED_NE
543 : MachineBranchPredicate::PRED_EQ;
544 return false;
545}
546
549 if (Cond[0].getImm() != -1) {
550 // Regular Bcc
551 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
553 } else {
554 // Folded compare-and-branch
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown conditional branch!");
558 case AArch64::CBZW:
559 Cond[1].setImm(AArch64::CBNZW);
560 break;
561 case AArch64::CBNZW:
562 Cond[1].setImm(AArch64::CBZW);
563 break;
564 case AArch64::CBZX:
565 Cond[1].setImm(AArch64::CBNZX);
566 break;
567 case AArch64::CBNZX:
568 Cond[1].setImm(AArch64::CBZX);
569 break;
570 case AArch64::TBZW:
571 Cond[1].setImm(AArch64::TBNZW);
572 break;
573 case AArch64::TBNZW:
574 Cond[1].setImm(AArch64::TBZW);
575 break;
576 case AArch64::TBZX:
577 Cond[1].setImm(AArch64::TBNZX);
578 break;
579 case AArch64::TBNZX:
580 Cond[1].setImm(AArch64::TBZX);
581 break;
582
583 // Cond is { -1, Opcode, CC, Op0, Op1 }
584 case AArch64::CBWPri:
585 case AArch64::CBXPri:
586 case AArch64::CBWPrr:
587 case AArch64::CBXPrr: {
588 // Pseudos using standard 4bit Arm condition codes
590 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
592 }
593 }
594 }
595
596 return false;
597}
598
600 int *BytesRemoved) const {
602 if (I == MBB.end())
603 return 0;
604
605 if (!isUncondBranchOpcode(I->getOpcode()) &&
606 !isCondBranchOpcode(I->getOpcode()))
607 return 0;
608
609 // Remove the branch.
610 I->eraseFromParent();
611
612 I = MBB.end();
613
614 if (I == MBB.begin()) {
615 if (BytesRemoved)
616 *BytesRemoved = 4;
617 return 1;
618 }
619 --I;
620 if (!isCondBranchOpcode(I->getOpcode())) {
621 if (BytesRemoved)
622 *BytesRemoved = 4;
623 return 1;
624 }
625
626 // Remove the branch.
627 I->eraseFromParent();
628 if (BytesRemoved)
629 *BytesRemoved = 8;
630
631 return 2;
632}
633
634void AArch64InstrInfo::instantiateCondBranch(
637 if (Cond[0].getImm() != -1) {
638 // Regular Bcc
639 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
640 } else {
641 // Folded compare-and-branch
642 // Note that we use addOperand instead of addReg to keep the flags.
643
644 // cbz, cbnz
645 const MachineInstrBuilder MIB =
646 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
647
648 // tbz/tbnz
649 if (Cond.size() > 3)
650 MIB.add(Cond[3]);
651
652 // cb
653 if (Cond.size() > 4)
654 MIB.add(Cond[4]);
655
656 MIB.addMBB(TBB);
657 }
658}
659
662 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
663 // Shouldn't be a fall through.
664 assert(TBB && "insertBranch must not be told to insert a fallthrough");
665
666 if (!FBB) {
667 if (Cond.empty()) // Unconditional branch?
668 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
669 else
670 instantiateCondBranch(MBB, DL, TBB, Cond);
671
672 if (BytesAdded)
673 *BytesAdded = 4;
674
675 return 1;
676 }
677
678 // Two-way conditional branch.
679 instantiateCondBranch(MBB, DL, TBB, Cond);
680 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
681
682 if (BytesAdded)
683 *BytesAdded = 8;
684
685 return 2;
686}
687
688// Find the original register that VReg is copied from.
689static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690 while (Register::isVirtualRegister(VReg)) {
691 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
692 if (!DefMI->isFullCopy())
693 return VReg;
694 VReg = DefMI->getOperand(1).getReg();
695 }
696 return VReg;
697}
698
699// Determine if VReg is defined by an instruction that can be folded into a
700// csel instruction. If so, return the folded opcode, and the replacement
701// register.
702static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
703 unsigned *NewVReg = nullptr) {
704 VReg = removeCopies(MRI, VReg);
706 return 0;
707
708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
709 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
710 unsigned Opc = 0;
711 unsigned SrcOpNum = 0;
712 switch (DefMI->getOpcode()) {
713 case AArch64::ADDSXri:
714 case AArch64::ADDSWri:
715 // if NZCV is used, do not fold.
716 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
717 true) == -1)
718 return 0;
719 // fall-through to ADDXri and ADDWri.
720 [[fallthrough]];
721 case AArch64::ADDXri:
722 case AArch64::ADDWri:
723 // add x, 1 -> csinc.
724 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
725 DefMI->getOperand(3).getImm() != 0)
726 return 0;
727 SrcOpNum = 1;
728 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
729 break;
730
731 case AArch64::ORNXrr:
732 case AArch64::ORNWrr: {
733 // not x -> csinv, represented as orn dst, xzr, src.
734 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
735 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
736 return 0;
737 SrcOpNum = 2;
738 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
739 break;
740 }
741
742 case AArch64::SUBSXrr:
743 case AArch64::SUBSWrr:
744 // if NZCV is used, do not fold.
745 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
746 true) == -1)
747 return 0;
748 // fall-through to SUBXrr and SUBWrr.
749 [[fallthrough]];
750 case AArch64::SUBXrr:
751 case AArch64::SUBWrr: {
752 // neg x -> csneg, represented as sub dst, xzr, src.
753 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
754 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
755 return 0;
756 SrcOpNum = 2;
757 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
758 break;
759 }
760 default:
761 return 0;
762 }
763 assert(Opc && SrcOpNum && "Missing parameters");
764
765 if (NewVReg)
766 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
767 return Opc;
768}
769
772 Register DstReg, Register TrueReg,
773 Register FalseReg, int &CondCycles,
774 int &TrueCycles,
775 int &FalseCycles) const {
776 // Check register classes.
778 const TargetRegisterClass *RC =
779 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
780 if (!RC)
781 return false;
782
783 // Also need to check the dest regclass, in case we're trying to optimize
784 // something like:
785 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
786 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
787 return false;
788
789 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
790 unsigned ExtraCondLat = Cond.size() != 1;
791
792 // GPRs are handled by csel.
793 // FIXME: Fold in x+1, -x, and ~x when applicable.
794 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
795 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
796 // Single-cycle csel, csinc, csinv, and csneg.
797 CondCycles = 1 + ExtraCondLat;
798 TrueCycles = FalseCycles = 1;
799 if (canFoldIntoCSel(MRI, TrueReg))
800 TrueCycles = 0;
801 else if (canFoldIntoCSel(MRI, FalseReg))
802 FalseCycles = 0;
803 return true;
804 }
805
806 // Scalar floating point is handled by fcsel.
807 // FIXME: Form fabs, fmin, and fmax when applicable.
808 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
809 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
810 CondCycles = 5 + ExtraCondLat;
811 TrueCycles = FalseCycles = 2;
812 return true;
813 }
814
815 // Can't do vectors.
816 return false;
817}
818
821 const DebugLoc &DL, Register DstReg,
823 Register TrueReg, Register FalseReg) const {
825
826 // Parse the condition code, see parseCondBranch() above.
828 switch (Cond.size()) {
829 default:
830 llvm_unreachable("Unknown condition opcode in Cond");
831 case 1: // b.cc
833 break;
834 case 3: { // cbz/cbnz
835 // We must insert a compare against 0.
836 bool Is64Bit;
837 switch (Cond[1].getImm()) {
838 default:
839 llvm_unreachable("Unknown branch opcode in Cond");
840 case AArch64::CBZW:
841 Is64Bit = false;
842 CC = AArch64CC::EQ;
843 break;
844 case AArch64::CBZX:
845 Is64Bit = true;
846 CC = AArch64CC::EQ;
847 break;
848 case AArch64::CBNZW:
849 Is64Bit = false;
850 CC = AArch64CC::NE;
851 break;
852 case AArch64::CBNZX:
853 Is64Bit = true;
854 CC = AArch64CC::NE;
855 break;
856 }
857 Register SrcReg = Cond[2].getReg();
858 if (Is64Bit) {
859 // cmp reg, #0 is actually subs xzr, reg, #0.
860 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
861 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
862 .addReg(SrcReg)
863 .addImm(0)
864 .addImm(0);
865 } else {
866 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
867 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
868 .addReg(SrcReg)
869 .addImm(0)
870 .addImm(0);
871 }
872 break;
873 }
874 case 4: { // tbz/tbnz
875 // We must insert a tst instruction.
876 switch (Cond[1].getImm()) {
877 default:
878 llvm_unreachable("Unknown branch opcode in Cond");
879 case AArch64::TBZW:
880 case AArch64::TBZX:
881 CC = AArch64CC::EQ;
882 break;
883 case AArch64::TBNZW:
884 case AArch64::TBNZX:
885 CC = AArch64CC::NE;
886 break;
887 }
888 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
889 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
890 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
891 .addReg(Cond[2].getReg())
892 .addImm(
894 else
895 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
896 .addReg(Cond[2].getReg())
897 .addImm(
899 break;
900 }
901 case 5: { // cb
902 // We must insert a cmp, that is a subs
903 // 0 1 2 3 4
904 // Cond is { -1, Opcode, CC, Op0, Op1 }
905 unsigned SUBSOpC, SUBSDestReg;
906 bool IsImm = false;
907 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
908 switch (Cond[1].getImm()) {
909 default:
910 llvm_unreachable("Unknown branch opcode in Cond");
911 case AArch64::CBWPri:
912 SUBSOpC = AArch64::SUBSWri;
913 SUBSDestReg = AArch64::WZR;
914 IsImm = true;
915 break;
916 case AArch64::CBXPri:
917 SUBSOpC = AArch64::SUBSXri;
918 SUBSDestReg = AArch64::XZR;
919 IsImm = true;
920 break;
921 case AArch64::CBWPrr:
922 SUBSOpC = AArch64::SUBSWrr;
923 SUBSDestReg = AArch64::WZR;
924 IsImm = false;
925 break;
926 case AArch64::CBXPrr:
927 SUBSOpC = AArch64::SUBSXrr;
928 SUBSDestReg = AArch64::XZR;
929 IsImm = false;
930 break;
931 }
932
933 if (IsImm)
934 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
935 .addReg(Cond[3].getReg())
936 .addImm(Cond[4].getImm())
937 .addImm(0);
938 else
939 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
940 .addReg(Cond[3].getReg())
941 .addReg(Cond[4].getReg());
942 }
943 }
944
945 unsigned Opc = 0;
946 const TargetRegisterClass *RC = nullptr;
947 bool TryFold = false;
948 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
949 RC = &AArch64::GPR64RegClass;
950 Opc = AArch64::CSELXr;
951 TryFold = true;
952 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
953 RC = &AArch64::GPR32RegClass;
954 Opc = AArch64::CSELWr;
955 TryFold = true;
956 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
957 RC = &AArch64::FPR64RegClass;
958 Opc = AArch64::FCSELDrrr;
959 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
960 RC = &AArch64::FPR32RegClass;
961 Opc = AArch64::FCSELSrrr;
962 }
963 assert(RC && "Unsupported regclass");
964
965 // Try folding simple instructions into the csel.
966 if (TryFold) {
967 unsigned NewVReg = 0;
968 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
969 if (FoldedOpc) {
970 // The folded opcodes csinc, csinc and csneg apply the operation to
971 // FalseReg, so we need to invert the condition.
973 TrueReg = FalseReg;
974 } else
975 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
976
977 // Fold the operation. Leave any dead instructions for DCE to clean up.
978 if (FoldedOpc) {
979 FalseReg = NewVReg;
980 Opc = FoldedOpc;
981 // The extends the live range of NewVReg.
982 MRI.clearKillFlags(NewVReg);
983 }
984 }
985
986 // Pull all virtual register into the appropriate class.
987 MRI.constrainRegClass(TrueReg, RC);
988 MRI.constrainRegClass(FalseReg, RC);
989
990 // Insert the csel.
991 BuildMI(MBB, I, DL, get(Opc), DstReg)
992 .addReg(TrueReg)
993 .addReg(FalseReg)
994 .addImm(CC);
995}
996
997// Return true if Imm can be loaded into a register by a "cheap" sequence of
998// instructions. For now, "cheap" means at most two instructions.
999static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1000 if (BitSize == 32)
1001 return true;
1002
1003 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1004 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1006 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1007
1008 return Is.size() <= 2;
1009}
1010
1011// FIXME: this implementation should be micro-architecture dependent, so a
1012// micro-architecture target hook should be introduced here in future.
1014 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1015 if (isExynosCheapAsMove(MI))
1016 return true;
1017 return MI.isAsCheapAsAMove();
1018 }
1019
1020 switch (MI.getOpcode()) {
1021 default:
1022 return MI.isAsCheapAsAMove();
1023
1024 case AArch64::ADDWrs:
1025 case AArch64::ADDXrs:
1026 case AArch64::SUBWrs:
1027 case AArch64::SUBXrs:
1028 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1029
1030 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1031 // ORRXri, it is as cheap as MOV.
1032 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1033 case AArch64::MOVi32imm:
1034 return isCheapImmediate(MI, 32);
1035 case AArch64::MOVi64imm:
1036 return isCheapImmediate(MI, 64);
1037 }
1038}
1039
1041 switch (MI.getOpcode()) {
1042 default:
1043 return false;
1044
1045 case AArch64::ADDWrs:
1046 case AArch64::ADDXrs:
1047 case AArch64::ADDSWrs:
1048 case AArch64::ADDSXrs: {
1049 unsigned Imm = MI.getOperand(3).getImm();
1050 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1051 if (ShiftVal == 0)
1052 return true;
1053 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1054 }
1055
1056 case AArch64::ADDWrx:
1057 case AArch64::ADDXrx:
1058 case AArch64::ADDXrx64:
1059 case AArch64::ADDSWrx:
1060 case AArch64::ADDSXrx:
1061 case AArch64::ADDSXrx64: {
1062 unsigned Imm = MI.getOperand(3).getImm();
1063 switch (AArch64_AM::getArithExtendType(Imm)) {
1064 default:
1065 return false;
1066 case AArch64_AM::UXTB:
1067 case AArch64_AM::UXTH:
1068 case AArch64_AM::UXTW:
1069 case AArch64_AM::UXTX:
1070 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1071 }
1072 }
1073
1074 case AArch64::SUBWrs:
1075 case AArch64::SUBSWrs: {
1076 unsigned Imm = MI.getOperand(3).getImm();
1077 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1078 return ShiftVal == 0 ||
1079 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1080 }
1081
1082 case AArch64::SUBXrs:
1083 case AArch64::SUBSXrs: {
1084 unsigned Imm = MI.getOperand(3).getImm();
1085 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1086 return ShiftVal == 0 ||
1087 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1088 }
1089
1090 case AArch64::SUBWrx:
1091 case AArch64::SUBXrx:
1092 case AArch64::SUBXrx64:
1093 case AArch64::SUBSWrx:
1094 case AArch64::SUBSXrx:
1095 case AArch64::SUBSXrx64: {
1096 unsigned Imm = MI.getOperand(3).getImm();
1097 switch (AArch64_AM::getArithExtendType(Imm)) {
1098 default:
1099 return false;
1100 case AArch64_AM::UXTB:
1101 case AArch64_AM::UXTH:
1102 case AArch64_AM::UXTW:
1103 case AArch64_AM::UXTX:
1104 return AArch64_AM::getArithShiftValue(Imm) == 0;
1105 }
1106 }
1107
1108 case AArch64::LDRBBroW:
1109 case AArch64::LDRBBroX:
1110 case AArch64::LDRBroW:
1111 case AArch64::LDRBroX:
1112 case AArch64::LDRDroW:
1113 case AArch64::LDRDroX:
1114 case AArch64::LDRHHroW:
1115 case AArch64::LDRHHroX:
1116 case AArch64::LDRHroW:
1117 case AArch64::LDRHroX:
1118 case AArch64::LDRQroW:
1119 case AArch64::LDRQroX:
1120 case AArch64::LDRSBWroW:
1121 case AArch64::LDRSBWroX:
1122 case AArch64::LDRSBXroW:
1123 case AArch64::LDRSBXroX:
1124 case AArch64::LDRSHWroW:
1125 case AArch64::LDRSHWroX:
1126 case AArch64::LDRSHXroW:
1127 case AArch64::LDRSHXroX:
1128 case AArch64::LDRSWroW:
1129 case AArch64::LDRSWroX:
1130 case AArch64::LDRSroW:
1131 case AArch64::LDRSroX:
1132 case AArch64::LDRWroW:
1133 case AArch64::LDRWroX:
1134 case AArch64::LDRXroW:
1135 case AArch64::LDRXroX:
1136 case AArch64::PRFMroW:
1137 case AArch64::PRFMroX:
1138 case AArch64::STRBBroW:
1139 case AArch64::STRBBroX:
1140 case AArch64::STRBroW:
1141 case AArch64::STRBroX:
1142 case AArch64::STRDroW:
1143 case AArch64::STRDroX:
1144 case AArch64::STRHHroW:
1145 case AArch64::STRHHroX:
1146 case AArch64::STRHroW:
1147 case AArch64::STRHroX:
1148 case AArch64::STRQroW:
1149 case AArch64::STRQroX:
1150 case AArch64::STRSroW:
1151 case AArch64::STRSroX:
1152 case AArch64::STRWroW:
1153 case AArch64::STRWroX:
1154 case AArch64::STRXroW:
1155 case AArch64::STRXroX: {
1156 unsigned IsSigned = MI.getOperand(3).getImm();
1157 return !IsSigned;
1158 }
1159 }
1160}
1161
1163 unsigned Opc = MI.getOpcode();
1164 switch (Opc) {
1165 default:
1166 return false;
1167 case AArch64::SEH_StackAlloc:
1168 case AArch64::SEH_SaveFPLR:
1169 case AArch64::SEH_SaveFPLR_X:
1170 case AArch64::SEH_SaveReg:
1171 case AArch64::SEH_SaveReg_X:
1172 case AArch64::SEH_SaveRegP:
1173 case AArch64::SEH_SaveRegP_X:
1174 case AArch64::SEH_SaveFReg:
1175 case AArch64::SEH_SaveFReg_X:
1176 case AArch64::SEH_SaveFRegP:
1177 case AArch64::SEH_SaveFRegP_X:
1178 case AArch64::SEH_SetFP:
1179 case AArch64::SEH_AddFP:
1180 case AArch64::SEH_Nop:
1181 case AArch64::SEH_PrologEnd:
1182 case AArch64::SEH_EpilogStart:
1183 case AArch64::SEH_EpilogEnd:
1184 case AArch64::SEH_PACSignLR:
1185 case AArch64::SEH_SaveAnyRegQP:
1186 case AArch64::SEH_SaveAnyRegQPX:
1187 case AArch64::SEH_AllocZ:
1188 case AArch64::SEH_SaveZReg:
1189 case AArch64::SEH_SavePReg:
1190 return true;
1191 }
1192}
1193
1195 Register &SrcReg, Register &DstReg,
1196 unsigned &SubIdx) const {
1197 switch (MI.getOpcode()) {
1198 default:
1199 return false;
1200 case AArch64::SBFMXri: // aka sxtw
1201 case AArch64::UBFMXri: // aka uxtw
1202 // Check for the 32 -> 64 bit extension case, these instructions can do
1203 // much more.
1204 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1205 return false;
1206 // This is a signed or unsigned 32 -> 64 bit extension.
1207 SrcReg = MI.getOperand(1).getReg();
1208 DstReg = MI.getOperand(0).getReg();
1209 SubIdx = AArch64::sub_32;
1210 return true;
1211 }
1212}
1213
1215 const MachineInstr &MIa, const MachineInstr &MIb) const {
1217 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1218 int64_t OffsetA = 0, OffsetB = 0;
1219 TypeSize WidthA(0, false), WidthB(0, false);
1220 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1221
1222 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1223 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1224
1227 return false;
1228
1229 // Retrieve the base, offset from the base and width. Width
1230 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1231 // base are identical, and the offset of a lower memory access +
1232 // the width doesn't overlap the offset of a higher memory access,
1233 // then the memory accesses are different.
1234 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1235 // are assumed to have the same scale (vscale).
1236 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1237 WidthA, TRI) &&
1238 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1239 WidthB, TRI)) {
1240 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1241 OffsetAIsScalable == OffsetBIsScalable) {
1242 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1243 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1244 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1245 if (LowWidth.isScalable() == OffsetAIsScalable &&
1246 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1247 return true;
1248 }
1249 }
1250 return false;
1251}
1252
1254 const MachineBasicBlock *MBB,
1255 const MachineFunction &MF) const {
1257 return true;
1258
1259 // Do not move an instruction that can be recognized as a branch target.
1260 if (hasBTISemantics(MI))
1261 return true;
1262
1263 switch (MI.getOpcode()) {
1264 case AArch64::HINT:
1265 // CSDB hints are scheduling barriers.
1266 if (MI.getOperand(0).getImm() == 0x14)
1267 return true;
1268 break;
1269 case AArch64::DSB:
1270 case AArch64::ISB:
1271 // DSB and ISB also are scheduling barriers.
1272 return true;
1273 case AArch64::MSRpstatesvcrImm1:
1274 // SMSTART and SMSTOP are also scheduling barriers.
1275 return true;
1276 default:;
1277 }
1278 if (isSEHInstruction(MI))
1279 return true;
1280 auto Next = std::next(MI.getIterator());
1281 return Next != MBB->end() && Next->isCFIInstruction();
1282}
1283
1284/// analyzeCompare - For a comparison instruction, return the source registers
1285/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1286/// Return true if the comparison instruction can be analyzed.
1288 Register &SrcReg2, int64_t &CmpMask,
1289 int64_t &CmpValue) const {
1290 // The first operand can be a frame index where we'd normally expect a
1291 // register.
1292 // FIXME: Pass subregisters out of analyzeCompare
1293 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1294 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1295 return false;
1296
1297 switch (MI.getOpcode()) {
1298 default:
1299 break;
1300 case AArch64::PTEST_PP:
1301 case AArch64::PTEST_PP_ANY:
1302 SrcReg = MI.getOperand(0).getReg();
1303 SrcReg2 = MI.getOperand(1).getReg();
1304 if (MI.getOperand(2).getSubReg())
1305 return false;
1306
1307 // Not sure about the mask and value for now...
1308 CmpMask = ~0;
1309 CmpValue = 0;
1310 return true;
1311 case AArch64::SUBSWrr:
1312 case AArch64::SUBSWrs:
1313 case AArch64::SUBSWrx:
1314 case AArch64::SUBSXrr:
1315 case AArch64::SUBSXrs:
1316 case AArch64::SUBSXrx:
1317 case AArch64::ADDSWrr:
1318 case AArch64::ADDSWrs:
1319 case AArch64::ADDSWrx:
1320 case AArch64::ADDSXrr:
1321 case AArch64::ADDSXrs:
1322 case AArch64::ADDSXrx:
1323 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1324 SrcReg = MI.getOperand(1).getReg();
1325 SrcReg2 = MI.getOperand(2).getReg();
1326
1327 // FIXME: Pass subregisters out of analyzeCompare
1328 if (MI.getOperand(2).getSubReg())
1329 return false;
1330
1331 CmpMask = ~0;
1332 CmpValue = 0;
1333 return true;
1334 case AArch64::SUBSWri:
1335 case AArch64::ADDSWri:
1336 case AArch64::SUBSXri:
1337 case AArch64::ADDSXri:
1338 SrcReg = MI.getOperand(1).getReg();
1339 SrcReg2 = 0;
1340 CmpMask = ~0;
1341 CmpValue = MI.getOperand(2).getImm();
1342 return true;
1343 case AArch64::ANDSWri:
1344 case AArch64::ANDSXri:
1345 // ANDS does not use the same encoding scheme as the others xxxS
1346 // instructions.
1347 SrcReg = MI.getOperand(1).getReg();
1348 SrcReg2 = 0;
1349 CmpMask = ~0;
1351 MI.getOperand(2).getImm(),
1352 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1353 return true;
1354 }
1355
1356 return false;
1357}
1358
1360 MachineBasicBlock *MBB = Instr.getParent();
1361 assert(MBB && "Can't get MachineBasicBlock here");
1362 MachineFunction *MF = MBB->getParent();
1363 assert(MF && "Can't get MachineFunction here");
1367
1368 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1369 ++OpIdx) {
1370 MachineOperand &MO = Instr.getOperand(OpIdx);
1371 const TargetRegisterClass *OpRegCstraints =
1372 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1373
1374 // If there's no constraint, there's nothing to do.
1375 if (!OpRegCstraints)
1376 continue;
1377 // If the operand is a frame index, there's nothing to do here.
1378 // A frame index operand will resolve correctly during PEI.
1379 if (MO.isFI())
1380 continue;
1381
1382 assert(MO.isReg() &&
1383 "Operand has register constraints without being a register!");
1384
1385 Register Reg = MO.getReg();
1386 if (Reg.isPhysical()) {
1387 if (!OpRegCstraints->contains(Reg))
1388 return false;
1389 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1390 !MRI->constrainRegClass(Reg, OpRegCstraints))
1391 return false;
1392 }
1393
1394 return true;
1395}
1396
1397/// Return the opcode that does not set flags when possible - otherwise
1398/// return the original opcode. The caller is responsible to do the actual
1399/// substitution and legality checking.
1401 // Don't convert all compare instructions, because for some the zero register
1402 // encoding becomes the sp register.
1403 bool MIDefinesZeroReg = false;
1404 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1405 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1406 MIDefinesZeroReg = true;
1407
1408 switch (MI.getOpcode()) {
1409 default:
1410 return MI.getOpcode();
1411 case AArch64::ADDSWrr:
1412 return AArch64::ADDWrr;
1413 case AArch64::ADDSWri:
1414 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1415 case AArch64::ADDSWrs:
1416 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1417 case AArch64::ADDSWrx:
1418 return AArch64::ADDWrx;
1419 case AArch64::ADDSXrr:
1420 return AArch64::ADDXrr;
1421 case AArch64::ADDSXri:
1422 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1423 case AArch64::ADDSXrs:
1424 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1425 case AArch64::ADDSXrx:
1426 return AArch64::ADDXrx;
1427 case AArch64::SUBSWrr:
1428 return AArch64::SUBWrr;
1429 case AArch64::SUBSWri:
1430 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1431 case AArch64::SUBSWrs:
1432 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1433 case AArch64::SUBSWrx:
1434 return AArch64::SUBWrx;
1435 case AArch64::SUBSXrr:
1436 return AArch64::SUBXrr;
1437 case AArch64::SUBSXri:
1438 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1439 case AArch64::SUBSXrs:
1440 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1441 case AArch64::SUBSXrx:
1442 return AArch64::SUBXrx;
1443 }
1444}
1445
1446enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1447
1448/// True when condition flags are accessed (either by writing or reading)
1449/// on the instruction trace starting at From and ending at To.
1450///
1451/// Note: If From and To are from different blocks it's assumed CC are accessed
1452/// on the path.
1455 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1456 // Early exit if To is at the beginning of the BB.
1457 if (To == To->getParent()->begin())
1458 return true;
1459
1460 // Check whether the instructions are in the same basic block
1461 // If not, assume the condition flags might get modified somewhere.
1462 if (To->getParent() != From->getParent())
1463 return true;
1464
1465 // From must be above To.
1466 assert(std::any_of(
1467 ++To.getReverse(), To->getParent()->rend(),
1468 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1469
1470 // We iterate backward starting at \p To until we hit \p From.
1471 for (const MachineInstr &Instr :
1472 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1473 if (((AccessToCheck & AK_Write) &&
1474 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1475 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1476 return true;
1477 }
1478 return false;
1479}
1480
1481std::optional<unsigned>
1482AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1483 MachineInstr *Pred,
1484 const MachineRegisterInfo *MRI) const {
1485 unsigned MaskOpcode = Mask->getOpcode();
1486 unsigned PredOpcode = Pred->getOpcode();
1487 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1488 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1489
1490 if (PredIsWhileLike) {
1491 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1492 // instruction and the condition is "any" since WHILcc does an implicit
1493 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1494 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1495 return PredOpcode;
1496
1497 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1498 // redundant since WHILE performs an implicit PTEST with an all active
1499 // mask.
1500 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1501 getElementSizeForOpcode(MaskOpcode) ==
1502 getElementSizeForOpcode(PredOpcode))
1503 return PredOpcode;
1504
1505 return {};
1506 }
1507
1508 if (PredIsPTestLike) {
1509 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1510 // instruction that sets the flags as PTEST would and the condition is
1511 // "any" since PG is always a subset of the governing predicate of the
1512 // ptest-like instruction.
1513 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1514 return PredOpcode;
1515
1516 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1517
1518 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1519 // to look through a copy and try again. This is because some instructions
1520 // take a predicate whose register class is a subset of its result class.
1521 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1522 PTestLikeMask->getOperand(1).getReg().isVirtual())
1523 PTestLikeMask =
1524 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1525
1526 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1527 // the element size matches and either the PTEST_LIKE instruction uses
1528 // the same all active mask or the condition is "any".
1529 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1530 getElementSizeForOpcode(MaskOpcode) ==
1531 getElementSizeForOpcode(PredOpcode)) {
1532 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1533 return PredOpcode;
1534 }
1535
1536 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1537 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1538 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1539 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1540 // performed by the compare could consider fewer lanes for these element
1541 // sizes.
1542 //
1543 // For example, consider
1544 //
1545 // ptrue p0.b ; P0=1111-1111-1111-1111
1546 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1547 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1548 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1549 // ; ^ last active
1550 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1551 // ; ^ last active
1552 //
1553 // where the compare generates a canonical all active 32-bit predicate
1554 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1555 // active flag, whereas the PTEST instruction with the same mask doesn't.
1556 // For PTEST_ANY this doesn't apply as the flags in this case would be
1557 // identical regardless of element size.
1558 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1559 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1560 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1561 return PredOpcode;
1562
1563 return {};
1564 }
1565
1566 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1567 // opcode so the PTEST becomes redundant.
1568 switch (PredOpcode) {
1569 case AArch64::AND_PPzPP:
1570 case AArch64::BIC_PPzPP:
1571 case AArch64::EOR_PPzPP:
1572 case AArch64::NAND_PPzPP:
1573 case AArch64::NOR_PPzPP:
1574 case AArch64::ORN_PPzPP:
1575 case AArch64::ORR_PPzPP:
1576 case AArch64::BRKA_PPzP:
1577 case AArch64::BRKPA_PPzPP:
1578 case AArch64::BRKB_PPzP:
1579 case AArch64::BRKPB_PPzPP:
1580 case AArch64::RDFFR_PPz: {
1581 // Check to see if our mask is the same. If not the resulting flag bits
1582 // may be different and we can't remove the ptest.
1583 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1584 if (Mask != PredMask)
1585 return {};
1586 break;
1587 }
1588 case AArch64::BRKN_PPzP: {
1589 // BRKN uses an all active implicit mask to set flags unlike the other
1590 // flag-setting instructions.
1591 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1592 if ((MaskOpcode != AArch64::PTRUE_B) ||
1593 (Mask->getOperand(1).getImm() != 31))
1594 return {};
1595 break;
1596 }
1597 case AArch64::PTRUE_B:
1598 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1599 break;
1600 default:
1601 // Bail out if we don't recognize the input
1602 return {};
1603 }
1604
1605 return convertToFlagSettingOpc(PredOpcode);
1606}
1607
1608/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1609/// operation which could set the flags in an identical manner
1610bool AArch64InstrInfo::optimizePTestInstr(
1611 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1612 const MachineRegisterInfo *MRI) const {
1613 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1614 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1615 unsigned PredOpcode = Pred->getOpcode();
1616 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1617 if (!NewOp)
1618 return false;
1619
1621
1622 // If another instruction between Pred and PTest accesses flags, don't remove
1623 // the ptest or update the earlier instruction to modify them.
1624 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1625 return false;
1626
1627 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1628 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1629 // operand to be replaced with an equivalent instruction that also sets the
1630 // flags.
1631 PTest->eraseFromParent();
1632 if (*NewOp != PredOpcode) {
1633 Pred->setDesc(get(*NewOp));
1634 bool succeeded = UpdateOperandRegClass(*Pred);
1635 (void)succeeded;
1636 assert(succeeded && "Operands have incompatible register classes!");
1637 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1638 }
1639
1640 // Ensure that the flags def is live.
1641 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1642 unsigned i = 0, e = Pred->getNumOperands();
1643 for (; i != e; ++i) {
1644 MachineOperand &MO = Pred->getOperand(i);
1645 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1646 MO.setIsDead(false);
1647 break;
1648 }
1649 }
1650 }
1651 return true;
1652}
1653
1654/// Try to optimize a compare instruction. A compare instruction is an
1655/// instruction which produces AArch64::NZCV. It can be truly compare
1656/// instruction
1657/// when there are no uses of its destination register.
1658///
1659/// The following steps are tried in order:
1660/// 1. Convert CmpInstr into an unconditional version.
1661/// 2. Remove CmpInstr if above there is an instruction producing a needed
1662/// condition code or an instruction which can be converted into such an
1663/// instruction.
1664/// Only comparison with zero is supported.
1666 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1667 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1668 assert(CmpInstr.getParent());
1669 assert(MRI);
1670
1671 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1672 int DeadNZCVIdx =
1673 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1674 if (DeadNZCVIdx != -1) {
1675 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1676 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1677 CmpInstr.eraseFromParent();
1678 return true;
1679 }
1680 unsigned Opc = CmpInstr.getOpcode();
1681 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1682 if (NewOpc == Opc)
1683 return false;
1684 const MCInstrDesc &MCID = get(NewOpc);
1685 CmpInstr.setDesc(MCID);
1686 CmpInstr.removeOperand(DeadNZCVIdx);
1687 bool succeeded = UpdateOperandRegClass(CmpInstr);
1688 (void)succeeded;
1689 assert(succeeded && "Some operands reg class are incompatible!");
1690 return true;
1691 }
1692
1693 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1694 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1695 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1696
1697 if (SrcReg2 != 0)
1698 return false;
1699
1700 // CmpInstr is a Compare instruction if destination register is not used.
1701 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1702 return false;
1703
1704 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1705 return true;
1706 return (CmpValue == 0 || CmpValue == 1) &&
1707 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1708}
1709
1710/// Get opcode of S version of Instr.
1711/// If Instr is S version its opcode is returned.
1712/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1713/// or we are not interested in it.
1714static unsigned sForm(MachineInstr &Instr) {
1715 switch (Instr.getOpcode()) {
1716 default:
1717 return AArch64::INSTRUCTION_LIST_END;
1718
1719 case AArch64::ADDSWrr:
1720 case AArch64::ADDSWri:
1721 case AArch64::ADDSXrr:
1722 case AArch64::ADDSXri:
1723 case AArch64::SUBSWrr:
1724 case AArch64::SUBSWri:
1725 case AArch64::SUBSXrr:
1726 case AArch64::SUBSXri:
1727 return Instr.getOpcode();
1728
1729 case AArch64::ADDWrr:
1730 return AArch64::ADDSWrr;
1731 case AArch64::ADDWri:
1732 return AArch64::ADDSWri;
1733 case AArch64::ADDXrr:
1734 return AArch64::ADDSXrr;
1735 case AArch64::ADDXri:
1736 return AArch64::ADDSXri;
1737 case AArch64::ADCWr:
1738 return AArch64::ADCSWr;
1739 case AArch64::ADCXr:
1740 return AArch64::ADCSXr;
1741 case AArch64::SUBWrr:
1742 return AArch64::SUBSWrr;
1743 case AArch64::SUBWri:
1744 return AArch64::SUBSWri;
1745 case AArch64::SUBXrr:
1746 return AArch64::SUBSXrr;
1747 case AArch64::SUBXri:
1748 return AArch64::SUBSXri;
1749 case AArch64::SBCWr:
1750 return AArch64::SBCSWr;
1751 case AArch64::SBCXr:
1752 return AArch64::SBCSXr;
1753 case AArch64::ANDWri:
1754 return AArch64::ANDSWri;
1755 case AArch64::ANDXri:
1756 return AArch64::ANDSXri;
1757 }
1758}
1759
1760/// Check if AArch64::NZCV should be alive in successors of MBB.
1762 for (auto *BB : MBB->successors())
1763 if (BB->isLiveIn(AArch64::NZCV))
1764 return true;
1765 return false;
1766}
1767
1768/// \returns The condition code operand index for \p Instr if it is a branch
1769/// or select and -1 otherwise.
1770static int
1772 switch (Instr.getOpcode()) {
1773 default:
1774 return -1;
1775
1776 case AArch64::Bcc: {
1777 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1778 assert(Idx >= 2);
1779 return Idx - 2;
1780 }
1781
1782 case AArch64::CSINVWr:
1783 case AArch64::CSINVXr:
1784 case AArch64::CSINCWr:
1785 case AArch64::CSINCXr:
1786 case AArch64::CSELWr:
1787 case AArch64::CSELXr:
1788 case AArch64::CSNEGWr:
1789 case AArch64::CSNEGXr:
1790 case AArch64::FCSELSrrr:
1791 case AArch64::FCSELDrrr: {
1792 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1793 assert(Idx >= 1);
1794 return Idx - 1;
1795 }
1796 }
1797}
1798
1799/// Find a condition code used by the instruction.
1800/// Returns AArch64CC::Invalid if either the instruction does not use condition
1801/// codes or we don't optimize CmpInstr in the presence of such instructions.
1804 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1805 Instr.getOperand(CCIdx).getImm())
1807}
1808
1811 UsedNZCV UsedFlags;
1812 switch (CC) {
1813 default:
1814 break;
1815
1816 case AArch64CC::EQ: // Z set
1817 case AArch64CC::NE: // Z clear
1818 UsedFlags.Z = true;
1819 break;
1820
1821 case AArch64CC::HI: // Z clear and C set
1822 case AArch64CC::LS: // Z set or C clear
1823 UsedFlags.Z = true;
1824 [[fallthrough]];
1825 case AArch64CC::HS: // C set
1826 case AArch64CC::LO: // C clear
1827 UsedFlags.C = true;
1828 break;
1829
1830 case AArch64CC::MI: // N set
1831 case AArch64CC::PL: // N clear
1832 UsedFlags.N = true;
1833 break;
1834
1835 case AArch64CC::VS: // V set
1836 case AArch64CC::VC: // V clear
1837 UsedFlags.V = true;
1838 break;
1839
1840 case AArch64CC::GT: // Z clear, N and V the same
1841 case AArch64CC::LE: // Z set, N and V differ
1842 UsedFlags.Z = true;
1843 [[fallthrough]];
1844 case AArch64CC::GE: // N and V the same
1845 case AArch64CC::LT: // N and V differ
1846 UsedFlags.N = true;
1847 UsedFlags.V = true;
1848 break;
1849 }
1850 return UsedFlags;
1851}
1852
1853/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1854/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1855/// \returns std::nullopt otherwise.
1856///
1857/// Collect instructions using that flags in \p CCUseInstrs if provided.
1858std::optional<UsedNZCV>
1860 const TargetRegisterInfo &TRI,
1861 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1862 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1863 if (MI.getParent() != CmpParent)
1864 return std::nullopt;
1865
1866 if (areCFlagsAliveInSuccessors(CmpParent))
1867 return std::nullopt;
1868
1869 UsedNZCV NZCVUsedAfterCmp;
1871 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1872 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1874 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1875 return std::nullopt;
1876 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1877 if (CCUseInstrs)
1878 CCUseInstrs->push_back(&Instr);
1879 }
1880 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1881 break;
1882 }
1883 return NZCVUsedAfterCmp;
1884}
1885
1886static bool isADDSRegImm(unsigned Opcode) {
1887 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1888}
1889
1890static bool isSUBSRegImm(unsigned Opcode) {
1891 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1892}
1893
1894/// Check if CmpInstr can be substituted by MI.
1895///
1896/// CmpInstr can be substituted:
1897/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1898/// - and, MI and CmpInstr are from the same MachineBB
1899/// - and, condition flags are not alive in successors of the CmpInstr parent
1900/// - and, if MI opcode is the S form there must be no defs of flags between
1901/// MI and CmpInstr
1902/// or if MI opcode is not the S form there must be neither defs of flags
1903/// nor uses of flags between MI and CmpInstr.
1904/// - and, if C/V flags are not used after CmpInstr
1905/// or if N flag is used but MI produces poison value if signed overflow
1906/// occurs.
1908 const TargetRegisterInfo &TRI) {
1909 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1910 // that may or may not set flags.
1911 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1912
1913 const unsigned CmpOpcode = CmpInstr.getOpcode();
1914 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1915 return false;
1916
1917 assert((CmpInstr.getOperand(2).isImm() &&
1918 CmpInstr.getOperand(2).getImm() == 0) &&
1919 "Caller guarantees that CmpInstr compares with constant 0");
1920
1921 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1922 if (!NZVCUsed || NZVCUsed->C)
1923 return false;
1924
1925 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1926 // '%vreg = add ...' or '%vreg = sub ...'.
1927 // Condition flag V is used to indicate signed overflow.
1928 // 1) MI and CmpInstr set N and V to the same value.
1929 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1930 // signed overflow occurs, so CmpInstr could still be simplified away.
1931 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1932 return false;
1933
1934 AccessKind AccessToCheck = AK_Write;
1935 if (sForm(MI) != MI.getOpcode())
1936 AccessToCheck = AK_All;
1937 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1938}
1939
1940/// Substitute an instruction comparing to zero with another instruction
1941/// which produces needed condition flags.
1942///
1943/// Return true on success.
1944bool AArch64InstrInfo::substituteCmpToZero(
1945 MachineInstr &CmpInstr, unsigned SrcReg,
1946 const MachineRegisterInfo &MRI) const {
1947 // Get the unique definition of SrcReg.
1948 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1949 if (!MI)
1950 return false;
1951
1953
1954 unsigned NewOpc = sForm(*MI);
1955 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1956 return false;
1957
1958 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1959 return false;
1960
1961 // Update the instruction to set NZCV.
1962 MI->setDesc(get(NewOpc));
1963 CmpInstr.eraseFromParent();
1965 (void)succeeded;
1966 assert(succeeded && "Some operands reg class are incompatible!");
1967 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1968 return true;
1969}
1970
1971/// \returns True if \p CmpInstr can be removed.
1972///
1973/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1974/// codes used in \p CCUseInstrs must be inverted.
1976 int CmpValue, const TargetRegisterInfo &TRI,
1978 bool &IsInvertCC) {
1979 assert((CmpValue == 0 || CmpValue == 1) &&
1980 "Only comparisons to 0 or 1 considered for removal!");
1981
1982 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1983 unsigned MIOpc = MI.getOpcode();
1984 if (MIOpc == AArch64::CSINCWr) {
1985 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1986 MI.getOperand(2).getReg() != AArch64::WZR)
1987 return false;
1988 } else if (MIOpc == AArch64::CSINCXr) {
1989 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1990 MI.getOperand(2).getReg() != AArch64::XZR)
1991 return false;
1992 } else {
1993 return false;
1994 }
1996 if (MICC == AArch64CC::Invalid)
1997 return false;
1998
1999 // NZCV needs to be defined
2000 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2001 return false;
2002
2003 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2004 const unsigned CmpOpcode = CmpInstr.getOpcode();
2005 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2006 if (CmpValue && !IsSubsRegImm)
2007 return false;
2008 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2009 return false;
2010
2011 // MI conditions allowed: eq, ne, mi, pl
2012 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2013 if (MIUsedNZCV.C || MIUsedNZCV.V)
2014 return false;
2015
2016 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2017 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2018 // Condition flags are not used in CmpInstr basic block successors and only
2019 // Z or N flags allowed to be used after CmpInstr within its basic block
2020 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2021 return false;
2022 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2023 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2024 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2025 return false;
2026 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2027 if (MIUsedNZCV.N && !CmpValue)
2028 return false;
2029
2030 // There must be no defs of flags between MI and CmpInstr
2031 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2032 return false;
2033
2034 // Condition code is inverted in the following cases:
2035 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2036 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2037 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2038 (!CmpValue && MICC == AArch64CC::NE);
2039 return true;
2040}
2041
2042/// Remove comparison in csinc-cmp sequence
2043///
2044/// Examples:
2045/// 1. \code
2046/// csinc w9, wzr, wzr, ne
2047/// cmp w9, #0
2048/// b.eq
2049/// \endcode
2050/// to
2051/// \code
2052/// csinc w9, wzr, wzr, ne
2053/// b.ne
2054/// \endcode
2055///
2056/// 2. \code
2057/// csinc x2, xzr, xzr, mi
2058/// cmp x2, #1
2059/// b.pl
2060/// \endcode
2061/// to
2062/// \code
2063/// csinc x2, xzr, xzr, mi
2064/// b.pl
2065/// \endcode
2066///
2067/// \param CmpInstr comparison instruction
2068/// \return True when comparison removed
2069bool AArch64InstrInfo::removeCmpToZeroOrOne(
2070 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2071 const MachineRegisterInfo &MRI) const {
2072 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2073 if (!MI)
2074 return false;
2077 bool IsInvertCC = false;
2078 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2079 IsInvertCC))
2080 return false;
2081 // Make transformation
2082 CmpInstr.eraseFromParent();
2083 if (IsInvertCC) {
2084 // Invert condition codes in CmpInstr CC users
2085 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2087 assert(Idx >= 0 && "Unexpected instruction using CC.");
2088 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2090 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2091 CCOperand.setImm(CCUse);
2092 }
2093 }
2094 return true;
2095}
2096
2098 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2099 MI.getOpcode() != AArch64::CATCHRET)
2100 return false;
2101
2102 MachineBasicBlock &MBB = *MI.getParent();
2103 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2104 auto TRI = Subtarget.getRegisterInfo();
2105 DebugLoc DL = MI.getDebugLoc();
2106
2107 if (MI.getOpcode() == AArch64::CATCHRET) {
2108 // Skip to the first instruction before the epilog.
2109 const TargetInstrInfo *TII =
2111 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2113 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2114 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2115 FirstEpilogSEH != MBB.begin())
2116 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2117 if (FirstEpilogSEH != MBB.begin())
2118 FirstEpilogSEH = std::next(FirstEpilogSEH);
2119 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2120 .addReg(AArch64::X0, RegState::Define)
2121 .addMBB(TargetMBB);
2122 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2123 .addReg(AArch64::X0, RegState::Define)
2124 .addReg(AArch64::X0)
2125 .addMBB(TargetMBB)
2126 .addImm(0);
2127 TargetMBB->setMachineBlockAddressTaken();
2128 return true;
2129 }
2130
2131 Register Reg = MI.getOperand(0).getReg();
2133 if (M.getStackProtectorGuard() == "sysreg") {
2134 const AArch64SysReg::SysReg *SrcReg =
2135 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2136 if (!SrcReg)
2137 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2138
2139 // mrs xN, sysreg
2140 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2142 .addImm(SrcReg->Encoding);
2143 int Offset = M.getStackProtectorGuardOffset();
2144 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2145 // ldr xN, [xN, #offset]
2146 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2147 .addDef(Reg)
2148 .addUse(Reg, RegState::Kill)
2149 .addImm(Offset / 8);
2150 } else if (Offset >= -256 && Offset <= 255) {
2151 // ldur xN, [xN, #offset]
2152 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2153 .addDef(Reg)
2154 .addUse(Reg, RegState::Kill)
2155 .addImm(Offset);
2156 } else if (Offset >= -4095 && Offset <= 4095) {
2157 if (Offset > 0) {
2158 // add xN, xN, #offset
2159 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2160 .addDef(Reg)
2161 .addUse(Reg, RegState::Kill)
2162 .addImm(Offset)
2163 .addImm(0);
2164 } else {
2165 // sub xN, xN, #offset
2166 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2167 .addDef(Reg)
2168 .addUse(Reg, RegState::Kill)
2169 .addImm(-Offset)
2170 .addImm(0);
2171 }
2172 // ldr xN, [xN]
2173 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2174 .addDef(Reg)
2175 .addUse(Reg, RegState::Kill)
2176 .addImm(0);
2177 } else {
2178 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2179 // than 23760.
2180 // It might be nice to use AArch64::MOVi32imm here, which would get
2181 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2182 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2183 // AArch64FrameLowering might help us find such a scratch register
2184 // though. If we failed to find a scratch register, we could emit a
2185 // stream of add instructions to build up the immediate. Or, we could try
2186 // to insert a AArch64::MOVi32imm before register allocation so that we
2187 // didn't need to scavenge for a scratch register.
2188 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2189 }
2190 MBB.erase(MI);
2191 return true;
2192 }
2193
2194 const GlobalValue *GV =
2195 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2196 const TargetMachine &TM = MBB.getParent()->getTarget();
2197 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2198 const unsigned char MO_NC = AArch64II::MO_NC;
2199
2200 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2201 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2202 .addGlobalAddress(GV, 0, OpFlags);
2203 if (Subtarget.isTargetILP32()) {
2204 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2205 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2206 .addDef(Reg32, RegState::Dead)
2207 .addUse(Reg, RegState::Kill)
2208 .addImm(0)
2209 .addMemOperand(*MI.memoperands_begin())
2211 } else {
2212 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2213 .addReg(Reg, RegState::Kill)
2214 .addImm(0)
2215 .addMemOperand(*MI.memoperands_begin());
2216 }
2217 } else if (TM.getCodeModel() == CodeModel::Large) {
2218 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2219 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2220 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2221 .addImm(0);
2222 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2223 .addReg(Reg, RegState::Kill)
2224 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2225 .addImm(16);
2226 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2227 .addReg(Reg, RegState::Kill)
2228 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2229 .addImm(32);
2230 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2231 .addReg(Reg, RegState::Kill)
2233 .addImm(48);
2234 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2235 .addReg(Reg, RegState::Kill)
2236 .addImm(0)
2237 .addMemOperand(*MI.memoperands_begin());
2238 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2239 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2240 .addGlobalAddress(GV, 0, OpFlags);
2241 } else {
2242 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2243 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2244 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2245 if (Subtarget.isTargetILP32()) {
2246 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2247 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2248 .addDef(Reg32, RegState::Dead)
2249 .addUse(Reg, RegState::Kill)
2250 .addGlobalAddress(GV, 0, LoFlags)
2251 .addMemOperand(*MI.memoperands_begin())
2253 } else {
2254 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2255 .addReg(Reg, RegState::Kill)
2256 .addGlobalAddress(GV, 0, LoFlags)
2257 .addMemOperand(*MI.memoperands_begin());
2258 }
2259 }
2260
2261 MBB.erase(MI);
2262
2263 return true;
2264}
2265
2266// Return true if this instruction simply sets its single destination register
2267// to zero. This is equivalent to a register rename of the zero-register.
2269 switch (MI.getOpcode()) {
2270 default:
2271 break;
2272 case AArch64::MOVZWi:
2273 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2274 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2275 assert(MI.getDesc().getNumOperands() == 3 &&
2276 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2277 return true;
2278 }
2279 break;
2280 case AArch64::ANDWri: // and Rd, Rzr, #imm
2281 return MI.getOperand(1).getReg() == AArch64::WZR;
2282 case AArch64::ANDXri:
2283 return MI.getOperand(1).getReg() == AArch64::XZR;
2284 case TargetOpcode::COPY:
2285 return MI.getOperand(1).getReg() == AArch64::WZR;
2286 }
2287 return false;
2288}
2289
2290// Return true if this instruction simply renames a general register without
2291// modifying bits.
2293 switch (MI.getOpcode()) {
2294 default:
2295 break;
2296 case TargetOpcode::COPY: {
2297 // GPR32 copies will by lowered to ORRXrs
2298 Register DstReg = MI.getOperand(0).getReg();
2299 return (AArch64::GPR32RegClass.contains(DstReg) ||
2300 AArch64::GPR64RegClass.contains(DstReg));
2301 }
2302 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2303 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2304 assert(MI.getDesc().getNumOperands() == 4 &&
2305 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2306 return true;
2307 }
2308 break;
2309 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2310 if (MI.getOperand(2).getImm() == 0) {
2311 assert(MI.getDesc().getNumOperands() == 4 &&
2312 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2313 return true;
2314 }
2315 break;
2316 }
2317 return false;
2318}
2319
2320// Return true if this instruction simply renames a general register without
2321// modifying bits.
2323 switch (MI.getOpcode()) {
2324 default:
2325 break;
2326 case TargetOpcode::COPY: {
2327 Register DstReg = MI.getOperand(0).getReg();
2328 return AArch64::FPR128RegClass.contains(DstReg);
2329 }
2330 case AArch64::ORRv16i8:
2331 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2332 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2333 "invalid ORRv16i8 operands");
2334 return true;
2335 }
2336 break;
2337 }
2338 return false;
2339}
2340
2342 int &FrameIndex) const {
2343 switch (MI.getOpcode()) {
2344 default:
2345 break;
2346 case AArch64::LDRWui:
2347 case AArch64::LDRXui:
2348 case AArch64::LDRBui:
2349 case AArch64::LDRHui:
2350 case AArch64::LDRSui:
2351 case AArch64::LDRDui:
2352 case AArch64::LDRQui:
2353 case AArch64::LDR_PXI:
2354 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2355 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2356 FrameIndex = MI.getOperand(1).getIndex();
2357 return MI.getOperand(0).getReg();
2358 }
2359 break;
2360 }
2361
2362 return 0;
2363}
2364
2366 int &FrameIndex) const {
2367 switch (MI.getOpcode()) {
2368 default:
2369 break;
2370 case AArch64::STRWui:
2371 case AArch64::STRXui:
2372 case AArch64::STRBui:
2373 case AArch64::STRHui:
2374 case AArch64::STRSui:
2375 case AArch64::STRDui:
2376 case AArch64::STRQui:
2377 case AArch64::STR_PXI:
2378 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2379 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2380 FrameIndex = MI.getOperand(1).getIndex();
2381 return MI.getOperand(0).getReg();
2382 }
2383 break;
2384 }
2385 return 0;
2386}
2387
2388/// Check all MachineMemOperands for a hint to suppress pairing.
2390 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2391 return MMO->getFlags() & MOSuppressPair;
2392 });
2393}
2394
2395/// Set a flag on the first MachineMemOperand to suppress pairing.
2397 if (MI.memoperands_empty())
2398 return;
2399 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2400}
2401
2402/// Check all MachineMemOperands for a hint that the load/store is strided.
2404 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2405 return MMO->getFlags() & MOStridedAccess;
2406 });
2407}
2408
2410 switch (Opc) {
2411 default:
2412 return false;
2413 case AArch64::STURSi:
2414 case AArch64::STRSpre:
2415 case AArch64::STURDi:
2416 case AArch64::STRDpre:
2417 case AArch64::STURQi:
2418 case AArch64::STRQpre:
2419 case AArch64::STURBBi:
2420 case AArch64::STURHHi:
2421 case AArch64::STURWi:
2422 case AArch64::STRWpre:
2423 case AArch64::STURXi:
2424 case AArch64::STRXpre:
2425 case AArch64::LDURSi:
2426 case AArch64::LDRSpre:
2427 case AArch64::LDURDi:
2428 case AArch64::LDRDpre:
2429 case AArch64::LDURQi:
2430 case AArch64::LDRQpre:
2431 case AArch64::LDURWi:
2432 case AArch64::LDRWpre:
2433 case AArch64::LDURXi:
2434 case AArch64::LDRXpre:
2435 case AArch64::LDRSWpre:
2436 case AArch64::LDURSWi:
2437 case AArch64::LDURHHi:
2438 case AArch64::LDURBBi:
2439 case AArch64::LDURSBWi:
2440 case AArch64::LDURSHWi:
2441 return true;
2442 }
2443}
2444
2445std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2446 switch (Opc) {
2447 default: return {};
2448 case AArch64::PRFMui: return AArch64::PRFUMi;
2449 case AArch64::LDRXui: return AArch64::LDURXi;
2450 case AArch64::LDRWui: return AArch64::LDURWi;
2451 case AArch64::LDRBui: return AArch64::LDURBi;
2452 case AArch64::LDRHui: return AArch64::LDURHi;
2453 case AArch64::LDRSui: return AArch64::LDURSi;
2454 case AArch64::LDRDui: return AArch64::LDURDi;
2455 case AArch64::LDRQui: return AArch64::LDURQi;
2456 case AArch64::LDRBBui: return AArch64::LDURBBi;
2457 case AArch64::LDRHHui: return AArch64::LDURHHi;
2458 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2459 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2460 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2461 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2462 case AArch64::LDRSWui: return AArch64::LDURSWi;
2463 case AArch64::STRXui: return AArch64::STURXi;
2464 case AArch64::STRWui: return AArch64::STURWi;
2465 case AArch64::STRBui: return AArch64::STURBi;
2466 case AArch64::STRHui: return AArch64::STURHi;
2467 case AArch64::STRSui: return AArch64::STURSi;
2468 case AArch64::STRDui: return AArch64::STURDi;
2469 case AArch64::STRQui: return AArch64::STURQi;
2470 case AArch64::STRBBui: return AArch64::STURBBi;
2471 case AArch64::STRHHui: return AArch64::STURHHi;
2472 }
2473}
2474
2476 switch (Opc) {
2477 default:
2478 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2479 case AArch64::ADDG:
2480 case AArch64::LDAPURBi:
2481 case AArch64::LDAPURHi:
2482 case AArch64::LDAPURi:
2483 case AArch64::LDAPURSBWi:
2484 case AArch64::LDAPURSBXi:
2485 case AArch64::LDAPURSHWi:
2486 case AArch64::LDAPURSHXi:
2487 case AArch64::LDAPURSWi:
2488 case AArch64::LDAPURXi:
2489 case AArch64::LDR_PPXI:
2490 case AArch64::LDR_PXI:
2491 case AArch64::LDR_ZXI:
2492 case AArch64::LDR_ZZXI:
2493 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2494 case AArch64::LDR_ZZZXI:
2495 case AArch64::LDR_ZZZZXI:
2496 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2497 case AArch64::LDRBBui:
2498 case AArch64::LDRBui:
2499 case AArch64::LDRDui:
2500 case AArch64::LDRHHui:
2501 case AArch64::LDRHui:
2502 case AArch64::LDRQui:
2503 case AArch64::LDRSBWui:
2504 case AArch64::LDRSBXui:
2505 case AArch64::LDRSHWui:
2506 case AArch64::LDRSHXui:
2507 case AArch64::LDRSui:
2508 case AArch64::LDRSWui:
2509 case AArch64::LDRWui:
2510 case AArch64::LDRXui:
2511 case AArch64::LDURBBi:
2512 case AArch64::LDURBi:
2513 case AArch64::LDURDi:
2514 case AArch64::LDURHHi:
2515 case AArch64::LDURHi:
2516 case AArch64::LDURQi:
2517 case AArch64::LDURSBWi:
2518 case AArch64::LDURSBXi:
2519 case AArch64::LDURSHWi:
2520 case AArch64::LDURSHXi:
2521 case AArch64::LDURSi:
2522 case AArch64::LDURSWi:
2523 case AArch64::LDURWi:
2524 case AArch64::LDURXi:
2525 case AArch64::PRFMui:
2526 case AArch64::PRFUMi:
2527 case AArch64::ST2Gi:
2528 case AArch64::STGi:
2529 case AArch64::STLURBi:
2530 case AArch64::STLURHi:
2531 case AArch64::STLURWi:
2532 case AArch64::STLURXi:
2533 case AArch64::StoreSwiftAsyncContext:
2534 case AArch64::STR_PPXI:
2535 case AArch64::STR_PXI:
2536 case AArch64::STR_ZXI:
2537 case AArch64::STR_ZZXI:
2538 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2539 case AArch64::STR_ZZZXI:
2540 case AArch64::STR_ZZZZXI:
2541 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2542 case AArch64::STRBBui:
2543 case AArch64::STRBui:
2544 case AArch64::STRDui:
2545 case AArch64::STRHHui:
2546 case AArch64::STRHui:
2547 case AArch64::STRQui:
2548 case AArch64::STRSui:
2549 case AArch64::STRWui:
2550 case AArch64::STRXui:
2551 case AArch64::STURBBi:
2552 case AArch64::STURBi:
2553 case AArch64::STURDi:
2554 case AArch64::STURHHi:
2555 case AArch64::STURHi:
2556 case AArch64::STURQi:
2557 case AArch64::STURSi:
2558 case AArch64::STURWi:
2559 case AArch64::STURXi:
2560 case AArch64::STZ2Gi:
2561 case AArch64::STZGi:
2562 case AArch64::TAGPstack:
2563 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
2564 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
2565 return 2;
2566 case AArch64::LD1B_D_IMM:
2567 case AArch64::LD1B_H_IMM:
2568 case AArch64::LD1B_IMM:
2569 case AArch64::LD1B_S_IMM:
2570 case AArch64::LD1D_IMM:
2571 case AArch64::LD1H_D_IMM:
2572 case AArch64::LD1H_IMM:
2573 case AArch64::LD1H_S_IMM:
2574 case AArch64::LD1RB_D_IMM:
2575 case AArch64::LD1RB_H_IMM:
2576 case AArch64::LD1RB_IMM:
2577 case AArch64::LD1RB_S_IMM:
2578 case AArch64::LD1RD_IMM:
2579 case AArch64::LD1RH_D_IMM:
2580 case AArch64::LD1RH_IMM:
2581 case AArch64::LD1RH_S_IMM:
2582 case AArch64::LD1RSB_D_IMM:
2583 case AArch64::LD1RSB_H_IMM:
2584 case AArch64::LD1RSB_S_IMM:
2585 case AArch64::LD1RSH_D_IMM:
2586 case AArch64::LD1RSH_S_IMM:
2587 case AArch64::LD1RSW_IMM:
2588 case AArch64::LD1RW_D_IMM:
2589 case AArch64::LD1RW_IMM:
2590 case AArch64::LD1SB_D_IMM:
2591 case AArch64::LD1SB_H_IMM:
2592 case AArch64::LD1SB_S_IMM:
2593 case AArch64::LD1SH_D_IMM:
2594 case AArch64::LD1SH_S_IMM:
2595 case AArch64::LD1SW_D_IMM:
2596 case AArch64::LD1W_D_IMM:
2597 case AArch64::LD1W_IMM:
2598 case AArch64::LD2B_IMM:
2599 case AArch64::LD2D_IMM:
2600 case AArch64::LD2H_IMM:
2601 case AArch64::LD2W_IMM:
2602 case AArch64::LD3B_IMM:
2603 case AArch64::LD3D_IMM:
2604 case AArch64::LD3H_IMM:
2605 case AArch64::LD3W_IMM:
2606 case AArch64::LD4B_IMM:
2607 case AArch64::LD4D_IMM:
2608 case AArch64::LD4H_IMM:
2609 case AArch64::LD4W_IMM:
2610 case AArch64::LDG:
2611 case AArch64::LDNF1B_D_IMM:
2612 case AArch64::LDNF1B_H_IMM:
2613 case AArch64::LDNF1B_IMM:
2614 case AArch64::LDNF1B_S_IMM:
2615 case AArch64::LDNF1D_IMM:
2616 case AArch64::LDNF1H_D_IMM:
2617 case AArch64::LDNF1H_IMM:
2618 case AArch64::LDNF1H_S_IMM:
2619 case AArch64::LDNF1SB_D_IMM:
2620 case AArch64::LDNF1SB_H_IMM:
2621 case AArch64::LDNF1SB_S_IMM:
2622 case AArch64::LDNF1SH_D_IMM:
2623 case AArch64::LDNF1SH_S_IMM:
2624 case AArch64::LDNF1SW_D_IMM:
2625 case AArch64::LDNF1W_D_IMM:
2626 case AArch64::LDNF1W_IMM:
2627 case AArch64::LDNPDi:
2628 case AArch64::LDNPQi:
2629 case AArch64::LDNPSi:
2630 case AArch64::LDNPWi:
2631 case AArch64::LDNPXi:
2632 case AArch64::LDNT1B_ZRI:
2633 case AArch64::LDNT1D_ZRI:
2634 case AArch64::LDNT1H_ZRI:
2635 case AArch64::LDNT1W_ZRI:
2636 case AArch64::LDPDi:
2637 case AArch64::LDPQi:
2638 case AArch64::LDPSi:
2639 case AArch64::LDPWi:
2640 case AArch64::LDPXi:
2641 case AArch64::LDRBBpost:
2642 case AArch64::LDRBBpre:
2643 case AArch64::LDRBpost:
2644 case AArch64::LDRBpre:
2645 case AArch64::LDRDpost:
2646 case AArch64::LDRDpre:
2647 case AArch64::LDRHHpost:
2648 case AArch64::LDRHHpre:
2649 case AArch64::LDRHpost:
2650 case AArch64::LDRHpre:
2651 case AArch64::LDRQpost:
2652 case AArch64::LDRQpre:
2653 case AArch64::LDRSpost:
2654 case AArch64::LDRSpre:
2655 case AArch64::LDRWpost:
2656 case AArch64::LDRWpre:
2657 case AArch64::LDRXpost:
2658 case AArch64::LDRXpre:
2659 case AArch64::ST1B_D_IMM:
2660 case AArch64::ST1B_H_IMM:
2661 case AArch64::ST1B_IMM:
2662 case AArch64::ST1B_S_IMM:
2663 case AArch64::ST1D_IMM:
2664 case AArch64::ST1H_D_IMM:
2665 case AArch64::ST1H_IMM:
2666 case AArch64::ST1H_S_IMM:
2667 case AArch64::ST1W_D_IMM:
2668 case AArch64::ST1W_IMM:
2669 case AArch64::ST2B_IMM:
2670 case AArch64::ST2D_IMM:
2671 case AArch64::ST2H_IMM:
2672 case AArch64::ST2W_IMM:
2673 case AArch64::ST3B_IMM:
2674 case AArch64::ST3D_IMM:
2675 case AArch64::ST3H_IMM:
2676 case AArch64::ST3W_IMM:
2677 case AArch64::ST4B_IMM:
2678 case AArch64::ST4D_IMM:
2679 case AArch64::ST4H_IMM:
2680 case AArch64::ST4W_IMM:
2681 case AArch64::STGPi:
2682 case AArch64::STGPreIndex:
2683 case AArch64::STZGPreIndex:
2684 case AArch64::ST2GPreIndex:
2685 case AArch64::STZ2GPreIndex:
2686 case AArch64::STGPostIndex:
2687 case AArch64::STZGPostIndex:
2688 case AArch64::ST2GPostIndex:
2689 case AArch64::STZ2GPostIndex:
2690 case AArch64::STNPDi:
2691 case AArch64::STNPQi:
2692 case AArch64::STNPSi:
2693 case AArch64::STNPWi:
2694 case AArch64::STNPXi:
2695 case AArch64::STNT1B_ZRI:
2696 case AArch64::STNT1D_ZRI:
2697 case AArch64::STNT1H_ZRI:
2698 case AArch64::STNT1W_ZRI:
2699 case AArch64::STPDi:
2700 case AArch64::STPQi:
2701 case AArch64::STPSi:
2702 case AArch64::STPWi:
2703 case AArch64::STPXi:
2704 case AArch64::STRBBpost:
2705 case AArch64::STRBBpre:
2706 case AArch64::STRBpost:
2707 case AArch64::STRBpre:
2708 case AArch64::STRDpost:
2709 case AArch64::STRDpre:
2710 case AArch64::STRHHpost:
2711 case AArch64::STRHHpre:
2712 case AArch64::STRHpost:
2713 case AArch64::STRHpre:
2714 case AArch64::STRQpost:
2715 case AArch64::STRQpre:
2716 case AArch64::STRSpost:
2717 case AArch64::STRSpre:
2718 case AArch64::STRWpost:
2719 case AArch64::STRWpre:
2720 case AArch64::STRXpost:
2721 case AArch64::STRXpre:
2722 return 3;
2723 case AArch64::LDPDpost:
2724 case AArch64::LDPDpre:
2725 case AArch64::LDPQpost:
2726 case AArch64::LDPQpre:
2727 case AArch64::LDPSpost:
2728 case AArch64::LDPSpre:
2729 case AArch64::LDPWpost:
2730 case AArch64::LDPWpre:
2731 case AArch64::LDPXpost:
2732 case AArch64::LDPXpre:
2733 case AArch64::STGPpre:
2734 case AArch64::STGPpost:
2735 case AArch64::STPDpost:
2736 case AArch64::STPDpre:
2737 case AArch64::STPQpost:
2738 case AArch64::STPQpre:
2739 case AArch64::STPSpost:
2740 case AArch64::STPSpre:
2741 case AArch64::STPWpost:
2742 case AArch64::STPWpre:
2743 case AArch64::STPXpost:
2744 case AArch64::STPXpre:
2745 return 4;
2746 }
2747}
2748
2750 switch (MI.getOpcode()) {
2751 default:
2752 return false;
2753 // Scaled instructions.
2754 case AArch64::STRSui:
2755 case AArch64::STRDui:
2756 case AArch64::STRQui:
2757 case AArch64::STRXui:
2758 case AArch64::STRWui:
2759 case AArch64::LDRSui:
2760 case AArch64::LDRDui:
2761 case AArch64::LDRQui:
2762 case AArch64::LDRXui:
2763 case AArch64::LDRWui:
2764 case AArch64::LDRSWui:
2765 // Unscaled instructions.
2766 case AArch64::STURSi:
2767 case AArch64::STRSpre:
2768 case AArch64::STURDi:
2769 case AArch64::STRDpre:
2770 case AArch64::STURQi:
2771 case AArch64::STRQpre:
2772 case AArch64::STURWi:
2773 case AArch64::STRWpre:
2774 case AArch64::STURXi:
2775 case AArch64::STRXpre:
2776 case AArch64::LDURSi:
2777 case AArch64::LDRSpre:
2778 case AArch64::LDURDi:
2779 case AArch64::LDRDpre:
2780 case AArch64::LDURQi:
2781 case AArch64::LDRQpre:
2782 case AArch64::LDURWi:
2783 case AArch64::LDRWpre:
2784 case AArch64::LDURXi:
2785 case AArch64::LDRXpre:
2786 case AArch64::LDURSWi:
2787 case AArch64::LDRSWpre:
2788 // SVE instructions.
2789 case AArch64::LDR_ZXI:
2790 case AArch64::STR_ZXI:
2791 return true;
2792 }
2793}
2794
2796 switch (MI.getOpcode()) {
2797 default:
2798 assert((!MI.isCall() || !MI.isReturn()) &&
2799 "Unexpected instruction - was a new tail call opcode introduced?");
2800 return false;
2801 case AArch64::TCRETURNdi:
2802 case AArch64::TCRETURNri:
2803 case AArch64::TCRETURNrix16x17:
2804 case AArch64::TCRETURNrix17:
2805 case AArch64::TCRETURNrinotx16:
2806 case AArch64::TCRETURNriALL:
2807 case AArch64::AUTH_TCRETURN:
2808 case AArch64::AUTH_TCRETURN_BTI:
2809 return true;
2810 }
2811}
2812
2814 switch (Opc) {
2815 default:
2816 llvm_unreachable("Opcode has no flag setting equivalent!");
2817 // 32-bit cases:
2818 case AArch64::ADDWri:
2819 return AArch64::ADDSWri;
2820 case AArch64::ADDWrr:
2821 return AArch64::ADDSWrr;
2822 case AArch64::ADDWrs:
2823 return AArch64::ADDSWrs;
2824 case AArch64::ADDWrx:
2825 return AArch64::ADDSWrx;
2826 case AArch64::ANDWri:
2827 return AArch64::ANDSWri;
2828 case AArch64::ANDWrr:
2829 return AArch64::ANDSWrr;
2830 case AArch64::ANDWrs:
2831 return AArch64::ANDSWrs;
2832 case AArch64::BICWrr:
2833 return AArch64::BICSWrr;
2834 case AArch64::BICWrs:
2835 return AArch64::BICSWrs;
2836 case AArch64::SUBWri:
2837 return AArch64::SUBSWri;
2838 case AArch64::SUBWrr:
2839 return AArch64::SUBSWrr;
2840 case AArch64::SUBWrs:
2841 return AArch64::SUBSWrs;
2842 case AArch64::SUBWrx:
2843 return AArch64::SUBSWrx;
2844 // 64-bit cases:
2845 case AArch64::ADDXri:
2846 return AArch64::ADDSXri;
2847 case AArch64::ADDXrr:
2848 return AArch64::ADDSXrr;
2849 case AArch64::ADDXrs:
2850 return AArch64::ADDSXrs;
2851 case AArch64::ADDXrx:
2852 return AArch64::ADDSXrx;
2853 case AArch64::ANDXri:
2854 return AArch64::ANDSXri;
2855 case AArch64::ANDXrr:
2856 return AArch64::ANDSXrr;
2857 case AArch64::ANDXrs:
2858 return AArch64::ANDSXrs;
2859 case AArch64::BICXrr:
2860 return AArch64::BICSXrr;
2861 case AArch64::BICXrs:
2862 return AArch64::BICSXrs;
2863 case AArch64::SUBXri:
2864 return AArch64::SUBSXri;
2865 case AArch64::SUBXrr:
2866 return AArch64::SUBSXrr;
2867 case AArch64::SUBXrs:
2868 return AArch64::SUBSXrs;
2869 case AArch64::SUBXrx:
2870 return AArch64::SUBSXrx;
2871 // SVE instructions:
2872 case AArch64::AND_PPzPP:
2873 return AArch64::ANDS_PPzPP;
2874 case AArch64::BIC_PPzPP:
2875 return AArch64::BICS_PPzPP;
2876 case AArch64::EOR_PPzPP:
2877 return AArch64::EORS_PPzPP;
2878 case AArch64::NAND_PPzPP:
2879 return AArch64::NANDS_PPzPP;
2880 case AArch64::NOR_PPzPP:
2881 return AArch64::NORS_PPzPP;
2882 case AArch64::ORN_PPzPP:
2883 return AArch64::ORNS_PPzPP;
2884 case AArch64::ORR_PPzPP:
2885 return AArch64::ORRS_PPzPP;
2886 case AArch64::BRKA_PPzP:
2887 return AArch64::BRKAS_PPzP;
2888 case AArch64::BRKPA_PPzPP:
2889 return AArch64::BRKPAS_PPzPP;
2890 case AArch64::BRKB_PPzP:
2891 return AArch64::BRKBS_PPzP;
2892 case AArch64::BRKPB_PPzPP:
2893 return AArch64::BRKPBS_PPzPP;
2894 case AArch64::BRKN_PPzP:
2895 return AArch64::BRKNS_PPzP;
2896 case AArch64::RDFFR_PPz:
2897 return AArch64::RDFFRS_PPz;
2898 case AArch64::PTRUE_B:
2899 return AArch64::PTRUES_B;
2900 }
2901}
2902
2903// Is this a candidate for ld/st merging or pairing? For example, we don't
2904// touch volatiles or load/stores that have a hint to avoid pair formation.
2906
2907 bool IsPreLdSt = isPreLdSt(MI);
2908
2909 // If this is a volatile load/store, don't mess with it.
2910 if (MI.hasOrderedMemoryRef())
2911 return false;
2912
2913 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2914 // For Pre-inc LD/ST, the operand is shifted by one.
2915 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2916 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2917 "Expected a reg or frame index operand.");
2918
2919 // For Pre-indexed addressing quadword instructions, the third operand is the
2920 // immediate value.
2921 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2922
2923 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2924 return false;
2925
2926 // Can't merge/pair if the instruction modifies the base register.
2927 // e.g., ldr x0, [x0]
2928 // This case will never occur with an FI base.
2929 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2930 // STR<S,D,Q,W,X>pre, it can be merged.
2931 // For example:
2932 // ldr q0, [x11, #32]!
2933 // ldr q1, [x11, #16]
2934 // to
2935 // ldp q0, q1, [x11, #32]!
2936 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2937 Register BaseReg = MI.getOperand(1).getReg();
2939 if (MI.modifiesRegister(BaseReg, TRI))
2940 return false;
2941 }
2942
2943 // Pairing SVE fills/spills is only valid for little-endian targets that
2944 // implement VLS 128.
2945 switch (MI.getOpcode()) {
2946 default:
2947 break;
2948 case AArch64::LDR_ZXI:
2949 case AArch64::STR_ZXI:
2950 if (!Subtarget.isLittleEndian() ||
2951 Subtarget.getSVEVectorSizeInBits() != 128)
2952 return false;
2953 }
2954
2955 // Check if this load/store has a hint to avoid pair formation.
2956 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2958 return false;
2959
2960 // Do not pair any callee-save store/reload instructions in the
2961 // prologue/epilogue if the CFI information encoded the operations as separate
2962 // instructions, as that will cause the size of the actual prologue to mismatch
2963 // with the prologue size recorded in the Windows CFI.
2964 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2965 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2966 MI.getMF()->getFunction().needsUnwindTableEntry();
2967 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2969 return false;
2970
2971 // On some CPUs quad load/store pairs are slower than two single load/stores.
2972 if (Subtarget.isPaired128Slow()) {
2973 switch (MI.getOpcode()) {
2974 default:
2975 break;
2976 case AArch64::LDURQi:
2977 case AArch64::STURQi:
2978 case AArch64::LDRQui:
2979 case AArch64::STRQui:
2980 return false;
2981 }
2982 }
2983
2984 return true;
2985}
2986
2989 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2990 const TargetRegisterInfo *TRI) const {
2991 if (!LdSt.mayLoadOrStore())
2992 return false;
2993
2994 const MachineOperand *BaseOp;
2995 TypeSize WidthN(0, false);
2996 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2997 WidthN, TRI))
2998 return false;
2999 // The maximum vscale is 16 under AArch64, return the maximal extent for the
3000 // vector.
3001 Width = LocationSize::precise(WidthN);
3002 BaseOps.push_back(BaseOp);
3003 return true;
3004}
3005
3006std::optional<ExtAddrMode>
3008 const TargetRegisterInfo *TRI) const {
3009 const MachineOperand *Base; // Filled with the base operand of MI.
3010 int64_t Offset; // Filled with the offset of MI.
3011 bool OffsetIsScalable;
3012 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3013 return std::nullopt;
3014
3015 if (!Base->isReg())
3016 return std::nullopt;
3017 ExtAddrMode AM;
3018 AM.BaseReg = Base->getReg();
3019 AM.Displacement = Offset;
3020 AM.ScaledReg = 0;
3021 AM.Scale = 0;
3022 return AM;
3023}
3024
3026 Register Reg,
3027 const MachineInstr &AddrI,
3028 ExtAddrMode &AM) const {
3029 // Filter out instructions into which we cannot fold.
3030 unsigned NumBytes;
3031 int64_t OffsetScale = 1;
3032 switch (MemI.getOpcode()) {
3033 default:
3034 return false;
3035
3036 case AArch64::LDURQi:
3037 case AArch64::STURQi:
3038 NumBytes = 16;
3039 break;
3040
3041 case AArch64::LDURDi:
3042 case AArch64::STURDi:
3043 case AArch64::LDURXi:
3044 case AArch64::STURXi:
3045 NumBytes = 8;
3046 break;
3047
3048 case AArch64::LDURWi:
3049 case AArch64::LDURSWi:
3050 case AArch64::STURWi:
3051 NumBytes = 4;
3052 break;
3053
3054 case AArch64::LDURHi:
3055 case AArch64::STURHi:
3056 case AArch64::LDURHHi:
3057 case AArch64::STURHHi:
3058 case AArch64::LDURSHXi:
3059 case AArch64::LDURSHWi:
3060 NumBytes = 2;
3061 break;
3062
3063 case AArch64::LDRBroX:
3064 case AArch64::LDRBBroX:
3065 case AArch64::LDRSBXroX:
3066 case AArch64::LDRSBWroX:
3067 case AArch64::STRBroX:
3068 case AArch64::STRBBroX:
3069 case AArch64::LDURBi:
3070 case AArch64::LDURBBi:
3071 case AArch64::LDURSBXi:
3072 case AArch64::LDURSBWi:
3073 case AArch64::STURBi:
3074 case AArch64::STURBBi:
3075 case AArch64::LDRBui:
3076 case AArch64::LDRBBui:
3077 case AArch64::LDRSBXui:
3078 case AArch64::LDRSBWui:
3079 case AArch64::STRBui:
3080 case AArch64::STRBBui:
3081 NumBytes = 1;
3082 break;
3083
3084 case AArch64::LDRQroX:
3085 case AArch64::STRQroX:
3086 case AArch64::LDRQui:
3087 case AArch64::STRQui:
3088 NumBytes = 16;
3089 OffsetScale = 16;
3090 break;
3091
3092 case AArch64::LDRDroX:
3093 case AArch64::STRDroX:
3094 case AArch64::LDRXroX:
3095 case AArch64::STRXroX:
3096 case AArch64::LDRDui:
3097 case AArch64::STRDui:
3098 case AArch64::LDRXui:
3099 case AArch64::STRXui:
3100 NumBytes = 8;
3101 OffsetScale = 8;
3102 break;
3103
3104 case AArch64::LDRWroX:
3105 case AArch64::LDRSWroX:
3106 case AArch64::STRWroX:
3107 case AArch64::LDRWui:
3108 case AArch64::LDRSWui:
3109 case AArch64::STRWui:
3110 NumBytes = 4;
3111 OffsetScale = 4;
3112 break;
3113
3114 case AArch64::LDRHroX:
3115 case AArch64::STRHroX:
3116 case AArch64::LDRHHroX:
3117 case AArch64::STRHHroX:
3118 case AArch64::LDRSHXroX:
3119 case AArch64::LDRSHWroX:
3120 case AArch64::LDRHui:
3121 case AArch64::STRHui:
3122 case AArch64::LDRHHui:
3123 case AArch64::STRHHui:
3124 case AArch64::LDRSHXui:
3125 case AArch64::LDRSHWui:
3126 NumBytes = 2;
3127 OffsetScale = 2;
3128 break;
3129 }
3130
3131 // Check the fold operand is not the loaded/stored value.
3132 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3133 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3134 return false;
3135
3136 // Handle memory instructions with a [Reg, Reg] addressing mode.
3137 if (MemI.getOperand(2).isReg()) {
3138 // Bail if the addressing mode already includes extension of the offset
3139 // register.
3140 if (MemI.getOperand(3).getImm())
3141 return false;
3142
3143 // Check if we actually have a scaled offset.
3144 if (MemI.getOperand(4).getImm() == 0)
3145 OffsetScale = 1;
3146
3147 // If the address instructions is folded into the base register, then the
3148 // addressing mode must not have a scale. Then we can swap the base and the
3149 // scaled registers.
3150 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3151 return false;
3152
3153 switch (AddrI.getOpcode()) {
3154 default:
3155 return false;
3156
3157 case AArch64::SBFMXri:
3158 // sxtw Xa, Wm
3159 // ldr Xd, [Xn, Xa, lsl #N]
3160 // ->
3161 // ldr Xd, [Xn, Wm, sxtw #N]
3162 if (AddrI.getOperand(2).getImm() != 0 ||
3163 AddrI.getOperand(3).getImm() != 31)
3164 return false;
3165
3166 AM.BaseReg = MemI.getOperand(1).getReg();
3167 if (AM.BaseReg == Reg)
3168 AM.BaseReg = MemI.getOperand(2).getReg();
3169 AM.ScaledReg = AddrI.getOperand(1).getReg();
3170 AM.Scale = OffsetScale;
3171 AM.Displacement = 0;
3173 return true;
3174
3175 case TargetOpcode::SUBREG_TO_REG: {
3176 // mov Wa, Wm
3177 // ldr Xd, [Xn, Xa, lsl #N]
3178 // ->
3179 // ldr Xd, [Xn, Wm, uxtw #N]
3180
3181 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3182 if (AddrI.getOperand(1).getImm() != 0 ||
3183 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3184 return false;
3185
3186 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3187 Register OffsetReg = AddrI.getOperand(2).getReg();
3188 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3189 return false;
3190
3191 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3192 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3193 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3194 DefMI.getOperand(3).getImm() != 0)
3195 return false;
3196
3197 AM.BaseReg = MemI.getOperand(1).getReg();
3198 if (AM.BaseReg == Reg)
3199 AM.BaseReg = MemI.getOperand(2).getReg();
3200 AM.ScaledReg = DefMI.getOperand(2).getReg();
3201 AM.Scale = OffsetScale;
3202 AM.Displacement = 0;
3204 return true;
3205 }
3206 }
3207 }
3208
3209 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3210
3211 // Check we are not breaking a potential conversion to an LDP.
3212 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3213 int64_t NewOffset) -> bool {
3214 int64_t MinOffset, MaxOffset;
3215 switch (NumBytes) {
3216 default:
3217 return true;
3218 case 4:
3219 MinOffset = -256;
3220 MaxOffset = 252;
3221 break;
3222 case 8:
3223 MinOffset = -512;
3224 MaxOffset = 504;
3225 break;
3226 case 16:
3227 MinOffset = -1024;
3228 MaxOffset = 1008;
3229 break;
3230 }
3231 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3232 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3233 };
3234 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3235 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3236 int64_t NewOffset = OldOffset + Disp;
3237 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3238 return false;
3239 // If the old offset would fit into an LDP, but the new offset wouldn't,
3240 // bail out.
3241 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3242 return false;
3243 AM.BaseReg = AddrI.getOperand(1).getReg();
3244 AM.ScaledReg = 0;
3245 AM.Scale = 0;
3246 AM.Displacement = NewOffset;
3248 return true;
3249 };
3250
3251 auto canFoldAddRegIntoAddrMode =
3252 [&](int64_t Scale,
3254 if (MemI.getOperand(2).getImm() != 0)
3255 return false;
3256 if ((unsigned)Scale != Scale)
3257 return false;
3258 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3259 return false;
3260 AM.BaseReg = AddrI.getOperand(1).getReg();
3261 AM.ScaledReg = AddrI.getOperand(2).getReg();
3262 AM.Scale = Scale;
3263 AM.Displacement = 0;
3264 AM.Form = Form;
3265 return true;
3266 };
3267
3268 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3269 unsigned Opcode = MemI.getOpcode();
3270 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3271 Subtarget.isSTRQroSlow();
3272 };
3273
3274 int64_t Disp = 0;
3275 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3276 switch (AddrI.getOpcode()) {
3277 default:
3278 return false;
3279
3280 case AArch64::ADDXri:
3281 // add Xa, Xn, #N
3282 // ldr Xd, [Xa, #M]
3283 // ->
3284 // ldr Xd, [Xn, #N'+M]
3285 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3286 return canFoldAddSubImmIntoAddrMode(Disp);
3287
3288 case AArch64::SUBXri:
3289 // sub Xa, Xn, #N
3290 // ldr Xd, [Xa, #M]
3291 // ->
3292 // ldr Xd, [Xn, #N'+M]
3293 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3294 return canFoldAddSubImmIntoAddrMode(-Disp);
3295
3296 case AArch64::ADDXrs: {
3297 // add Xa, Xn, Xm, lsl #N
3298 // ldr Xd, [Xa]
3299 // ->
3300 // ldr Xd, [Xn, Xm, lsl #N]
3301
3302 // Don't fold the add if the result would be slower, unless optimising for
3303 // size.
3304 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3306 return false;
3307 Shift = AArch64_AM::getShiftValue(Shift);
3308 if (!OptSize) {
3309 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3310 return false;
3311 if (avoidSlowSTRQ(MemI))
3312 return false;
3313 }
3314 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3315 }
3316
3317 case AArch64::ADDXrr:
3318 // add Xa, Xn, Xm
3319 // ldr Xd, [Xa]
3320 // ->
3321 // ldr Xd, [Xn, Xm, lsl #0]
3322
3323 // Don't fold the add if the result would be slower, unless optimising for
3324 // size.
3325 if (!OptSize && avoidSlowSTRQ(MemI))
3326 return false;
3327 return canFoldAddRegIntoAddrMode(1);
3328
3329 case AArch64::ADDXrx:
3330 // add Xa, Xn, Wm, {s,u}xtw #N
3331 // ldr Xd, [Xa]
3332 // ->
3333 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3334
3335 // Don't fold the add if the result would be slower, unless optimising for
3336 // size.
3337 if (!OptSize && avoidSlowSTRQ(MemI))
3338 return false;
3339
3340 // Can fold only sign-/zero-extend of a word.
3341 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3343 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3344 return false;
3345
3346 return canFoldAddRegIntoAddrMode(
3347 1ULL << AArch64_AM::getArithShiftValue(Imm),
3350 }
3351}
3352
3353// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3354// return the opcode of an instruction performing the same operation, but using
3355// the [Reg, Reg] addressing mode.
3356static unsigned regOffsetOpcode(unsigned Opcode) {
3357 switch (Opcode) {
3358 default:
3359 llvm_unreachable("Address folding not implemented for instruction");
3360
3361 case AArch64::LDURQi:
3362 case AArch64::LDRQui:
3363 return AArch64::LDRQroX;
3364 case AArch64::STURQi:
3365 case AArch64::STRQui:
3366 return AArch64::STRQroX;
3367 case AArch64::LDURDi:
3368 case AArch64::LDRDui:
3369 return AArch64::LDRDroX;
3370 case AArch64::STURDi:
3371 case AArch64::STRDui:
3372 return AArch64::STRDroX;
3373 case AArch64::LDURXi:
3374 case AArch64::LDRXui:
3375 return AArch64::LDRXroX;
3376 case AArch64::STURXi:
3377 case AArch64::STRXui:
3378 return AArch64::STRXroX;
3379 case AArch64::LDURWi:
3380 case AArch64::LDRWui:
3381 return AArch64::LDRWroX;
3382 case AArch64::LDURSWi:
3383 case AArch64::LDRSWui:
3384 return AArch64::LDRSWroX;
3385 case AArch64::STURWi:
3386 case AArch64::STRWui:
3387 return AArch64::STRWroX;
3388 case AArch64::LDURHi:
3389 case AArch64::LDRHui:
3390 return AArch64::LDRHroX;
3391 case AArch64::STURHi:
3392 case AArch64::STRHui:
3393 return AArch64::STRHroX;
3394 case AArch64::LDURHHi:
3395 case AArch64::LDRHHui:
3396 return AArch64::LDRHHroX;
3397 case AArch64::STURHHi:
3398 case AArch64::STRHHui:
3399 return AArch64::STRHHroX;
3400 case AArch64::LDURSHXi:
3401 case AArch64::LDRSHXui:
3402 return AArch64::LDRSHXroX;
3403 case AArch64::LDURSHWi:
3404 case AArch64::LDRSHWui:
3405 return AArch64::LDRSHWroX;
3406 case AArch64::LDURBi:
3407 case AArch64::LDRBui:
3408 return AArch64::LDRBroX;
3409 case AArch64::LDURBBi:
3410 case AArch64::LDRBBui:
3411 return AArch64::LDRBBroX;
3412 case AArch64::LDURSBXi:
3413 case AArch64::LDRSBXui:
3414 return AArch64::LDRSBXroX;
3415 case AArch64::LDURSBWi:
3416 case AArch64::LDRSBWui:
3417 return AArch64::LDRSBWroX;
3418 case AArch64::STURBi:
3419 case AArch64::STRBui:
3420 return AArch64::STRBroX;
3421 case AArch64::STURBBi:
3422 case AArch64::STRBBui:
3423 return AArch64::STRBBroX;
3424 }
3425}
3426
3427// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3428// the opcode of an instruction performing the same operation, but using the
3429// [Reg, #Imm] addressing mode with scaled offset.
3430unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3431 switch (Opcode) {
3432 default:
3433 llvm_unreachable("Address folding not implemented for instruction");
3434
3435 case AArch64::LDURQi:
3436 Scale = 16;
3437 return AArch64::LDRQui;
3438 case AArch64::STURQi:
3439 Scale = 16;
3440 return AArch64::STRQui;
3441 case AArch64::LDURDi:
3442 Scale = 8;
3443 return AArch64::LDRDui;
3444 case AArch64::STURDi:
3445 Scale = 8;
3446 return AArch64::STRDui;
3447 case AArch64::LDURXi:
3448 Scale = 8;
3449 return AArch64::LDRXui;
3450 case AArch64::STURXi:
3451 Scale = 8;
3452 return AArch64::STRXui;
3453 case AArch64::LDURWi:
3454 Scale = 4;
3455 return AArch64::LDRWui;
3456 case AArch64::LDURSWi:
3457 Scale = 4;
3458 return AArch64::LDRSWui;
3459 case AArch64::STURWi:
3460 Scale = 4;
3461 return AArch64::STRWui;
3462 case AArch64::LDURHi:
3463 Scale = 2;
3464 return AArch64::LDRHui;
3465 case AArch64::STURHi:
3466 Scale = 2;
3467 return AArch64::STRHui;
3468 case AArch64::LDURHHi:
3469 Scale = 2;
3470 return AArch64::LDRHHui;
3471 case AArch64::STURHHi:
3472 Scale = 2;
3473 return AArch64::STRHHui;
3474 case AArch64::LDURSHXi:
3475 Scale = 2;
3476 return AArch64::LDRSHXui;
3477 case AArch64::LDURSHWi:
3478 Scale = 2;
3479 return AArch64::LDRSHWui;
3480 case AArch64::LDURBi:
3481 Scale = 1;
3482 return AArch64::LDRBui;
3483 case AArch64::LDURBBi:
3484 Scale = 1;
3485 return AArch64::LDRBBui;
3486 case AArch64::LDURSBXi:
3487 Scale = 1;
3488 return AArch64::LDRSBXui;
3489 case AArch64::LDURSBWi:
3490 Scale = 1;
3491 return AArch64::LDRSBWui;
3492 case AArch64::STURBi:
3493 Scale = 1;
3494 return AArch64::STRBui;
3495 case AArch64::STURBBi:
3496 Scale = 1;
3497 return AArch64::STRBBui;
3498 case AArch64::LDRQui:
3499 case AArch64::STRQui:
3500 Scale = 16;
3501 return Opcode;
3502 case AArch64::LDRDui:
3503 case AArch64::STRDui:
3504 case AArch64::LDRXui:
3505 case AArch64::STRXui:
3506 Scale = 8;
3507 return Opcode;
3508 case AArch64::LDRWui:
3509 case AArch64::LDRSWui:
3510 case AArch64::STRWui:
3511 Scale = 4;
3512 return Opcode;
3513 case AArch64::LDRHui:
3514 case AArch64::STRHui:
3515 case AArch64::LDRHHui:
3516 case AArch64::STRHHui:
3517 case AArch64::LDRSHXui:
3518 case AArch64::LDRSHWui:
3519 Scale = 2;
3520 return Opcode;
3521 case AArch64::LDRBui:
3522 case AArch64::LDRBBui:
3523 case AArch64::LDRSBXui:
3524 case AArch64::LDRSBWui:
3525 case AArch64::STRBui:
3526 case AArch64::STRBBui:
3527 Scale = 1;
3528 return Opcode;
3529 }
3530}
3531
3532// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3533// the opcode of an instruction performing the same operation, but using the
3534// [Reg, #Imm] addressing mode with unscaled offset.
3535unsigned unscaledOffsetOpcode(unsigned Opcode) {
3536 switch (Opcode) {
3537 default:
3538 llvm_unreachable("Address folding not implemented for instruction");
3539
3540 case AArch64::LDURQi:
3541 case AArch64::STURQi:
3542 case AArch64::LDURDi:
3543 case AArch64::STURDi:
3544 case AArch64::LDURXi:
3545 case AArch64::STURXi:
3546 case AArch64::LDURWi:
3547 case AArch64::LDURSWi:
3548 case AArch64::STURWi:
3549 case AArch64::LDURHi:
3550 case AArch64::STURHi:
3551 case AArch64::LDURHHi:
3552 case AArch64::STURHHi:
3553 case AArch64::LDURSHXi:
3554 case AArch64::LDURSHWi:
3555 case AArch64::LDURBi:
3556 case AArch64::STURBi:
3557 case AArch64::LDURBBi:
3558 case AArch64::STURBBi:
3559 case AArch64::LDURSBWi:
3560 case AArch64::LDURSBXi:
3561 return Opcode;
3562 case AArch64::LDRQui:
3563 return AArch64::LDURQi;
3564 case AArch64::STRQui:
3565 return AArch64::STURQi;
3566 case AArch64::LDRDui:
3567 return AArch64::LDURDi;
3568 case AArch64::STRDui:
3569 return AArch64::STURDi;
3570 case AArch64::LDRXui:
3571 return AArch64::LDURXi;
3572 case AArch64::STRXui:
3573 return AArch64::STURXi;
3574 case AArch64::LDRWui:
3575 return AArch64::LDURWi;
3576 case AArch64::LDRSWui:
3577 return AArch64::LDURSWi;
3578 case AArch64::STRWui:
3579 return AArch64::STURWi;
3580 case AArch64::LDRHui:
3581 return AArch64::LDURHi;
3582 case AArch64::STRHui:
3583 return AArch64::STURHi;
3584 case AArch64::LDRHHui:
3585 return AArch64::LDURHHi;
3586 case AArch64::STRHHui:
3587 return AArch64::STURHHi;
3588 case AArch64::LDRSHXui:
3589 return AArch64::LDURSHXi;
3590 case AArch64::LDRSHWui:
3591 return AArch64::LDURSHWi;
3592 case AArch64::LDRBBui:
3593 return AArch64::LDURBBi;
3594 case AArch64::LDRBui:
3595 return AArch64::LDURBi;
3596 case AArch64::STRBBui:
3597 return AArch64::STURBBi;
3598 case AArch64::STRBui:
3599 return AArch64::STURBi;
3600 case AArch64::LDRSBWui:
3601 return AArch64::LDURSBWi;
3602 case AArch64::LDRSBXui:
3603 return AArch64::LDURSBXi;
3604 }
3605}
3606
3607// Given the opcode of a memory load/store instruction, return the opcode of an
3608// instruction performing the same operation, but using
3609// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3610// offset register.
3611static unsigned offsetExtendOpcode(unsigned Opcode) {
3612 switch (Opcode) {
3613 default:
3614 llvm_unreachable("Address folding not implemented for instruction");
3615
3616 case AArch64::LDRQroX:
3617 case AArch64::LDURQi:
3618 case AArch64::LDRQui:
3619 return AArch64::LDRQroW;
3620 case AArch64::STRQroX:
3621 case AArch64::STURQi:
3622 case AArch64::STRQui:
3623 return AArch64::STRQroW;
3624 case AArch64::LDRDroX:
3625 case AArch64::LDURDi:
3626 case AArch64::LDRDui:
3627 return AArch64::LDRDroW;
3628 case AArch64::STRDroX:
3629 case AArch64::STURDi:
3630 case AArch64::STRDui:
3631 return AArch64::STRDroW;
3632 case AArch64::LDRXroX:
3633 case AArch64::LDURXi:
3634 case AArch64::LDRXui:
3635 return AArch64::LDRXroW;
3636 case AArch64::STRXroX:
3637 case AArch64::STURXi:
3638 case AArch64::STRXui:
3639 return AArch64::STRXroW;
3640 case AArch64::LDRWroX:
3641 case AArch64::LDURWi:
3642 case AArch64::LDRWui:
3643 return AArch64::LDRWroW;
3644 case AArch64::LDRSWroX:
3645 case AArch64::LDURSWi:
3646 case AArch64::LDRSWui:
3647 return AArch64::LDRSWroW;
3648 case AArch64::STRWroX:
3649 case AArch64::STURWi:
3650 case AArch64::STRWui:
3651 return AArch64::STRWroW;
3652 case AArch64::LDRHroX:
3653 case AArch64::LDURHi:
3654 case AArch64::LDRHui:
3655 return AArch64::LDRHroW;
3656 case AArch64::STRHroX:
3657 case AArch64::STURHi:
3658 case AArch64::STRHui:
3659 return AArch64::STRHroW;
3660 case AArch64::LDRHHroX:
3661 case AArch64::LDURHHi:
3662 case AArch64::LDRHHui:
3663 return AArch64::LDRHHroW;
3664 case AArch64::STRHHroX:
3665 case AArch64::STURHHi:
3666 case AArch64::STRHHui:
3667 return AArch64::STRHHroW;
3668 case AArch64::LDRSHXroX:
3669 case AArch64::LDURSHXi:
3670 case AArch64::LDRSHXui:
3671 return AArch64::LDRSHXroW;
3672 case AArch64::LDRSHWroX:
3673 case AArch64::LDURSHWi:
3674 case AArch64::LDRSHWui:
3675 return AArch64::LDRSHWroW;
3676 case AArch64::LDRBroX:
3677 case AArch64::LDURBi:
3678 case AArch64::LDRBui:
3679 return AArch64::LDRBroW;
3680 case AArch64::LDRBBroX:
3681 case AArch64::LDURBBi:
3682 case AArch64::LDRBBui:
3683 return AArch64::LDRBBroW;
3684 case AArch64::LDRSBXroX:
3685 case AArch64::LDURSBXi:
3686 case AArch64::LDRSBXui:
3687 return AArch64::LDRSBXroW;
3688 case AArch64::LDRSBWroX:
3689 case AArch64::LDURSBWi:
3690 case AArch64::LDRSBWui:
3691 return AArch64::LDRSBWroW;
3692 case AArch64::STRBroX:
3693 case AArch64::STURBi:
3694 case AArch64::STRBui:
3695 return AArch64::STRBroW;
3696 case AArch64::STRBBroX:
3697 case AArch64::STURBBi:
3698 case AArch64::STRBBui:
3699 return AArch64::STRBBroW;
3700 }
3701}
3702
3704 const ExtAddrMode &AM) const {
3705
3706 const DebugLoc &DL = MemI.getDebugLoc();
3707 MachineBasicBlock &MBB = *MemI.getParent();
3709
3711 if (AM.ScaledReg) {
3712 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3713 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3714 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3715 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3716 .addReg(MemI.getOperand(0).getReg(),
3717 MemI.mayLoad() ? RegState::Define : 0)
3718 .addReg(AM.BaseReg)
3719 .addReg(AM.ScaledReg)
3720 .addImm(0)
3721 .addImm(AM.Scale > 1)
3722 .setMemRefs(MemI.memoperands())
3723 .setMIFlags(MemI.getFlags());
3724 return B.getInstr();
3725 }
3726
3727 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3728 "Addressing mode not supported for folding");
3729
3730 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3731 unsigned Scale = 1;
3732 unsigned Opcode = MemI.getOpcode();
3733 if (isInt<9>(AM.Displacement))
3734 Opcode = unscaledOffsetOpcode(Opcode);
3735 else
3736 Opcode = scaledOffsetOpcode(Opcode, Scale);
3737
3738 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3739 .addReg(MemI.getOperand(0).getReg(),
3740 MemI.mayLoad() ? RegState::Define : 0)
3741 .addReg(AM.BaseReg)
3742 .addImm(AM.Displacement / Scale)
3743 .setMemRefs(MemI.memoperands())
3744 .setMIFlags(MemI.getFlags());
3745 return B.getInstr();
3746 }
3747
3750 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3751 assert(AM.ScaledReg && !AM.Displacement &&
3752 "Address offset can be a register or an immediate, but not both");
3753 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3754 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3755 // Make sure the offset register is in the correct register class.
3756 Register OffsetReg = AM.ScaledReg;
3757 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3758 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3759 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3760 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3761 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3762 }
3763 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3764 .addReg(MemI.getOperand(0).getReg(),
3765 MemI.mayLoad() ? RegState::Define : 0)
3766 .addReg(AM.BaseReg)
3767 .addReg(OffsetReg)
3769 .addImm(AM.Scale != 1)
3770 .setMemRefs(MemI.memoperands())
3771 .setMIFlags(MemI.getFlags());
3772
3773 return B.getInstr();
3774 }
3775
3777 "Function must not be called with an addressing mode it can't handle");
3778}
3779
3780/// Return true if the opcode is a post-index ld/st instruction, which really
3781/// loads from base+0.
3782static bool isPostIndexLdStOpcode(unsigned Opcode) {
3783 switch (Opcode) {
3784 default:
3785 return false;
3786 case AArch64::LD1Fourv16b_POST:
3787 case AArch64::LD1Fourv1d_POST:
3788 case AArch64::LD1Fourv2d_POST:
3789 case AArch64::LD1Fourv2s_POST:
3790 case AArch64::LD1Fourv4h_POST:
3791 case AArch64::LD1Fourv4s_POST:
3792 case AArch64::LD1Fourv8b_POST:
3793 case AArch64::LD1Fourv8h_POST:
3794 case AArch64::LD1Onev16b_POST:
3795 case AArch64::LD1Onev1d_POST:
3796 case AArch64::LD1Onev2d_POST:
3797 case AArch64::LD1Onev2s_POST:
3798 case AArch64::LD1Onev4h_POST:
3799 case AArch64::LD1Onev4s_POST:
3800 case AArch64::LD1Onev8b_POST:
3801 case AArch64::LD1Onev8h_POST:
3802 case AArch64::LD1Rv16b_POST:
3803 case AArch64::LD1Rv1d_POST:
3804 case AArch64::LD1Rv2d_POST:
3805 case AArch64::LD1Rv2s_POST:
3806 case AArch64::LD1Rv4h_POST:
3807 case AArch64::LD1Rv4s_POST:
3808 case AArch64::LD1Rv8b_POST:
3809 case AArch64::LD1Rv8h_POST:
3810 case AArch64::LD1Threev16b_POST:
3811 case AArch64::LD1Threev1d_POST:
3812 case AArch64::LD1Threev2d_POST:
3813 case AArch64::LD1Threev2s_POST:
3814 case AArch64::LD1Threev4h_POST:
3815 case AArch64::LD1Threev4s_POST:
3816 case AArch64::LD1Threev8b_POST:
3817 case AArch64::LD1Threev8h_POST:
3818 case AArch64::LD1Twov16b_POST:
3819 case AArch64::LD1Twov1d_POST:
3820 case AArch64::LD1Twov2d_POST:
3821 case AArch64::LD1Twov2s_POST:
3822 case AArch64::LD1Twov4h_POST:
3823 case AArch64::LD1Twov4s_POST:
3824 case AArch64::LD1Twov8b_POST:
3825 case AArch64::LD1Twov8h_POST:
3826 case AArch64::LD1i16_POST:
3827 case AArch64::LD1i32_POST:
3828 case AArch64::LD1i64_POST:
3829 case AArch64::LD1i8_POST:
3830 case AArch64::LD2Rv16b_POST:
3831 case AArch64::LD2Rv1d_POST:
3832 case AArch64::LD2Rv2d_POST:
3833 case AArch64::LD2Rv2s_POST:
3834 case AArch64::LD2Rv4h_POST:
3835 case AArch64::LD2Rv4s_POST:
3836 case AArch64::LD2Rv8b_POST:
3837 case AArch64::LD2Rv8h_POST:
3838 case AArch64::LD2Twov16b_POST:
3839 case AArch64::LD2Twov2d_POST:
3840 case AArch64::LD2Twov2s_POST:
3841 case AArch64::LD2Twov4h_POST:
3842 case AArch64::LD2Twov4s_POST:
3843 case AArch64::LD2Twov8b_POST:
3844 case AArch64::LD2Twov8h_POST:
3845 case AArch64::LD2i16_POST:
3846 case AArch64::LD2i32_POST:
3847 case AArch64::LD2i64_POST:
3848 case AArch64::LD2i8_POST:
3849 case AArch64::LD3Rv16b_POST:
3850 case AArch64::LD3Rv1d_POST:
3851 case AArch64::LD3Rv2d_POST:
3852 case AArch64::LD3Rv2s_POST:
3853 case AArch64::LD3Rv4h_POST:
3854 case AArch64::LD3Rv4s_POST:
3855 case AArch64::LD3Rv8b_POST:
3856 case AArch64::LD3Rv8h_POST:
3857 case AArch64::LD3Threev16b_POST:
3858 case AArch64::LD3Threev2d_POST:
3859 case AArch64::LD3Threev2s_POST:
3860 case AArch64::LD3Threev4h_POST:
3861 case AArch64::LD3Threev4s_POST:
3862 case AArch64::LD3Threev8b_POST:
3863 case AArch64::LD3Threev8h_POST:
3864 case AArch64::LD3i16_POST:
3865 case AArch64::LD3i32_POST:
3866 case AArch64::LD3i64_POST:
3867 case AArch64::LD3i8_POST:
3868 case AArch64::LD4Fourv16b_POST:
3869 case AArch64::LD4Fourv2d_POST:
3870 case AArch64::LD4Fourv2s_POST:
3871 case AArch64::LD4Fourv4h_POST:
3872 case AArch64::LD4Fourv4s_POST:
3873 case AArch64::LD4Fourv8b_POST:
3874 case AArch64::LD4Fourv8h_POST:
3875 case AArch64::LD4Rv16b_POST:
3876 case AArch64::LD4Rv1d_POST:
3877 case AArch64::LD4Rv2d_POST:
3878 case AArch64::LD4Rv2s_POST:
3879 case AArch64::LD4Rv4h_POST:
3880 case AArch64::LD4Rv4s_POST:
3881 case AArch64::LD4Rv8b_POST:
3882 case AArch64::LD4Rv8h_POST:
3883 case AArch64::LD4i16_POST:
3884 case AArch64::LD4i32_POST:
3885 case AArch64::LD4i64_POST:
3886 case AArch64::LD4i8_POST:
3887 case AArch64::LDAPRWpost:
3888 case AArch64::LDAPRXpost:
3889 case AArch64::LDIAPPWpost:
3890 case AArch64::LDIAPPXpost:
3891 case AArch64::LDPDpost:
3892 case AArch64::LDPQpost:
3893 case AArch64::LDPSWpost:
3894 case AArch64::LDPSpost:
3895 case AArch64::LDPWpost:
3896 case AArch64::LDPXpost:
3897 case AArch64::LDRBBpost:
3898 case AArch64::LDRBpost:
3899 case AArch64::LDRDpost:
3900 case AArch64::LDRHHpost:
3901 case AArch64::LDRHpost:
3902 case AArch64::LDRQpost:
3903 case AArch64::LDRSBWpost:
3904 case AArch64::LDRSBXpost:
3905 case AArch64::LDRSHWpost:
3906 case AArch64::LDRSHXpost:
3907 case AArch64::LDRSWpost:
3908 case AArch64::LDRSpost:
3909 case AArch64::LDRWpost:
3910 case AArch64::LDRXpost:
3911 case AArch64::ST1Fourv16b_POST:
3912 case AArch64::ST1Fourv1d_POST:
3913 case AArch64::ST1Fourv2d_POST:
3914 case AArch64::ST1Fourv2s_POST:
3915 case AArch64::ST1Fourv4h_POST:
3916 case AArch64::ST1Fourv4s_POST:
3917 case AArch64::ST1Fourv8b_POST:
3918 case AArch64::ST1Fourv8h_POST:
3919 case AArch64::ST1Onev16b_POST:
3920 case AArch64::ST1Onev1d_POST:
3921 case AArch64::ST1Onev2d_POST:
3922 case AArch64::ST1Onev2s_POST:
3923 case AArch64::ST1Onev4h_POST:
3924 case AArch64::ST1Onev4s_POST:
3925 case AArch64::ST1Onev8b_POST:
3926 case AArch64::ST1Onev8h_POST:
3927 case AArch64::ST1Threev16b_POST:
3928 case AArch64::ST1Threev1d_POST:
3929 case AArch64::ST1Threev2d_POST:
3930 case AArch64::ST1Threev2s_POST:
3931 case AArch64::ST1Threev4h_POST:
3932 case AArch64::ST1Threev4s_POST:
3933 case AArch64::ST1Threev8b_POST:
3934 case AArch64::ST1Threev8h_POST:
3935 case AArch64::ST1Twov16b_POST:
3936 case AArch64::ST1Twov1d_POST:
3937 case AArch64::ST1Twov2d_POST:
3938 case AArch64::ST1Twov2s_POST:
3939 case AArch64::ST1Twov4h_POST:
3940 case AArch64::ST1Twov4s_POST:
3941 case AArch64::ST1Twov8b_POST:
3942 case AArch64::ST1Twov8h_POST:
3943 case AArch64::ST1i16_POST:
3944 case AArch64::ST1i32_POST:
3945 case AArch64::ST1i64_POST:
3946 case AArch64::ST1i8_POST:
3947 case AArch64::ST2GPostIndex:
3948 case AArch64::ST2Twov16b_POST:
3949 case AArch64::ST2Twov2d_POST:
3950 case AArch64::ST2Twov2s_POST:
3951 case AArch64::ST2Twov4h_POST:
3952 case AArch64::ST2Twov4s_POST:
3953 case AArch64::ST2Twov8b_POST:
3954 case AArch64::ST2Twov8h_POST:
3955 case AArch64::ST2i16_POST:
3956 case AArch64::ST2i32_POST:
3957 case AArch64::ST2i64_POST:
3958 case AArch64::ST2i8_POST:
3959 case AArch64::ST3Threev16b_POST:
3960 case AArch64::ST3Threev2d_POST:
3961 case AArch64::ST3Threev2s_POST:
3962 case AArch64::ST3Threev4h_POST:
3963 case AArch64::ST3Threev4s_POST:
3964 case AArch64::ST3Threev8b_POST:
3965 case AArch64::ST3Threev8h_POST:
3966 case AArch64::ST3i16_POST:
3967 case AArch64::ST3i32_POST:
3968 case AArch64::ST3i64_POST:
3969 case AArch64::ST3i8_POST:
3970 case AArch64::ST4Fourv16b_POST:
3971 case AArch64::ST4Fourv2d_POST:
3972 case AArch64::ST4Fourv2s_POST:
3973 case AArch64::ST4Fourv4h_POST:
3974 case AArch64::ST4Fourv4s_POST:
3975 case AArch64::ST4Fourv8b_POST:
3976 case AArch64::ST4Fourv8h_POST:
3977 case AArch64::ST4i16_POST:
3978 case AArch64::ST4i32_POST:
3979 case AArch64::ST4i64_POST:
3980 case AArch64::ST4i8_POST:
3981 case AArch64::STGPostIndex:
3982 case AArch64::STGPpost:
3983 case AArch64::STPDpost:
3984 case AArch64::STPQpost:
3985 case AArch64::STPSpost:
3986 case AArch64::STPWpost:
3987 case AArch64::STPXpost:
3988 case AArch64::STRBBpost:
3989 case AArch64::STRBpost:
3990 case AArch64::STRDpost:
3991 case AArch64::STRHHpost:
3992 case AArch64::STRHpost:
3993 case AArch64::STRQpost:
3994 case AArch64::STRSpost:
3995 case AArch64::STRWpost:
3996 case AArch64::STRXpost:
3997 case AArch64::STZ2GPostIndex:
3998 case AArch64::STZGPostIndex:
3999 return true;
4000 }
4001}
4002
4004 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4005 bool &OffsetIsScalable, TypeSize &Width,
4006 const TargetRegisterInfo *TRI) const {
4007 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4008 // Handle only loads/stores with base register followed by immediate offset.
4009 if (LdSt.getNumExplicitOperands() == 3) {
4010 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4011 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4012 !LdSt.getOperand(2).isImm())
4013 return false;
4014 } else if (LdSt.getNumExplicitOperands() == 4) {
4015 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4016 if (!LdSt.getOperand(1).isReg() ||
4017 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4018 !LdSt.getOperand(3).isImm())
4019 return false;
4020 } else
4021 return false;
4022
4023 // Get the scaling factor for the instruction and set the width for the
4024 // instruction.
4025 TypeSize Scale(0U, false);
4026 int64_t Dummy1, Dummy2;
4027
4028 // If this returns false, then it's an instruction we don't want to handle.
4029 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4030 return false;
4031
4032 // Compute the offset. Offset is calculated as the immediate operand
4033 // multiplied by the scaling factor. Unscaled instructions have scaling factor
4034 // set to 1. Postindex are a special case which have an offset of 0.
4035 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4036 BaseOp = &LdSt.getOperand(2);
4037 Offset = 0;
4038 } else if (LdSt.getNumExplicitOperands() == 3) {
4039 BaseOp = &LdSt.getOperand(1);
4040 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4041 } else {
4042 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4043 BaseOp = &LdSt.getOperand(2);
4044 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4045 }
4046 OffsetIsScalable = Scale.isScalable();
4047
4048 return BaseOp->isReg() || BaseOp->isFI();
4049}
4050
4053 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4054 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4055 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4056 return OfsOp;
4057}
4058
4059bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4060 TypeSize &Width, int64_t &MinOffset,
4061 int64_t &MaxOffset) {
4062 switch (Opcode) {
4063 // Not a memory operation or something we want to handle.
4064 default:
4065 Scale = TypeSize::getFixed(0);
4066 Width = TypeSize::getFixed(0);
4067 MinOffset = MaxOffset = 0;
4068 return false;
4069 // LDR / STR
4070 case AArch64::LDRQui:
4071 case AArch64::STRQui:
4072 Scale = TypeSize::getFixed(16);
4073 Width = TypeSize::getFixed(16);
4074 MinOffset = 0;
4075 MaxOffset = 4095;
4076 break;
4077 case AArch64::LDRXui:
4078 case AArch64::LDRDui:
4079 case AArch64::STRXui:
4080 case AArch64::STRDui:
4081 case AArch64::PRFMui:
4082 Scale = TypeSize::getFixed(8);
4083 Width = TypeSize::getFixed(8);
4084 MinOffset = 0;
4085 MaxOffset = 4095;
4086 break;
4087 case AArch64::LDRWui:
4088 case AArch64::LDRSui:
4089 case AArch64::LDRSWui:
4090 case AArch64::STRWui:
4091 case AArch64::STRSui:
4092 Scale = TypeSize::getFixed(4);
4093 Width = TypeSize::getFixed(4);
4094 MinOffset = 0;
4095 MaxOffset = 4095;
4096 break;
4097 case AArch64::LDRHui:
4098 case AArch64::LDRHHui:
4099 case AArch64::LDRSHWui:
4100 case AArch64::LDRSHXui:
4101 case AArch64::STRHui:
4102 case AArch64::STRHHui:
4103 Scale = TypeSize::getFixed(2);
4104 Width = TypeSize::getFixed(2);
4105 MinOffset = 0;
4106 MaxOffset = 4095;
4107 break;
4108 case AArch64::LDRBui:
4109 case AArch64::LDRBBui:
4110 case AArch64::LDRSBWui:
4111 case AArch64::LDRSBXui:
4112 case AArch64::STRBui:
4113 case AArch64::STRBBui:
4114 Scale = TypeSize::getFixed(1);
4115 Width = TypeSize::getFixed(1);
4116 MinOffset = 0;
4117 MaxOffset = 4095;
4118 break;
4119 // post/pre inc
4120 case AArch64::STRQpre:
4121 case AArch64::LDRQpost:
4122 Scale = TypeSize::getFixed(1);
4123 Width = TypeSize::getFixed(16);
4124 MinOffset = -256;
4125 MaxOffset = 255;
4126 break;
4127 case AArch64::LDRDpost:
4128 case AArch64::LDRDpre:
4129 case AArch64::LDRXpost:
4130 case AArch64::LDRXpre:
4131 case AArch64::STRDpost:
4132 case AArch64::STRDpre:
4133 case AArch64::STRXpost:
4134 case AArch64::STRXpre:
4135 Scale = TypeSize::getFixed(1);
4136 Width = TypeSize::getFixed(8);
4137 MinOffset = -256;
4138 MaxOffset = 255;
4139 break;
4140 case AArch64::STRWpost:
4141 case AArch64::STRWpre:
4142 case AArch64::LDRWpost:
4143 case AArch64::LDRWpre:
4144 case AArch64::STRSpost:
4145 case AArch64::STRSpre:
4146 case AArch64::LDRSpost:
4147 case AArch64::LDRSpre:
4148 Scale = TypeSize::getFixed(1);
4149 Width = TypeSize::getFixed(4);
4150 MinOffset = -256;
4151 MaxOffset = 255;
4152 break;
4153 case AArch64::LDRHpost:
4154 case AArch64::LDRHpre:
4155 case AArch64::STRHpost:
4156 case AArch64::STRHpre:
4157 case AArch64::LDRHHpost:
4158 case AArch64::LDRHHpre:
4159 case AArch64::STRHHpost:
4160 case AArch64::STRHHpre:
4161 Scale = TypeSize::getFixed(1);
4162 Width = TypeSize::getFixed(2);
4163 MinOffset = -256;
4164 MaxOffset = 255;
4165 break;
4166 case AArch64::LDRBpost:
4167 case AArch64::LDRBpre:
4168 case AArch64::STRBpost:
4169 case AArch64::STRBpre:
4170 case AArch64::LDRBBpost:
4171 case AArch64::LDRBBpre:
4172 case AArch64::STRBBpost:
4173 case AArch64::STRBBpre:
4174 Scale = TypeSize::getFixed(1);
4175 Width = TypeSize::getFixed(1);
4176 MinOffset = -256;
4177 MaxOffset = 255;
4178 break;
4179 // Unscaled
4180 case AArch64::LDURQi:
4181 case AArch64::STURQi:
4182 Scale = TypeSize::getFixed(1);
4183 Width = TypeSize::getFixed(16);
4184 MinOffset = -256;
4185 MaxOffset = 255;
4186 break;
4187 case AArch64::LDURXi:
4188 case AArch64::LDURDi:
4189 case AArch64::LDAPURXi:
4190 case AArch64::STURXi:
4191 case AArch64::STURDi:
4192 case AArch64::STLURXi:
4193 case AArch64::PRFUMi:
4194 Scale = TypeSize::getFixed(1);
4195 Width = TypeSize::getFixed(8);
4196 MinOffset = -256;
4197 MaxOffset = 255;
4198 break;
4199 case AArch64::LDURWi:
4200 case AArch64::LDURSi:
4201 case AArch64::LDURSWi:
4202 case AArch64::LDAPURi:
4203 case AArch64::LDAPURSWi:
4204 case AArch64::STURWi:
4205 case AArch64::STURSi:
4206 case AArch64::STLURWi:
4207 Scale = TypeSize::getFixed(1);
4208 Width = TypeSize::getFixed(4);
4209 MinOffset = -256;
4210 MaxOffset = 255;
4211 break;
4212 case AArch64::LDURHi:
4213 case AArch64::LDURHHi:
4214 case AArch64::LDURSHXi:
4215 case AArch64::LDURSHWi:
4216 case AArch64::LDAPURHi:
4217 case AArch64::LDAPURSHWi:
4218 case AArch64::LDAPURSHXi:
4219 case AArch64::STURHi:
4220 case AArch64::STURHHi:
4221 case AArch64::STLURHi:
4222 Scale = TypeSize::getFixed(1);
4223 Width = TypeSize::getFixed(2);
4224 MinOffset = -256;
4225 MaxOffset = 255;
4226 break;
4227 case AArch64::LDURBi:
4228 case AArch64::LDURBBi:
4229 case AArch64::LDURSBXi:
4230 case AArch64::LDURSBWi:
4231 case AArch64::LDAPURBi:
4232 case AArch64::LDAPURSBWi:
4233 case AArch64::LDAPURSBXi:
4234 case AArch64::STURBi:
4235 case AArch64::STURBBi:
4236 case AArch64::STLURBi:
4237 Scale = TypeSize::getFixed(1);
4238 Width = TypeSize::getFixed(1);
4239 MinOffset = -256;
4240 MaxOffset = 255;
4241 break;
4242 // LDP / STP (including pre/post inc)
4243 case AArch64::LDPQi:
4244 case AArch64::LDNPQi:
4245 case AArch64::STPQi:
4246 case AArch64::STNPQi:
4247 case AArch64::LDPQpost:
4248 case AArch64::LDPQpre:
4249 case AArch64::STPQpost:
4250 case AArch64::STPQpre:
4251 Scale = TypeSize::getFixed(16);
4252 Width = TypeSize::getFixed(16 * 2);
4253 MinOffset = -64;
4254 MaxOffset = 63;
4255 break;
4256 case AArch64::LDPXi:
4257 case AArch64::LDPDi:
4258 case AArch64::LDNPXi:
4259 case AArch64::LDNPDi:
4260 case AArch64::STPXi:
4261 case AArch64::STPDi:
4262 case AArch64::STNPXi:
4263 case AArch64::STNPDi:
4264 case AArch64::LDPDpost:
4265 case AArch64::LDPDpre:
4266 case AArch64::LDPXpost:
4267 case AArch64::LDPXpre:
4268 case AArch64::STPDpost:
4269 case AArch64::STPDpre:
4270 case AArch64::STPXpost:
4271 case AArch64::STPXpre:
4272 Scale = TypeSize::getFixed(8);
4273 Width = TypeSize::getFixed(8 * 2);
4274 MinOffset = -64;
4275 MaxOffset = 63;
4276 break;
4277 case AArch64::LDPWi:
4278 case AArch64::LDPSi:
4279 case AArch64::LDNPWi:
4280 case AArch64::LDNPSi:
4281 case AArch64::STPWi:
4282 case AArch64::STPSi:
4283 case AArch64::STNPWi:
4284 case AArch64::STNPSi:
4285 case AArch64::LDPSpost:
4286 case AArch64::LDPSpre:
4287 case AArch64::LDPWpost:
4288 case AArch64::LDPWpre:
4289 case AArch64::STPSpost:
4290 case AArch64::STPSpre:
4291 case AArch64::STPWpost:
4292 case AArch64::STPWpre:
4293 Scale = TypeSize::getFixed(4);
4294 Width = TypeSize::getFixed(4 * 2);
4295 MinOffset = -64;
4296 MaxOffset = 63;
4297 break;
4298 case AArch64::StoreSwiftAsyncContext:
4299 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4300 Scale = TypeSize::getFixed(1);
4301 Width = TypeSize::getFixed(8);
4302 MinOffset = 0;
4303 MaxOffset = 4095;
4304 break;
4305 case AArch64::ADDG:
4306 Scale = TypeSize::getFixed(16);
4307 Width = TypeSize::getFixed(0);
4308 MinOffset = 0;
4309 MaxOffset = 63;
4310 break;
4311 case AArch64::TAGPstack:
4312 Scale = TypeSize::getFixed(16);
4313 Width = TypeSize::getFixed(0);
4314 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4315 // of 63 (not 64!).
4316 MinOffset = -63;
4317 MaxOffset = 63;
4318 break;
4319 case AArch64::LDG:
4320 case AArch64::STGi:
4321 case AArch64::STGPreIndex:
4322 case AArch64::STGPostIndex:
4323 case AArch64::STZGi:
4324 case AArch64::STZGPreIndex:
4325 case AArch64::STZGPostIndex:
4326 Scale = TypeSize::getFixed(16);
4327 Width = TypeSize::getFixed(16);
4328 MinOffset = -256;
4329 MaxOffset = 255;
4330 break;
4331 // SVE
4332 case AArch64::STR_ZZZZXI:
4333 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4334 case AArch64::LDR_ZZZZXI:
4335 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4336 Scale = TypeSize::getScalable(16);
4337 Width = TypeSize::getScalable(16 * 4);
4338 MinOffset = -256;
4339 MaxOffset = 252;
4340 break;
4341 case AArch64::STR_ZZZXI:
4342 case AArch64::LDR_ZZZXI:
4343 Scale = TypeSize::getScalable(16);
4344 Width = TypeSize::getScalable(16 * 3);
4345 MinOffset = -256;
4346 MaxOffset = 253;
4347 break;
4348 case AArch64::STR_ZZXI:
4349 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4350 case AArch64::LDR_ZZXI:
4351 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4352 Scale = TypeSize::getScalable(16);
4353 Width = TypeSize::getScalable(16 * 2);
4354 MinOffset = -256;
4355 MaxOffset = 254;
4356 break;
4357 case AArch64::LDR_PXI:
4358 case AArch64::STR_PXI:
4359 Scale = TypeSize::getScalable(2);
4360 Width = TypeSize::getScalable(2);
4361 MinOffset = -256;
4362 MaxOffset = 255;
4363 break;
4364 case AArch64::LDR_PPXI:
4365 case AArch64::STR_PPXI:
4366 Scale = TypeSize::getScalable(2);
4367 Width = TypeSize::getScalable(2 * 2);
4368 MinOffset = -256;
4369 MaxOffset = 254;
4370 break;
4371 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4372 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4373 case AArch64::LDR_ZXI:
4374 case AArch64::STR_ZXI:
4375 Scale = TypeSize::getScalable(16);
4376 Width = TypeSize::getScalable(16);
4377 MinOffset = -256;
4378 MaxOffset = 255;
4379 break;
4380 case AArch64::LD1B_IMM:
4381 case AArch64::LD1H_IMM:
4382 case AArch64::LD1W_IMM:
4383 case AArch64::LD1D_IMM:
4384 case AArch64::LDNT1B_ZRI:
4385 case AArch64::LDNT1H_ZRI:
4386 case AArch64::LDNT1W_ZRI:
4387 case AArch64::LDNT1D_ZRI:
4388 case AArch64::ST1B_IMM:
4389 case AArch64::ST1H_IMM:
4390 case AArch64::ST1W_IMM:
4391 case AArch64::ST1D_IMM:
4392 case AArch64::STNT1B_ZRI:
4393 case AArch64::STNT1H_ZRI:
4394 case AArch64::STNT1W_ZRI:
4395 case AArch64::STNT1D_ZRI:
4396 case AArch64::LDNF1B_IMM:
4397 case AArch64::LDNF1H_IMM:
4398 case AArch64::LDNF1W_IMM:
4399 case AArch64::LDNF1D_IMM:
4400 // A full vectors worth of data
4401 // Width = mbytes * elements
4402 Scale = TypeSize::getScalable(16);
4403 Width = TypeSize::getScalable(16);
4404 MinOffset = -8;
4405 MaxOffset = 7;
4406 break;
4407 case AArch64::LD2B_IMM:
4408 case AArch64::LD2H_IMM:
4409 case AArch64::LD2W_IMM:
4410 case AArch64::LD2D_IMM:
4411 case AArch64::ST2B_IMM:
4412 case AArch64::ST2H_IMM:
4413 case AArch64::ST2W_IMM:
4414 case AArch64::ST2D_IMM:
4415 Scale = TypeSize::getScalable(32);
4416 Width = TypeSize::getScalable(16 * 2);
4417 MinOffset = -8;
4418 MaxOffset = 7;
4419 break;
4420 case AArch64::LD3B_IMM:
4421 case AArch64::LD3H_IMM:
4422 case AArch64::LD3W_IMM:
4423 case AArch64::LD3D_IMM:
4424 case AArch64::ST3B_IMM:
4425 case AArch64::ST3H_IMM:
4426 case AArch64::ST3W_IMM:
4427 case AArch64::ST3D_IMM:
4428 Scale = TypeSize::getScalable(48);
4429 Width = TypeSize::getScalable(16 * 3);
4430 MinOffset = -8;
4431 MaxOffset = 7;
4432 break;
4433 case AArch64::LD4B_IMM:
4434 case AArch64::LD4H_IMM:
4435 case AArch64::LD4W_IMM:
4436 case AArch64::LD4D_IMM:
4437 case AArch64::ST4B_IMM:
4438 case AArch64::ST4H_IMM:
4439 case AArch64::ST4W_IMM:
4440 case AArch64::ST4D_IMM:
4441 Scale = TypeSize::getScalable(64);
4442 Width = TypeSize::getScalable(16 * 4);
4443 MinOffset = -8;
4444 MaxOffset = 7;
4445 break;
4446 case AArch64::LD1B_H_IMM:
4447 case AArch64::LD1SB_H_IMM:
4448 case AArch64::LD1H_S_IMM:
4449 case AArch64::LD1SH_S_IMM:
4450 case AArch64::LD1W_D_IMM:
4451 case AArch64::LD1SW_D_IMM:
4452 case AArch64::ST1B_H_IMM:
4453 case AArch64::ST1H_S_IMM:
4454 case AArch64::ST1W_D_IMM:
4455 case AArch64::LDNF1B_H_IMM:
4456 case AArch64::LDNF1SB_H_IMM:
4457 case AArch64::LDNF1H_S_IMM:
4458 case AArch64::LDNF1SH_S_IMM:
4459 case AArch64::LDNF1W_D_IMM:
4460 case AArch64::LDNF1SW_D_IMM:
4461 // A half vector worth of data
4462 // Width = mbytes * elements
4463 Scale = TypeSize::getScalable(8);
4464 Width = TypeSize::getScalable(8);
4465 MinOffset = -8;
4466 MaxOffset = 7;
4467 break;
4468 case AArch64::LD1B_S_IMM:
4469 case AArch64::LD1SB_S_IMM:
4470 case AArch64::LD1H_D_IMM:
4471 case AArch64::LD1SH_D_IMM:
4472 case AArch64::ST1B_S_IMM:
4473 case AArch64::ST1H_D_IMM:
4474 case AArch64::LDNF1B_S_IMM:
4475 case AArch64::LDNF1SB_S_IMM:
4476 case AArch64::LDNF1H_D_IMM:
4477 case AArch64::LDNF1SH_D_IMM:
4478 // A quarter vector worth of data
4479 // Width = mbytes * elements
4480 Scale = TypeSize::getScalable(4);
4481 Width = TypeSize::getScalable(4);
4482 MinOffset = -8;
4483 MaxOffset = 7;
4484 break;
4485 case AArch64::LD1B_D_IMM:
4486 case AArch64::LD1SB_D_IMM:
4487 case AArch64::ST1B_D_IMM:
4488 case AArch64::LDNF1B_D_IMM:
4489 case AArch64::LDNF1SB_D_IMM:
4490 // A eighth vector worth of data
4491 // Width = mbytes * elements
4492 Scale = TypeSize::getScalable(2);
4493 Width = TypeSize::getScalable(2);
4494 MinOffset = -8;
4495 MaxOffset = 7;
4496 break;
4497 case AArch64::ST2Gi:
4498 case AArch64::ST2GPreIndex:
4499 case AArch64::ST2GPostIndex:
4500 case AArch64::STZ2Gi:
4501 case AArch64::STZ2GPreIndex:
4502 case AArch64::STZ2GPostIndex:
4503 Scale = TypeSize::getFixed(16);
4504 Width = TypeSize::getFixed(32);
4505 MinOffset = -256;
4506 MaxOffset = 255;
4507 break;
4508 case AArch64::STGPi:
4509 case AArch64::STGPpost:
4510 case AArch64::STGPpre:
4511 Scale = TypeSize::getFixed(16);
4512 Width = TypeSize::getFixed(16);
4513 MinOffset = -64;
4514 MaxOffset = 63;
4515 break;
4516 case AArch64::LD1RB_IMM:
4517 case AArch64::LD1RB_H_IMM:
4518 case AArch64::LD1RB_S_IMM:
4519 case AArch64::LD1RB_D_IMM:
4520 case AArch64::LD1RSB_H_IMM:
4521 case AArch64::LD1RSB_S_IMM:
4522 case AArch64::LD1RSB_D_IMM:
4523 Scale = TypeSize::getFixed(1);
4524 Width = TypeSize::getFixed(1);
4525 MinOffset = 0;
4526 MaxOffset = 63;
4527 break;
4528 case AArch64::LD1RH_IMM:
4529 case AArch64::LD1RH_S_IMM:
4530 case AArch64::LD1RH_D_IMM:
4531 case AArch64::LD1RSH_S_IMM:
4532 case AArch64::LD1RSH_D_IMM:
4533 Scale = TypeSize::getFixed(2);
4534 Width = TypeSize::getFixed(2);
4535 MinOffset = 0;
4536 MaxOffset = 63;
4537 break;
4538 case AArch64::LD1RW_IMM:
4539 case AArch64::LD1RW_D_IMM:
4540 case AArch64::LD1RSW_IMM:
4541 Scale = TypeSize::getFixed(4);
4542 Width = TypeSize::getFixed(4);
4543 MinOffset = 0;
4544 MaxOffset = 63;
4545 break;
4546 case AArch64::LD1RD_IMM:
4547 Scale = TypeSize::getFixed(8);
4548 Width = TypeSize::getFixed(8);
4549 MinOffset = 0;
4550 MaxOffset = 63;
4551 break;
4552 }
4553
4554 return true;
4555}
4556
4557// Scaling factor for unscaled load or store.
4559 switch (Opc) {
4560 default:
4561 llvm_unreachable("Opcode has unknown scale!");
4562 case AArch64::LDRBBui:
4563 case AArch64::LDURBBi:
4564 case AArch64::LDRSBWui:
4565 case AArch64::LDURSBWi:
4566 case AArch64::STRBBui:
4567 case AArch64::STURBBi:
4568 return 1;
4569 case AArch64::LDRHHui:
4570 case AArch64::LDURHHi:
4571 case AArch64::LDRSHWui:
4572 case AArch64::LDURSHWi:
4573 case AArch64::STRHHui:
4574 case AArch64::STURHHi:
4575 return 2;
4576 case AArch64::LDRSui:
4577 case AArch64::LDURSi:
4578 case AArch64::LDRSpre:
4579 case AArch64::LDRSWui:
4580 case AArch64::LDURSWi:
4581 case AArch64::LDRSWpre:
4582 case AArch64::LDRWpre:
4583 case AArch64::LDRWui:
4584 case AArch64::LDURWi:
4585 case AArch64::STRSui:
4586 case AArch64::STURSi:
4587 case AArch64::STRSpre:
4588 case AArch64::STRWui:
4589 case AArch64::STURWi:
4590 case AArch64::STRWpre:
4591 case AArch64::LDPSi:
4592 case AArch64::LDPSWi:
4593 case AArch64::LDPWi:
4594 case AArch64::STPSi:
4595 case AArch64::STPWi:
4596 return 4;
4597 case AArch64::LDRDui:
4598 case AArch64::LDURDi:
4599 case AArch64::LDRDpre:
4600 case AArch64::LDRXui:
4601 case AArch64::LDURXi:
4602 case AArch64::LDRXpre:
4603 case AArch64::STRDui:
4604 case AArch64::STURDi:
4605 case AArch64::STRDpre:
4606 case AArch64::STRXui:
4607 case AArch64::STURXi:
4608 case AArch64::STRXpre:
4609 case AArch64::LDPDi:
4610 case AArch64::LDPXi:
4611 case AArch64::STPDi:
4612 case AArch64::STPXi:
4613 return 8;
4614 case AArch64::LDRQui:
4615 case AArch64::LDURQi:
4616 case AArch64::STRQui:
4617 case AArch64::STURQi:
4618 case AArch64::STRQpre:
4619 case AArch64::LDPQi:
4620 case AArch64::LDRQpre:
4621 case AArch64::STPQi:
4622 case AArch64::STGi:
4623 case AArch64::STZGi:
4624 case AArch64::ST2Gi:
4625 case AArch64::STZ2Gi:
4626 case AArch64::STGPi:
4627 return 16;
4628 }
4629}
4630
4632 switch (MI.getOpcode()) {
4633 default:
4634 return false;
4635 case AArch64::LDRWpre:
4636 case AArch64::LDRXpre:
4637 case AArch64::LDRSWpre:
4638 case AArch64::LDRSpre:
4639 case AArch64::LDRDpre:
4640 case AArch64::LDRQpre:
4641 return true;
4642 }
4643}
4644
4646 switch (MI.getOpcode()) {
4647 default:
4648 return false;
4649 case AArch64::STRWpre:
4650 case AArch64::STRXpre:
4651 case AArch64::STRSpre:
4652 case AArch64::STRDpre:
4653 case AArch64::STRQpre:
4654 return true;
4655 }
4656}
4657
4659 return isPreLd(MI) || isPreSt(MI);
4660}
4661
4663 switch (MI.getOpcode()) {
4664 default:
4665 return false;
4666 case AArch64::LDPSi:
4667 case AArch64::LDPSWi:
4668 case AArch64::LDPDi:
4669 case AArch64::LDPQi:
4670 case AArch64::LDPWi:
4671 case AArch64::LDPXi:
4672 case AArch64::STPSi:
4673 case AArch64::STPDi:
4674 case AArch64::STPQi:
4675 case AArch64::STPWi:
4676 case AArch64::STPXi:
4677 case AArch64::STGPi:
4678 return true;
4679 }
4680}
4681
4683 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4684 unsigned Idx =
4686 : 1;
4687 return MI.getOperand(Idx);
4688}
4689
4690const MachineOperand &
4692 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4693 unsigned Idx =
4695 : 2;
4696 return MI.getOperand(Idx);
4697}
4698
4699const MachineOperand &
4701 switch (MI.getOpcode()) {
4702 default:
4703 llvm_unreachable("Unexpected opcode");
4704 case AArch64::LDRBroX:
4705 case AArch64::LDRBBroX:
4706 case AArch64::LDRSBXroX:
4707 case AArch64::LDRSBWroX:
4708 case AArch64::LDRHroX:
4709 case AArch64::LDRHHroX:
4710 case AArch64::LDRSHXroX:
4711 case AArch64::LDRSHWroX:
4712 case AArch64::LDRWroX:
4713 case AArch64::LDRSroX:
4714 case AArch64::LDRSWroX:
4715 case AArch64::LDRDroX:
4716 case AArch64::LDRXroX:
4717 case AArch64::LDRQroX:
4718 return MI.getOperand(4);
4719 }
4720}
4721
4723 Register Reg) {
4724 if (MI.getParent() == nullptr)
4725 return nullptr;
4726 const MachineFunction *MF = MI.getParent()->getParent();
4727 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4728}
4729
4731 auto IsHFPR = [&](const MachineOperand &Op) {
4732 if (!Op.isReg())
4733 return false;
4734 auto Reg = Op.getReg();
4735 if (Reg.isPhysical())
4736 return AArch64::FPR16RegClass.contains(Reg);
4737 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4738 return TRC == &AArch64::FPR16RegClass ||
4739 TRC == &AArch64::FPR16_loRegClass;
4740 };
4741 return llvm::any_of(MI.operands(), IsHFPR);
4742}
4743
4745 auto IsQFPR = [&](const MachineOperand &Op) {
4746 if (!Op.isReg())
4747 return false;
4748 auto Reg = Op.getReg();
4749 if (Reg.isPhysical())
4750 return AArch64::FPR128RegClass.contains(Reg);
4751 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4752 return TRC == &AArch64::FPR128RegClass ||
4753 TRC == &AArch64::FPR128_loRegClass;
4754 };
4755 return llvm::any_of(MI.operands(), IsQFPR);
4756}
4757
4759 switch (MI.getOpcode()) {
4760 case AArch64::BRK:
4761 case AArch64::HLT:
4762 case AArch64::PACIASP:
4763 case AArch64::PACIBSP:
4764 // Implicit BTI behavior.
4765 return true;
4766 case AArch64::PAUTH_PROLOGUE:
4767 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4768 return true;
4769 case AArch64::HINT: {
4770 unsigned Imm = MI.getOperand(0).getImm();
4771 // Explicit BTI instruction.
4772 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4773 return true;
4774 // PACI(A|B)SP instructions.
4775 if (Imm == 25 || Imm == 27)
4776 return true;
4777 return false;
4778 }
4779 default:
4780 return false;
4781 }
4782}
4783
4785 if (Reg == 0)
4786 return false;
4787 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4788 return AArch64::FPR128RegClass.contains(Reg) ||
4789 AArch64::FPR64RegClass.contains(Reg) ||
4790 AArch64::FPR32RegClass.contains(Reg) ||
4791 AArch64::FPR16RegClass.contains(Reg) ||
4792 AArch64::FPR8RegClass.contains(Reg);
4793}
4794
4796 auto IsFPR = [&](const MachineOperand &Op) {
4797 if (!Op.isReg())
4798 return false;
4799 auto Reg = Op.getReg();
4800 if (Reg.isPhysical())
4801 return isFpOrNEON(Reg);
4802
4803 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4804 return TRC == &AArch64::FPR128RegClass ||
4805 TRC == &AArch64::FPR128_loRegClass ||
4806 TRC == &AArch64::FPR64RegClass ||
4807 TRC == &AArch64::FPR64_loRegClass ||
4808 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4809 TRC == &AArch64::FPR8RegClass;
4810 };
4811 return llvm::any_of(MI.operands(), IsFPR);
4812}
4813
4814// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4815// scaled.
4816static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4818
4819 // If the byte-offset isn't a multiple of the stride, we can't scale this
4820 // offset.
4821 if (Offset % Scale != 0)
4822 return false;
4823
4824 // Convert the byte-offset used by unscaled into an "element" offset used
4825 // by the scaled pair load/store instructions.
4826 Offset /= Scale;
4827 return true;
4828}
4829
4830static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4831 if (FirstOpc == SecondOpc)
4832 return true;
4833 // We can also pair sign-ext and zero-ext instructions.
4834 switch (FirstOpc) {
4835 default:
4836 return false;
4837 case AArch64::STRSui:
4838 case AArch64::STURSi:
4839 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4840 case AArch64::STRDui:
4841 case AArch64::STURDi:
4842 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4843 case AArch64::STRQui:
4844 case AArch64::STURQi:
4845 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4846 case AArch64::STRWui:
4847 case AArch64::STURWi:
4848 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4849 case AArch64::STRXui:
4850 case AArch64::STURXi:
4851 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4852 case AArch64::LDRSui:
4853 case AArch64::LDURSi:
4854 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4855 case AArch64::LDRDui:
4856 case AArch64::LDURDi:
4857 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4858 case AArch64::LDRQui:
4859 case AArch64::LDURQi:
4860 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4861 case AArch64::LDRWui:
4862 case AArch64::LDURWi:
4863 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4864 case AArch64::LDRSWui:
4865 case AArch64::LDURSWi:
4866 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4867 case AArch64::LDRXui:
4868 case AArch64::LDURXi:
4869 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4870 }
4871 // These instructions can't be paired based on their opcodes.
4872 return false;
4873}
4874
4875static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4876 int64_t Offset1, unsigned Opcode1, int FI2,
4877 int64_t Offset2, unsigned Opcode2) {
4878 // Accesses through fixed stack object frame indices may access a different
4879 // fixed stack slot. Check that the object offsets + offsets match.
4880 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4881 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4882 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4883 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4884 // Convert to scaled object offsets.
4885 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4886 if (ObjectOffset1 % Scale1 != 0)
4887 return false;
4888 ObjectOffset1 /= Scale1;
4889 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4890 if (ObjectOffset2 % Scale2 != 0)
4891 return false;
4892 ObjectOffset2 /= Scale2;
4893 ObjectOffset1 += Offset1;
4894 ObjectOffset2 += Offset2;
4895 return ObjectOffset1 + 1 == ObjectOffset2;
4896 }
4897
4898 return FI1 == FI2;
4899}
4900
4901/// Detect opportunities for ldp/stp formation.
4902///
4903/// Only called for LdSt for which getMemOperandWithOffset returns true.
4905 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4906 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4907 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4908 unsigned NumBytes) const {
4909 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4910 const MachineOperand &BaseOp1 = *BaseOps1.front();
4911 const MachineOperand &BaseOp2 = *BaseOps2.front();
4912 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4913 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4914 if (BaseOp1.getType() != BaseOp2.getType())
4915 return false;
4916
4917 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4918 "Only base registers and frame indices are supported.");
4919
4920 // Check for both base regs and base FI.
4921 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4922 return false;
4923
4924 // Only cluster up to a single pair.
4925 if (ClusterSize > 2)
4926 return false;
4927
4928 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4929 return false;
4930
4931 // Can we pair these instructions based on their opcodes?
4932 unsigned FirstOpc = FirstLdSt.getOpcode();
4933 unsigned SecondOpc = SecondLdSt.getOpcode();
4934 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4935 return false;
4936
4937 // Can't merge volatiles or load/stores that have a hint to avoid pair
4938 // formation, for example.
4939 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4940 !isCandidateToMergeOrPair(SecondLdSt))
4941 return false;
4942
4943 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4944 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4945 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4946 return false;
4947
4948 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4949 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4950 return false;
4951
4952 // Pairwise instructions have a 7-bit signed offset field.
4953 if (Offset1 > 63 || Offset1 < -64)
4954 return false;
4955
4956 // The caller should already have ordered First/SecondLdSt by offset.
4957 // Note: except for non-equal frame index bases
4958 if (BaseOp1.isFI()) {
4959 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4960 "Caller should have ordered offsets.");
4961
4962 const MachineFrameInfo &MFI =
4963 FirstLdSt.getParent()->getParent()->getFrameInfo();
4964 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4965 BaseOp2.getIndex(), Offset2, SecondOpc);
4966 }
4967
4968 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4969
4970 return Offset1 + 1 == Offset2;
4971}
4972
4974 MCRegister Reg, unsigned SubIdx,
4975 unsigned State,
4976 const TargetRegisterInfo *TRI) {
4977 if (!SubIdx)
4978 return MIB.addReg(Reg, State);
4979
4980 if (Reg.isPhysical())
4981 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4982 return MIB.addReg(Reg, State, SubIdx);
4983}
4984
4985static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4986 unsigned NumRegs) {
4987 // We really want the positive remainder mod 32 here, that happens to be
4988 // easily obtainable with a mask.
4989 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4990}
4991
4994 const DebugLoc &DL, MCRegister DestReg,
4995 MCRegister SrcReg, bool KillSrc,
4996 unsigned Opcode,
4997 ArrayRef<unsigned> Indices) const {
4998 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5000 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5001 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5002 unsigned NumRegs = Indices.size();
5003
5004 int SubReg = 0, End = NumRegs, Incr = 1;
5005 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5006 SubReg = NumRegs - 1;
5007 End = -1;
5008 Incr = -1;
5009 }
5010
5011 for (; SubReg != End; SubReg += Incr) {
5012 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5013 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5014 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5015 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5016 }
5017}
5018
5021 const DebugLoc &DL, MCRegister DestReg,
5022 MCRegister SrcReg, bool KillSrc,
5023 unsigned Opcode, unsigned ZeroReg,
5024 llvm::ArrayRef<unsigned> Indices) const {
5026 unsigned NumRegs = Indices.size();
5027
5028#ifndef NDEBUG
5029 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5030 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5031 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5032 "GPR reg sequences should not be able to overlap");
5033#endif
5034
5035 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5036 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5037 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5038 MIB.addReg(ZeroReg);
5039 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5040 MIB.addImm(0);
5041 }
5042}
5043
5046 const DebugLoc &DL, Register DestReg,
5047 Register SrcReg, bool KillSrc,
5048 bool RenamableDest,
5049 bool RenamableSrc) const {
5050 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5051 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
5053
5054 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5055 // If either operand is WSP, expand to ADD #0.
5056 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5057 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5058 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5059 MCRegister DestRegX = TRI->getMatchingSuperReg(
5060 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5061 MCRegister SrcRegX = TRI->getMatchingSuperReg(
5062 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5063 // This instruction is reading and writing X registers. This may upset
5064 // the register scavenger and machine verifier, so we need to indicate
5065 // that we are reading an undefined value from SrcRegX, but a proper
5066 // value from SrcReg.
5067 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5068 .addReg(SrcRegX, RegState::Undef)
5069 .addImm(0)
5071 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5072 } else {
5073 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5074 .addReg(SrcReg, getKillRegState(KillSrc))
5075 .addImm(0)
5077 }
5078 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
5079 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5080 .addImm(0)
5082 } else {
5083 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5084 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5085 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5086 MCRegister DestRegX = TRI->getMatchingSuperReg(
5087 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5088 assert(DestRegX.isValid() && "Destination super-reg not valid");
5089 MCRegister SrcRegX =
5090 SrcReg == AArch64::WZR
5091 ? AArch64::XZR
5092 : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
5093 &AArch64::GPR64spRegClass);
5094 assert(SrcRegX.isValid() && "Source super-reg not valid");
5095 // This instruction is reading and writing X registers. This may upset
5096 // the register scavenger and machine verifier, so we need to indicate
5097 // that we are reading an undefined value from SrcRegX, but a proper
5098 // value from SrcReg.
5099 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5100 .addReg(AArch64::XZR)
5101 .addReg(SrcRegX, RegState::Undef)
5102 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5103 } else {
5104 // Otherwise, expand to ORR WZR.
5105 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5106 .addReg(AArch64::WZR)
5107 .addReg(SrcReg, getKillRegState(KillSrc));
5108 }
5109 }
5110 return;
5111 }
5112
5113 // Copy a Predicate register by ORRing with itself.
5114 if (AArch64::PPRRegClass.contains(DestReg) &&
5115 AArch64::PPRRegClass.contains(SrcReg)) {
5117 "Unexpected SVE register.");
5118 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5119 .addReg(SrcReg) // Pg
5120 .addReg(SrcReg)
5121 .addReg(SrcReg, getKillRegState(KillSrc));
5122 return;
5123 }
5124
5125 // Copy a predicate-as-counter register by ORRing with itself as if it
5126 // were a regular predicate (mask) register.
5127 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5128 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5129 if (DestIsPNR || SrcIsPNR) {
5130 auto ToPPR = [](MCRegister R) -> MCRegister {
5131 return (R - AArch64::PN0) + AArch64::P0;
5132 };
5133 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5134 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5135
5136 if (PPRSrcReg != PPRDestReg) {
5137 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5138 .addReg(PPRSrcReg) // Pg
5139 .addReg(PPRSrcReg)
5140 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5141 if (DestIsPNR)
5142 NewMI.addDef(DestReg, RegState::Implicit);
5143 }
5144 return;
5145 }
5146
5147 // Copy a Z register by ORRing with itself.
5148 if (AArch64::ZPRRegClass.contains(DestReg) &&
5149 AArch64::ZPRRegClass.contains(SrcReg)) {
5151 "Unexpected SVE register.");
5152 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5153 .addReg(SrcReg)
5154 .addReg(SrcReg, getKillRegState(KillSrc));
5155 return;
5156 }
5157
5158 // Copy a Z register pair by copying the individual sub-registers.
5159 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5160 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5161 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5162 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5164 "Unexpected SVE register.");
5165 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5166 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5167 Indices);
5168 return;
5169 }
5170
5171 // Copy a Z register triple by copying the individual sub-registers.
5172 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5173 AArch64::ZPR3RegClass.contains(SrcReg)) {
5175 "Unexpected SVE register.");
5176 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5177 AArch64::zsub2};
5178 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5179 Indices);
5180 return;
5181 }
5182
5183 // Copy a Z register quad by copying the individual sub-registers.
5184 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5185 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5186 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5187 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5189 "Unexpected SVE register.");
5190 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5191 AArch64::zsub2, AArch64::zsub3};
5192 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5193 Indices);
5194 return;
5195 }
5196
5197 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5198 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5199 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5200 // If either operand is SP, expand to ADD #0.
5201 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5202 .addReg(SrcReg, getKillRegState(KillSrc))
5203 .addImm(0)
5205 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
5206 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5207 .addImm(0)
5209 } else {
5210 // Otherwise, expand to ORR XZR.
5211 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5212 .addReg(AArch64::XZR)
5213 .addReg(SrcReg, getKillRegState(KillSrc));
5214 }
5215 return;
5216 }
5217
5218 // Copy a DDDD register quad by copying the individual sub-registers.
5219 if (AArch64::DDDDRegClass.contains(DestReg) &&
5220 AArch64::DDDDRegClass.contains(SrcReg)) {
5221 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5222 AArch64::dsub2, AArch64::dsub3};
5223 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5224 Indices);
5225 return;
5226 }
5227
5228 // Copy a DDD register triple by copying the individual sub-registers.
5229 if (AArch64::DDDRegClass.contains(DestReg) &&
5230 AArch64::DDDRegClass.contains(SrcReg)) {
5231 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5232 AArch64::dsub2};
5233 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5234 Indices);
5235 return;
5236 }
5237
5238 // Copy a DD register pair by copying the individual sub-registers.
5239 if (AArch64::DDRegClass.contains(DestReg) &&
5240 AArch64::DDRegClass.contains(SrcReg)) {
5241 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5242 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5243 Indices);
5244 return;
5245 }
5246
5247 // Copy a QQQQ register quad by copying the individual sub-registers.
5248 if (AArch64::QQQQRegClass.contains(DestReg) &&
5249 AArch64::QQQQRegClass.contains(SrcReg)) {
5250 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5251 AArch64::qsub2, AArch64::qsub3};
5252 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5253 Indices);
5254 return;
5255 }
5256
5257 // Copy a QQQ register triple by copying the individual sub-registers.
5258 if (AArch64::QQQRegClass.contains(DestReg) &&
5259 AArch64::QQQRegClass.contains(SrcReg)) {
5260 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5261 AArch64::qsub2};
5262 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5263 Indices);
5264 return;
5265 }
5266
5267 // Copy a QQ register pair by copying the individual sub-registers.
5268 if (AArch64::QQRegClass.contains(DestReg) &&
5269 AArch64::QQRegClass.contains(SrcReg)) {
5270 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5271 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5272 Indices);
5273 return;
5274 }
5275
5276 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5277 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5278 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5279 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5280 AArch64::XZR, Indices);
5281 return;
5282 }
5283
5284 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5285 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5286 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5287 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5288 AArch64::WZR, Indices);
5289 return;
5290 }
5291
5292 if (AArch64::FPR128RegClass.contains(DestReg) &&
5293 AArch64::FPR128RegClass.contains(SrcReg)) {
5294 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5295 !Subtarget.isNeonAvailable())
5296 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5297 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5298 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5299 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5300 else if (Subtarget.isNeonAvailable())
5301 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5302 .addReg(SrcReg)
5303 .addReg(SrcReg, getKillRegState(KillSrc));
5304 else {
5305 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5306 .addReg(AArch64::SP, RegState::Define)
5307 .addReg(SrcReg, getKillRegState(KillSrc))
5308 .addReg(AArch64::SP)
5309 .addImm(-16);
5310 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5311 .addReg(AArch64::SP, RegState::Define)
5312 .addReg(DestReg, RegState::Define)
5313 .addReg(AArch64::SP)
5314 .addImm(16);
5315 }
5316 return;
5317 }
5318
5319 if (AArch64::FPR64RegClass.contains(DestReg) &&
5320 AArch64::FPR64RegClass.contains(SrcReg)) {
5321 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5322 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5323 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5325 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub,
5326 &AArch64::FPR128RegClass);
5327 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub,
5328 &AArch64::FPR128RegClass);
5329 // This instruction is reading and writing Q registers. This may upset
5330 // the register scavenger and machine verifier, so we need to indicate
5331 // that we are reading an undefined value from SrcRegQ, but a proper
5332 // value from SrcReg.
5333 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5334 .addReg(SrcRegQ, RegState::Undef)
5335 .addReg(SrcRegQ, RegState::Undef)
5336 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5337 } else {
5338 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5339 .addReg(SrcReg, getKillRegState(KillSrc));
5340 }
5341 return;
5342 }
5343
5344 if (AArch64::FPR32RegClass.contains(DestReg) &&
5345 AArch64::FPR32RegClass.contains(SrcReg)) {
5346 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5347 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5348 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5350 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5351 &AArch64::FPR128RegClass);
5352 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5353 &AArch64::FPR128RegClass);
5354 // This instruction is reading and writing Q registers. This may upset
5355 // the register scavenger and machine verifier, so we need to indicate
5356 // that we are reading an undefined value from SrcRegQ, but a proper
5357 // value from SrcReg.
5358 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5359 .addReg(SrcRegQ, RegState::Undef)
5360 .addReg(SrcRegQ, RegState::Undef)
5361 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5362 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5363 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5365 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5366 &AArch64::FPR64RegClass);
5367 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5368 &AArch64::FPR64RegClass);
5369 // This instruction is reading and writing D registers. This may upset
5370 // the register scavenger and machine verifier, so we need to indicate
5371 // that we are reading an undefined value from SrcRegD, but a proper
5372 // value from SrcReg.
5373 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5374 .addReg(SrcRegD, RegState::Undef)
5375 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5376 } else {
5377 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5378 .addReg(SrcReg, getKillRegState(KillSrc));
5379 }
5380 return;
5381 }
5382
5383 if (AArch64::FPR16RegClass.contains(DestReg) &&
5384 AArch64::FPR16RegClass.contains(SrcReg)) {
5385 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5386 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5387 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5389 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5390 &AArch64::FPR128RegClass);
5391 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5392 &AArch64::FPR128RegClass);
5393 // This instruction is reading and writing Q registers. This may upset
5394 // the register scavenger and machine verifier, so we need to indicate
5395 // that we are reading an undefined value from SrcRegQ, but a proper
5396 // value from SrcReg.
5397 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5398 .addReg(SrcRegQ, RegState::Undef)
5399 .addReg(SrcRegQ, RegState::Undef)
5400 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5401 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5402 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5404 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5405 &AArch64::FPR64RegClass);
5406 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5407 &AArch64::FPR64RegClass);
5408 // This instruction is reading and writing D registers. This may upset
5409 // the register scavenger and machine verifier, so we need to indicate
5410 // that we are reading an undefined value from SrcRegD, but a proper
5411 // value from SrcReg.
5412 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5413 .addReg(SrcRegD, RegState::Undef)
5414 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5415 } else {
5416 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5417 &AArch64::FPR32RegClass);
5418 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5419 &AArch64::FPR32RegClass);
5420 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5421 .addReg(SrcReg, getKillRegState(KillSrc));
5422 }
5423 return;
5424 }
5425
5426 if (AArch64::FPR8RegClass.contains(DestReg) &&
5427 AArch64::FPR8RegClass.contains(SrcReg)) {
5428 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5429 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5430 !Subtarget.hasZeroCycleRegMoveFPR64() && Subtarget.isNeonAvailable()) {
5432 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5433 &AArch64::FPR128RegClass);
5434 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5435 &AArch64::FPR128RegClass);
5436 // This instruction is reading and writing Q registers. This may upset
5437 // the register scavenger and machine verifier, so we need to indicate
5438 // that we are reading an undefined value from SrcRegQ, but a proper
5439 // value from SrcReg.
5440 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5441 .addReg(SrcRegQ, RegState::Undef)
5442 .addReg(SrcRegQ, RegState::Undef)
5443 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5444 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5445 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5447 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5448 &AArch64::FPR64RegClass);
5449 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5450 &AArch64::FPR64RegClass);
5451 // This instruction is reading and writing D registers. This may upset
5452 // the register scavenger and machine verifier, so we need to indicate
5453 // that we are reading an undefined value from SrcRegD, but a proper
5454 // value from SrcReg.
5455 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5456 .addReg(SrcRegD, RegState::Undef)
5457 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5458 } else {
5459 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5460 &AArch64::FPR32RegClass);
5461 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5462 &AArch64::FPR32RegClass);
5463 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5464 .addReg(SrcReg, getKillRegState(KillSrc));
5465 }
5466 return;
5467 }
5468
5469 // Copies between GPR64 and FPR64.
5470 if (AArch64::FPR64RegClass.contains(DestReg) &&
5471 AArch64::GPR64RegClass.contains(SrcReg)) {
5472 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5473 .addReg(SrcReg, getKillRegState(KillSrc));
5474 return;
5475 }
5476 if (AArch64::GPR64RegClass.contains(DestReg) &&
5477 AArch64::FPR64RegClass.contains(SrcReg)) {
5478 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5479 .addReg(SrcReg, getKillRegState(KillSrc));
5480 return;
5481 }
5482 // Copies between GPR32 and FPR32.
5483 if (AArch64::FPR32RegClass.contains(DestReg) &&
5484 AArch64::GPR32RegClass.contains(SrcReg)) {
5485 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5486 .addReg(SrcReg, getKillRegState(KillSrc));
5487 return;
5488 }
5489 if (AArch64::GPR32RegClass.contains(DestReg) &&
5490 AArch64::FPR32RegClass.contains(SrcReg)) {
5491 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5492 .addReg(SrcReg, getKillRegState(KillSrc));
5493 return;
5494 }
5495
5496 if (DestReg == AArch64::NZCV) {
5497 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5498 BuildMI(MBB, I, DL, get(AArch64::MSR))
5499 .addImm(AArch64SysReg::NZCV)
5500 .addReg(SrcReg, getKillRegState(KillSrc))
5501 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5502 return;
5503 }
5504
5505 if (SrcReg == AArch64::NZCV) {
5506 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5507 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5508 .addImm(AArch64SysReg::NZCV)
5509 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5510 return;
5511 }
5512
5513#ifndef NDEBUG
5515 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5516 << TRI.getRegAsmName(SrcReg) << "\n";
5517#endif
5518 llvm_unreachable("unimplemented reg-to-reg copy");
5519}
5520
5523 MachineBasicBlock::iterator InsertBefore,
5524 const MCInstrDesc &MCID,
5525 Register SrcReg, bool IsKill,
5526 unsigned SubIdx0, unsigned SubIdx1, int FI,
5527 MachineMemOperand *MMO) {
5528 Register SrcReg0 = SrcReg;
5529 Register SrcReg1 = SrcReg;
5530 if (SrcReg.isPhysical()) {
5531 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5532 SubIdx0 = 0;
5533 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5534 SubIdx1 = 0;
5535 }
5536 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5537 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5538 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5539 .addFrameIndex(FI)
5540 .addImm(0)
5541 .addMemOperand(MMO);
5542}
5543
5546 Register SrcReg, bool isKill, int FI,
5547 const TargetRegisterClass *RC,
5548 const TargetRegisterInfo *TRI,
5549 Register VReg,
5550 MachineInstr::MIFlag Flags) const {
5551 MachineFunction &MF = *MBB.getParent();
5552 MachineFrameInfo &MFI = MF.getFrameInfo();
5553
5555 MachineMemOperand *MMO =
5557 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5558 unsigned Opc = 0;
5559 bool Offset = true;
5561 unsigned StackID = TargetStackID::Default;
5562 switch (TRI->getSpillSize(*RC)) {
5563 case 1:
5564 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5565 Opc = AArch64::STRBui;
5566 break;
5567 case 2: {
5568 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5569 Opc = AArch64::STRHui;
5570 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5571 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5573 "Unexpected register store without SVE store instructions");
5574 Opc = AArch64::STR_PXI;
5576 }
5577 break;
5578 }
5579 case 4:
5580 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5581 Opc = AArch64::STRWui;
5582 if (SrcReg.isVirtual())
5583 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5584 else
5585 assert(SrcReg != AArch64::WSP);
5586 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5587 Opc = AArch64::STRSui;
5588 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5589 Opc = AArch64::STR_PPXI;
5591 }
5592 break;
5593 case 8:
5594 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5595 Opc = AArch64::STRXui;
5596 if (SrcReg.isVirtual())
5597 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5598 else
5599 assert(SrcReg != AArch64::SP);
5600 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5601 Opc = AArch64::STRDui;
5602 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5604 get(AArch64::STPWi), SrcReg, isKill,
5605 AArch64::sube32, AArch64::subo32, FI, MMO);
5606 return;
5607 }
5608 break;
5609 case 16:
5610 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5611 Opc = AArch64::STRQui;
5612 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5613 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5614 Opc = AArch64::ST1Twov1d;
5615 Offset = false;
5616 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5618 get(AArch64::STPXi), SrcReg, isKill,
5619 AArch64::sube64, AArch64::subo64, FI, MMO);
5620 return;
5621 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5623 "Unexpected register store without SVE store instructions");
5624 Opc = AArch64::STR_ZXI;
5626 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5628 "Unexpected predicate store without SVE store instructions");
5629 Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
5631 }
5632 break;
5633 case 24:
5634 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5635 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5636 Opc = AArch64::ST1Threev1d;
5637 Offset = false;
5638 }
5639 break;
5640 case 32:
5641 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5642 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5643 Opc = AArch64::ST1Fourv1d;
5644 Offset = false;
5645 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5646 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5647 Opc = AArch64::ST1Twov2d;
5648 Offset = false;
5649 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5651 "Unexpected register store without SVE store instructions");
5652 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5654 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5656 "Unexpected register store without SVE store instructions");
5657 Opc = AArch64::STR_ZZXI;
5659 }
5660 break;
5661 case 48:
5662 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5663 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5664 Opc = AArch64::ST1Threev2d;
5665 Offset = false;
5666 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5668 "Unexpected register store without SVE store instructions");
5669 Opc = AArch64::STR_ZZZXI;
5671 }
5672 break;
5673 case 64:
5674 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5675 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5676 Opc = AArch64::ST1Fourv2d;
5677 Offset = false;
5678 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5680 "Unexpected register store without SVE store instructions");
5681 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5683 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5685 "Unexpected register store without SVE store instructions");
5686 Opc = AArch64::STR_ZZZZXI;
5688 }
5689 break;
5690 }
5691 assert(Opc && "Unknown register class");
5692 MFI.setStackID(FI, StackID);
5693
5695 .addReg(SrcReg, getKillRegState(isKill))
5696 .addFrameIndex(FI);
5697
5698 if (Offset)
5699 MI.addImm(0);
5700 if (PNRReg.isValid())
5701 MI.addDef(PNRReg, RegState::Implicit);
5702 MI.addMemOperand(MMO);
5703}
5704
5707 MachineBasicBlock::iterator InsertBefore,
5708 const MCInstrDesc &MCID,
5709 Register DestReg, unsigned SubIdx0,
5710 unsigned SubIdx1, int FI,
5711 MachineMemOperand *MMO) {
5712 Register DestReg0 = DestReg;
5713 Register DestReg1 = DestReg;
5714 bool IsUndef = true;
5715 if (DestReg.isPhysical()) {
5716 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5717 SubIdx0 = 0;
5718 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5719 SubIdx1 = 0;
5720 IsUndef = false;
5721 }
5722 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5723 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5724 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5725 .addFrameIndex(FI)
5726 .addImm(0)
5727 .addMemOperand(MMO);
5728}
5729
5732 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5733 Register VReg, MachineInstr::MIFlag Flags) const {
5734 MachineFunction &MF = *MBB.getParent();
5735 MachineFrameInfo &MFI = MF.getFrameInfo();
5737 MachineMemOperand *MMO =
5739 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5740
5741 unsigned Opc = 0;
5742 bool Offset = true;
5743 unsigned StackID = TargetStackID::Default;
5745 switch (TRI->getSpillSize(*RC)) {
5746 case 1:
5747 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5748 Opc = AArch64::LDRBui;
5749 break;
5750 case 2: {
5751 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5752 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5753 Opc = AArch64::LDRHui;
5754 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5756 "Unexpected register load without SVE load instructions");
5757 if (IsPNR)
5758 PNRReg = DestReg;
5759 Opc = AArch64::LDR_PXI;
5761 }
5762 break;
5763 }
5764 case 4:
5765 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5766 Opc = AArch64::LDRWui;
5767 if (DestReg.isVirtual())
5768 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5769 else
5770 assert(DestReg != AArch64::WSP);
5771 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5772 Opc = AArch64::LDRSui;
5773 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5774 Opc = AArch64::LDR_PPXI;
5776 }
5777 break;
5778 case 8:
5779 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5780 Opc = AArch64::LDRXui;
5781 if (DestReg.isVirtual())
5782 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5783 else
5784 assert(DestReg != AArch64::SP);
5785 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5786 Opc = AArch64::LDRDui;
5787 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5789 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5790 AArch64::subo32, FI, MMO);
5791 return;
5792 }
5793 break;
5794 case 16:
5795 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5796 Opc = AArch64::LDRQui;
5797 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5798 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5799 Opc = AArch64::LD1Twov1d;
5800 Offset = false;
5801 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5803 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5804 AArch64::subo64, FI, MMO);
5805 return;
5806 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5808 "Unexpected register load without SVE load instructions");
5809 Opc = AArch64::LDR_ZXI;
5811 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5813 "Unexpected predicate load without SVE load instructions");
5814 Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
5816 }
5817 break;
5818 case 24:
5819 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5820 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5821 Opc = AArch64::LD1Threev1d;
5822 Offset = false;
5823 }
5824 break;
5825 case 32:
5826 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5827 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5828 Opc = AArch64::LD1Fourv1d;
5829 Offset = false;
5830 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5831 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5832 Opc = AArch64::LD1Twov2d;
5833 Offset = false;
5834 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5836 "Unexpected register load without SVE load instructions");
5837 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5839 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5841 "Unexpected register load without SVE load instructions");
5842 Opc = AArch64::LDR_ZZXI;
5844 }
5845 break;
5846 case 48:
5847 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5848 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5849 Opc = AArch64::LD1Threev2d;
5850 Offset = false;
5851 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5853 "Unexpected register load without SVE load instructions");
5854 Opc = AArch64::LDR_ZZZXI;
5856 }
5857 break;
5858 case 64:
5859 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5860 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5861 Opc = AArch64::LD1Fourv2d;
5862 Offset = false;
5863 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5865 "Unexpected register load without SVE load instructions");
5866 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5868 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5870 "Unexpected register load without SVE load instructions");
5871 Opc = AArch64::LDR_ZZZZXI;
5873 }
5874 break;
5875 }
5876
5877 assert(Opc && "Unknown register class");
5878 MFI.setStackID(FI, StackID);
5879
5881 .addReg(DestReg, getDefRegState(true))
5882 .addFrameIndex(FI);
5883 if (Offset)
5884 MI.addImm(0);
5885 if (PNRReg.isValid() && !PNRReg.isVirtual())
5886 MI.addDef(PNRReg, RegState::Implicit);
5887 MI.addMemOperand(MMO);
5888}
5889
5891 const MachineInstr &UseMI,
5892 const TargetRegisterInfo *TRI) {
5893 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5894 UseMI.getIterator()),
5895 [TRI](const MachineInstr &I) {
5896 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5897 I.readsRegister(AArch64::NZCV, TRI);
5898 });
5899}
5900
5902 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5903 // The smallest scalable element supported by scaled SVE addressing
5904 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5905 // byte offset must always be a multiple of 2.
5906 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5907
5908 // VGSized offsets are divided by '2', because the VG register is the
5909 // the number of 64bit granules as opposed to 128bit vector chunks,
5910 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5911 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5912 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5913 ByteSized = Offset.getFixed();
5914 VGSized = Offset.getScalable() / 2;
5915}
5916
5917/// Returns the offset in parts to which this frame offset can be
5918/// decomposed for the purpose of describing a frame offset.
5919/// For non-scalable offsets this is simply its byte size.
5921 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5922 int64_t &NumDataVectors) {
5923 // The smallest scalable element supported by scaled SVE addressing
5924 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5925 // byte offset must always be a multiple of 2.
5926 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5927
5928 NumBytes = Offset.getFixed();
5929 NumDataVectors = 0;
5930 NumPredicateVectors = Offset.getScalable() / 2;
5931 // This method is used to get the offsets to adjust the frame offset.
5932 // If the function requires ADDPL to be used and needs more than two ADDPL
5933 // instructions, part of the offset is folded into NumDataVectors so that it
5934 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5935 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5936 NumPredicateVectors > 62) {
5937 NumDataVectors = NumPredicateVectors / 8;
5938 NumPredicateVectors -= NumDataVectors * 8;
5939 }
5940}
5941
5942// Convenience function to create a DWARF expression for: Constant `Operation`.
5943// This helper emits compact sequences for common cases. For example, for`-15
5944// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
5947 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
5948 // -Constant (1 to 31)
5949 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
5950 Operation = dwarf::DW_OP_minus;
5951 } else if (Constant >= 0 && Constant <= 31) {
5952 // Literal value 0 to 31
5953 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
5954 } else {
5955 // Signed constant
5956 Expr.push_back(dwarf::DW_OP_consts);
5957 appendLEB128<LEB128Sign::Signed>(Expr, Constant);
5958 }
5959 return Expr.push_back(Operation);
5960}
5961
5962// Convenience function to create a DWARF expression for a register.
5963static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
5964 Expr.push_back((char)dwarf::DW_OP_bregx);
5965 appendLEB128<LEB128Sign::Unsigned>(Expr, RegNum);
5966 Expr.push_back(0);
5967}
5968
5969// Convenience function to create a DWARF expression for loading a register from
5970// a CFA offset.
5972 int64_t OffsetFromDefCFA) {
5973 // This assumes the top of the DWARF stack contains the CFA.
5974 Expr.push_back(dwarf::DW_OP_dup);
5975 // Add the offset to the register.
5976 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
5977 // Dereference the address (loads a 64 bit value)..
5978 Expr.push_back(dwarf::DW_OP_deref);
5979}
5980
5981// Convenience function to create a comment for
5982// (+/-) NumBytes (* RegScale)?
5983static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
5984 StringRef RegScale = {}) {
5985 if (NumBytes) {
5986 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5987 if (!RegScale.empty())
5988 Comment << ' ' << RegScale;
5989 }
5990}
5991
5992// Creates an MCCFIInstruction:
5993// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5995 unsigned Reg,
5996 const StackOffset &Offset) {
5997 int64_t NumBytes, NumVGScaledBytes;
5999 NumVGScaledBytes);
6000 std::string CommentBuffer;
6001 llvm::raw_string_ostream Comment(CommentBuffer);
6002
6003 if (Reg == AArch64::SP)
6004 Comment << "sp";
6005 else if (Reg == AArch64::FP)
6006 Comment << "fp";
6007 else
6008 Comment << printReg(Reg, &TRI);
6009
6010 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6011 SmallString<64> Expr;
6012 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6013 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6014 // Reg + NumBytes
6015 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6016 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6017 appendOffsetComment(NumBytes, Comment);
6018 if (NumVGScaledBytes) {
6019 // + VG * NumVGScaledBytes
6020 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6021 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6022 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6023 Expr.push_back(dwarf::DW_OP_plus);
6024 }
6025
6026 // Wrap this into DW_CFA_def_cfa.
6027 SmallString<64> DefCfaExpr;
6028 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6029 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6030 DefCfaExpr.append(Expr.str());
6031 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6032 Comment.str());
6033}
6034
6036 unsigned FrameReg, unsigned Reg,
6037 const StackOffset &Offset,
6038 bool LastAdjustmentWasScalable) {
6039 if (Offset.getScalable())
6040 return createDefCFAExpression(TRI, Reg, Offset);
6041
6042 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6043 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6044
6045 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6046 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6047}
6048
6051 const StackOffset &OffsetFromDefCFA,
6052 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6053 int64_t NumBytes, NumVGScaledBytes;
6055 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6056
6057 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6058
6059 // Non-scalable offsets can use DW_CFA_offset directly.
6060 if (!NumVGScaledBytes)
6061 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6062
6063 std::string CommentBuffer;
6064 llvm::raw_string_ostream Comment(CommentBuffer);
6065 Comment << printReg(Reg, &TRI) << " @ cfa";
6066
6067 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6068 assert(NumVGScaledBytes && "Expected scalable offset");
6069 SmallString<64> OffsetExpr;
6070 // + VG * NumVGScaledBytes
6071 StringRef VGRegScale;
6072 if (IncomingVGOffsetFromDefCFA) {
6073 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6074 VGRegScale = "* IncomingVG";
6075 } else {
6076 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6077 VGRegScale = "* VG";
6078 }
6079 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6080 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6081 OffsetExpr.push_back(dwarf::DW_OP_plus);
6082 if (NumBytes) {
6083 // + NumBytes
6084 appendOffsetComment(NumBytes, Comment);
6085 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6086 }
6087
6088 // Wrap this into DW_CFA_expression
6089 SmallString<64> CfaExpr;
6090 CfaExpr.push_back(dwarf::DW_CFA_expression);
6091 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6092 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6093 CfaExpr.append(OffsetExpr.str());
6094
6095 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6096 Comment.str());
6097}
6098
6099// Helper function to emit a frame offset adjustment from a given
6100// pointer (SrcReg), stored into DestReg. This function is explicit
6101// in that it requires the opcode.
6104 const DebugLoc &DL, unsigned DestReg,
6105 unsigned SrcReg, int64_t Offset, unsigned Opc,
6106 const TargetInstrInfo *TII,
6107 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6108 bool *HasWinCFI, bool EmitCFAOffset,
6109 StackOffset CFAOffset, unsigned FrameReg) {
6110 int Sign = 1;
6111 unsigned MaxEncoding, ShiftSize;
6112 switch (Opc) {
6113 case AArch64::ADDXri:
6114 case AArch64::ADDSXri:
6115 case AArch64::SUBXri:
6116 case AArch64::SUBSXri:
6117 MaxEncoding = 0xfff;
6118 ShiftSize = 12;
6119 break;
6120 case AArch64::ADDVL_XXI:
6121 case AArch64::ADDPL_XXI:
6122 case AArch64::ADDSVL_XXI:
6123 case AArch64::ADDSPL_XXI:
6124 MaxEncoding = 31;
6125 ShiftSize = 0;
6126 if (Offset < 0) {
6127 MaxEncoding = 32;
6128 Sign = -1;
6129 Offset = -Offset;
6130 }
6131 break;
6132 default:
6133 llvm_unreachable("Unsupported opcode");
6134 }
6135
6136 // `Offset` can be in bytes or in "scalable bytes".
6137 int VScale = 1;
6138 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6139 VScale = 16;
6140 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6141 VScale = 2;
6142
6143 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6144 // scratch register. If DestReg is a virtual register, use it as the
6145 // scratch register; otherwise, create a new virtual register (to be
6146 // replaced by the scavenger at the end of PEI). That case can be optimized
6147 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6148 // register can be loaded with offset%8 and the add/sub can use an extending
6149 // instruction with LSL#3.
6150 // Currently the function handles any offsets but generates a poor sequence
6151 // of code.
6152 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6153
6154 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6155 Register TmpReg = DestReg;
6156 if (TmpReg == AArch64::XZR)
6158 &AArch64::GPR64RegClass);
6159 do {
6160 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6161 unsigned LocalShiftSize = 0;
6162 if (ThisVal > MaxEncoding) {
6163 ThisVal = ThisVal >> ShiftSize;
6164 LocalShiftSize = ShiftSize;
6165 }
6166 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6167 "Encoding cannot handle value that big");
6168
6169 Offset -= ThisVal << LocalShiftSize;
6170 if (Offset == 0)
6171 TmpReg = DestReg;
6172 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6173 .addReg(SrcReg)
6174 .addImm(Sign * (int)ThisVal);
6175 if (ShiftSize)
6176 MBI = MBI.addImm(
6178 MBI = MBI.setMIFlag(Flag);
6179
6180 auto Change =
6181 VScale == 1
6182 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6183 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6184 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6185 CFAOffset += Change;
6186 else
6187 CFAOffset -= Change;
6188 if (EmitCFAOffset && DestReg == TmpReg) {
6189 MachineFunction &MF = *MBB.getParent();
6190 const TargetSubtargetInfo &STI = MF.getSubtarget();
6191 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6192
6193 unsigned CFIIndex = MF.addFrameInst(
6194 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6195 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6196 .addCFIIndex(CFIIndex)
6197 .setMIFlags(Flag);
6198 }
6199
6200 if (NeedsWinCFI) {
6201 int Imm = (int)(ThisVal << LocalShiftSize);
6202 if (VScale != 1 && DestReg == AArch64::SP) {
6203 if (HasWinCFI)
6204 *HasWinCFI = true;
6205 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6206 .addImm(ThisVal)
6207 .setMIFlag(Flag);
6208 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6209 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6210 assert(VScale == 1 && "Expected non-scalable operation");
6211 if (HasWinCFI)
6212 *HasWinCFI = true;
6213 if (Imm == 0)
6214 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6215 else
6216 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6217 .addImm(Imm)
6218 .setMIFlag(Flag);
6219 assert(Offset == 0 && "Expected remaining offset to be zero to "
6220 "emit a single SEH directive");
6221 } else if (DestReg == AArch64::SP) {
6222 assert(VScale == 1 && "Expected non-scalable operation");
6223 if (HasWinCFI)
6224 *HasWinCFI = true;
6225 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6226 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6227 .addImm(Imm)
6228 .setMIFlag(Flag);
6229 }
6230 }
6231
6232 SrcReg = TmpReg;
6233 } while (Offset);
6234}
6235
6238 unsigned DestReg, unsigned SrcReg,
6240 MachineInstr::MIFlag Flag, bool SetNZCV,
6241 bool NeedsWinCFI, bool *HasWinCFI,
6242 bool EmitCFAOffset, StackOffset CFAOffset,
6243 unsigned FrameReg) {
6244 // If a function is marked as arm_locally_streaming, then the runtime value of
6245 // vscale in the prologue/epilogue is different the runtime value of vscale
6246 // in the function's body. To avoid having to consider multiple vscales,
6247 // we can use `addsvl` to allocate any scalable stack-slots, which under
6248 // most circumstances will be only locals, not callee-save slots.
6249 const Function &F = MBB.getParent()->getFunction();
6250 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6251
6252 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6254 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6255
6256 // First emit non-scalable frame offsets, or a simple 'mov'.
6257 if (Bytes || (!Offset && SrcReg != DestReg)) {
6258 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6259 "SP increment/decrement not 8-byte aligned");
6260 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6261 if (Bytes < 0) {
6262 Bytes = -Bytes;
6263 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6264 }
6265 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6266 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6267 FrameReg);
6268 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6269 ? StackOffset::getFixed(-Bytes)
6270 : StackOffset::getFixed(Bytes);
6271 SrcReg = DestReg;
6272 FrameReg = DestReg;
6273 }
6274
6275 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
6276 "SetNZCV not supported with SVE vectors");
6277 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6278 "WinCFI can't allocate fractions of an SVE data vector");
6279
6280 if (NumDataVectors) {
6281 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6282 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6283 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6284 FrameReg);
6285 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6286 SrcReg = DestReg;
6287 }
6288
6289 if (NumPredicateVectors) {
6290 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6291 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6292 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6293 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6294 FrameReg);
6295 }
6296}
6297
6300 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6301 LiveIntervals *LIS, VirtRegMap *VRM) const {
6302 // This is a bit of a hack. Consider this instruction:
6303 //
6304 // %0 = COPY %sp; GPR64all:%0
6305 //
6306 // We explicitly chose GPR64all for the virtual register so such a copy might
6307 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6308 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6309 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6310 //
6311 // To prevent that, we are going to constrain the %0 register class here.
6312 if (MI.isFullCopy()) {
6313 Register DstReg = MI.getOperand(0).getReg();
6314 Register SrcReg = MI.getOperand(1).getReg();
6315 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6316 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6317 return nullptr;
6318 }
6319 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6320 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6321 return nullptr;
6322 }
6323 // Nothing can folded with copy from/to NZCV.
6324 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6325 return nullptr;
6326 }
6327
6328 // Handle the case where a copy is being spilled or filled but the source
6329 // and destination register class don't match. For example:
6330 //
6331 // %0 = COPY %xzr; GPR64common:%0
6332 //
6333 // In this case we can still safely fold away the COPY and generate the
6334 // following spill code:
6335 //
6336 // STRXui %xzr, %stack.0
6337 //
6338 // This also eliminates spilled cross register class COPYs (e.g. between x and
6339 // d regs) of the same size. For example:
6340 //
6341 // %0 = COPY %1; GPR64:%0, FPR64:%1
6342 //
6343 // will be filled as
6344 //
6345 // LDRDui %0, fi<#0>
6346 //
6347 // instead of
6348 //
6349 // LDRXui %Temp, fi<#0>
6350 // %0 = FMOV %Temp
6351 //
6352 if (MI.isCopy() && Ops.size() == 1 &&
6353 // Make sure we're only folding the explicit COPY defs/uses.
6354 (Ops[0] == 0 || Ops[0] == 1)) {
6355 bool IsSpill = Ops[0] == 0;
6356 bool IsFill = !IsSpill;
6358 const MachineRegisterInfo &MRI = MF.getRegInfo();
6359 MachineBasicBlock &MBB = *MI.getParent();
6360 const MachineOperand &DstMO = MI.getOperand(0);
6361 const MachineOperand &SrcMO = MI.getOperand(1);
6362 Register DstReg = DstMO.getReg();
6363 Register SrcReg = SrcMO.getReg();
6364 // This is slightly expensive to compute for physical regs since
6365 // getMinimalPhysRegClass is slow.
6366 auto getRegClass = [&](unsigned Reg) {
6367 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6368 : TRI.getMinimalPhysRegClass(Reg);
6369 };
6370
6371 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6372 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6373 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6374 "Mismatched register size in non subreg COPY");
6375 if (IsSpill)
6376 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6377 getRegClass(SrcReg), &TRI, Register());
6378 else
6379 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6380 getRegClass(DstReg), &TRI, Register());
6381 return &*--InsertPt;
6382 }
6383
6384 // Handle cases like spilling def of:
6385 //
6386 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6387 //
6388 // where the physical register source can be widened and stored to the full
6389 // virtual reg destination stack slot, in this case producing:
6390 //
6391 // STRXui %xzr, %stack.0
6392 //
6393 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6394 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6395 assert(SrcMO.getSubReg() == 0 &&
6396 "Unexpected subreg on physical register");
6397 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6398 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6399 Register());
6400 return &*--InsertPt;
6401 }
6402
6403 // Handle cases like filling use of:
6404 //
6405 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6406 //
6407 // where we can load the full virtual reg source stack slot, into the subreg
6408 // destination, in this case producing:
6409 //
6410 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6411 //
6412 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6413 const TargetRegisterClass *FillRC = nullptr;
6414 switch (DstMO.getSubReg()) {
6415 default:
6416 break;
6417 case AArch64::sub_32:
6418 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6419 FillRC = &AArch64::GPR32RegClass;
6420 break;
6421 case AArch64::ssub:
6422 FillRC = &AArch64::FPR32RegClass;
6423 break;
6424 case AArch64::dsub:
6425 FillRC = &AArch64::FPR64RegClass;
6426 break;
6427 }
6428
6429 if (FillRC) {
6430 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6431 TRI.getRegSizeInBits(*FillRC) &&
6432 "Mismatched regclass size on folded subreg COPY");
6433 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6434 Register());
6435 MachineInstr &LoadMI = *--InsertPt;
6436 MachineOperand &LoadDst = LoadMI.getOperand(0);
6437 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6438 LoadDst.setSubReg(DstMO.getSubReg());
6439 LoadDst.setIsUndef();
6440 return &LoadMI;
6441 }
6442 }
6443 }
6444
6445 // Cannot fold.
6446 return nullptr;
6447}
6448
6450 StackOffset &SOffset,
6451 bool *OutUseUnscaledOp,
6452 unsigned *OutUnscaledOp,
6453 int64_t *EmittableOffset) {
6454 // Set output values in case of early exit.
6455 if (EmittableOffset)
6456 *EmittableOffset = 0;
6457 if (OutUseUnscaledOp)
6458 *OutUseUnscaledOp = false;
6459 if (OutUnscaledOp)
6460 *OutUnscaledOp = 0;
6461
6462 // Exit early for structured vector spills/fills as they can't take an
6463 // immediate offset.
6464 switch (MI.getOpcode()) {
6465 default:
6466 break;
6467 case AArch64::LD1Rv1d:
6468 case AArch64::LD1Rv2s:
6469 case AArch64::LD1Rv2d:
6470 case AArch64::LD1Rv4h:
6471 case AArch64::LD1Rv4s:
6472 case AArch64::LD1Rv8b:
6473 case AArch64::LD1Rv8h:
6474 case AArch64::LD1Rv16b:
6475 case AArch64::LD1Twov2d:
6476 case AArch64::LD1Threev2d:
6477 case AArch64::LD1Fourv2d:
6478 case AArch64::LD1Twov1d:
6479 case AArch64::LD1Threev1d:
6480 case AArch64::LD1Fourv1d:
6481 case AArch64::ST1Twov2d:
6482 case AArch64::ST1Threev2d:
6483 case AArch64::ST1Fourv2d:
6484 case AArch64::ST1Twov1d:
6485 case AArch64::ST1Threev1d:
6486 case AArch64::ST1Fourv1d:
6487 case AArch64::ST1i8:
6488 case AArch64::ST1i16:
6489 case AArch64::ST1i32:
6490 case AArch64::ST1i64:
6491 case AArch64::IRG:
6492 case AArch64::IRGstack:
6493 case AArch64::STGloop:
6494 case AArch64::STZGloop:
6496 }
6497
6498 // Get the min/max offset and the scale.
6499 TypeSize ScaleValue(0U, false), Width(0U, false);
6500 int64_t MinOff, MaxOff;
6501 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6502 MaxOff))
6503 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6504
6505 // Construct the complete offset.
6506 bool IsMulVL = ScaleValue.isScalable();
6507 unsigned Scale = ScaleValue.getKnownMinValue();
6508 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6509
6510 const MachineOperand &ImmOpnd =
6511 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6512 Offset += ImmOpnd.getImm() * Scale;
6513
6514 // If the offset doesn't match the scale, we rewrite the instruction to
6515 // use the unscaled instruction instead. Likewise, if we have a negative
6516 // offset and there is an unscaled op to use.
6517 std::optional<unsigned> UnscaledOp =
6519 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6520 if (useUnscaledOp &&
6521 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6522 MaxOff))
6523 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6524
6525 Scale = ScaleValue.getKnownMinValue();
6526 assert(IsMulVL == ScaleValue.isScalable() &&
6527 "Unscaled opcode has different value for scalable");
6528
6529 int64_t Remainder = Offset % Scale;
6530 assert(!(Remainder && useUnscaledOp) &&
6531 "Cannot have remainder when using unscaled op");
6532
6533 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6534 int64_t NewOffset = Offset / Scale;
6535 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6536 Offset = Remainder;
6537 else {
6538 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6539 Offset = Offset - (NewOffset * Scale);
6540 }
6541
6542 if (EmittableOffset)
6543 *EmittableOffset = NewOffset;
6544 if (OutUseUnscaledOp)
6545 *OutUseUnscaledOp = useUnscaledOp;
6546 if (OutUnscaledOp && UnscaledOp)
6547 *OutUnscaledOp = *UnscaledOp;
6548
6549 if (IsMulVL)
6550 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6551 else
6552 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6554 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6555}
6556
6558 unsigned FrameReg, StackOffset &Offset,
6559 const AArch64InstrInfo *TII) {
6560 unsigned Opcode = MI.getOpcode();
6561 unsigned ImmIdx = FrameRegIdx + 1;
6562
6563 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6564 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6565 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6566 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6567 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6568 MI.eraseFromParent();
6569 Offset = StackOffset();
6570 return true;
6571 }
6572
6573 int64_t NewOffset;
6574 unsigned UnscaledOp;
6575 bool UseUnscaledOp;
6576 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6577 &UnscaledOp, &NewOffset);
6580 // Replace the FrameIndex with FrameReg.
6581 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6582 if (UseUnscaledOp)
6583 MI.setDesc(TII->get(UnscaledOp));
6584
6585 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6586 return !Offset;
6587 }
6588
6589 return false;
6590}
6591
6594 DebugLoc DL;
6595 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
6596}
6597
6599 return MCInstBuilder(AArch64::HINT).addImm(0);
6600}
6601
6602// AArch64 supports MachineCombiner.
6603bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6604
6605// True when Opc sets flag
6606static bool isCombineInstrSettingFlag(unsigned Opc) {
6607 switch (Opc) {
6608 case AArch64::ADDSWrr:
6609 case AArch64::ADDSWri:
6610 case AArch64::ADDSXrr:
6611 case AArch64::ADDSXri:
6612 case AArch64::SUBSWrr:
6613 case AArch64::SUBSXrr:
6614 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6615 case AArch64::SUBSWri:
6616 case AArch64::SUBSXri:
6617 return true;
6618 default:
6619 break;
6620 }
6621 return false;
6622}
6623
6624// 32b Opcodes that can be combined with a MUL
6625static bool isCombineInstrCandidate32(unsigned Opc) {
6626 switch (Opc) {
6627 case AArch64::ADDWrr:
6628 case AArch64::ADDWri:
6629 case AArch64::SUBWrr:
6630 case AArch64::ADDSWrr:
6631 case AArch64::ADDSWri:
6632 case AArch64::SUBSWrr:
6633 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6634 case AArch64::SUBWri:
6635 case AArch64::SUBSWri:
6636 return true;
6637 default:
6638 break;
6639 }
6640 return false;
6641}
6642
6643// 64b Opcodes that can be combined with a MUL
6644static bool isCombineInstrCandidate64(unsigned Opc) {
6645 switch (Opc) {
6646 case AArch64::ADDXrr:
6647 case AArch64::ADDXri:
6648 case AArch64::SUBXrr:
6649 case AArch64::ADDSXrr:
6650 case AArch64::ADDSXri:
6651 case AArch64::SUBSXrr:
6652 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6653 case AArch64::SUBXri:
6654 case AArch64::SUBSXri:
6655 case AArch64::ADDv8i8:
6656 case AArch64::ADDv16i8:
6657 case AArch64::ADDv4i16:
6658 case AArch64::ADDv8i16:
6659 case AArch64::ADDv2i32:
6660 case AArch64::ADDv4i32:
6661 case AArch64::SUBv8i8:
6662 case AArch64::SUBv16i8:
6663 case AArch64::SUBv4i16:
6664 case AArch64::SUBv8i16:
6665 case AArch64::SUBv2i32:
6666 case AArch64::SUBv4i32:
6667 return true;
6668 default:
6669 break;
6670 }
6671 return false;
6672}
6673
6674// FP Opcodes that can be combined with a FMUL.
6675static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6676 switch (Inst.getOpcode()) {
6677 default:
6678 break;
6679 case AArch64::FADDHrr:
6680 case AArch64::FADDSrr:
6681 case AArch64::FADDDrr:
6682 case AArch64::FADDv4f16:
6683 case AArch64::FADDv8f16:
6684 case AArch64::FADDv2f32:
6685 case AArch64::FADDv2f64:
6686 case AArch64::FADDv4f32:
6687 case AArch64::FSUBHrr:
6688 case AArch64::FSUBSrr:
6689 case AArch64::FSUBDrr:
6690 case AArch64::FSUBv4f16:
6691 case AArch64::FSUBv8f16:
6692 case AArch64::FSUBv2f32:
6693 case AArch64::FSUBv2f64:
6694 case AArch64::FSUBv4f32:
6696 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6697 // the target options or if FADD/FSUB has the contract fast-math flag.
6698 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
6700 }
6701 return false;
6702}
6703
6704// Opcodes that can be combined with a MUL
6705static bool isCombineInstrCandidate(unsigned Opc) {
6707}
6708
6709//
6710// Utility routine that checks if \param MO is defined by an
6711// \param CombineOpc instruction in the basic block \param MBB
6713 unsigned CombineOpc, unsigned ZeroReg = 0,
6714 bool CheckZeroReg = false) {
6716 MachineInstr *MI = nullptr;
6717
6718 if (MO.isReg() && MO.getReg().isVirtual())
6719 MI = MRI.getUniqueVRegDef(MO.getReg());
6720 // And it needs to be in the trace (otherwise, it won't have a depth).
6721 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
6722 return false;
6723 // Must only used by the user we combine with.
6724 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6725 return false;
6726
6727 if (CheckZeroReg) {
6728 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6729 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6730 MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
6731 // The third input reg must be zero.
6732 if (MI->getOperand(3).getReg() != ZeroReg)
6733 return false;
6734 }
6735
6736 if (isCombineInstrSettingFlag(CombineOpc) &&
6737 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6738 return false;
6739
6740 return true;
6741}
6742
6743//
6744// Is \param MO defined by an integer multiply and can be combined?
6746 unsigned MulOpc, unsigned ZeroReg) {
6747 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6748}
6749
6750//
6751// Is \param MO defined by a floating-point multiply and can be combined?
6753 unsigned MulOpc) {
6754 return canCombine(MBB, MO, MulOpc);
6755}
6756
6757// TODO: There are many more machine instruction opcodes to match:
6758// 1. Other data types (integer, vectors)
6759// 2. Other math / logic operations (xor, or)
6760// 3. Other forms of the same operation (intrinsics and other variants)
6762 bool Invert) const {
6763 if (Invert)
6764 return false;
6765 switch (Inst.getOpcode()) {
6766 // == Floating-point types ==
6767 // -- Floating-point instructions --
6768 case AArch64::FADDHrr:
6769 case AArch64::FADDSrr:
6770 case AArch64::FADDDrr:
6771 case AArch64::FMULHrr:
6772 case AArch64::FMULSrr:
6773 case AArch64::FMULDrr:
6774 case AArch64::FMULX16:
6775 case AArch64::FMULX32:
6776 case AArch64::FMULX64:
6777 // -- Advanced SIMD instructions --
6778 case AArch64::FADDv4f16:
6779 case AArch64::FADDv8f16:
6780 case AArch64::FADDv2f32:
6781 case AArch64::FADDv4f32:
6782 case AArch64::FADDv2f64:
6783 case AArch64::FMULv4f16:
6784 case AArch64::FMULv8f16:
6785 case AArch64::FMULv2f32:
6786 case AArch64::FMULv4f32:
6787 case AArch64::FMULv2f64:
6788 case AArch64::FMULXv4f16:
6789 case AArch64::FMULXv8f16:
6790 case AArch64::FMULXv2f32:
6791 case AArch64::FMULXv4f32:
6792 case AArch64::FMULXv2f64:
6793 // -- SVE instructions --
6794 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6795 // in the SVE instruction set (though there are predicated ones).
6796 case AArch64::FADD_ZZZ_H:
6797 case AArch64::FADD_ZZZ_S:
6798 case AArch64::FADD_ZZZ_D:
6799 case AArch64::FMUL_ZZZ_H:
6800 case AArch64::FMUL_ZZZ_S:
6801 case AArch64::FMUL_ZZZ_D:
6804
6805 // == Integer types ==
6806 // -- Base instructions --
6807 // Opcodes MULWrr and MULXrr don't exist because
6808 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6809 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6810 // The machine-combiner does not support three-source-operands machine
6811 // instruction. So we cannot reassociate MULs.
6812 case AArch64::ADDWrr:
6813 case AArch64::ADDXrr:
6814 case AArch64::ANDWrr:
6815 case AArch64::ANDXrr:
6816 case AArch64::ORRWrr:
6817 case AArch64::ORRXrr:
6818 case AArch64::EORWrr:
6819 case AArch64::EORXrr:
6820 case AArch64::EONWrr:
6821 case AArch64::EONXrr:
6822 // -- Advanced SIMD instructions --
6823 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6824 // in the Advanced SIMD instruction set.
6825 case AArch64::ADDv8i8:
6826 case AArch64::ADDv16i8:
6827 case AArch64::ADDv4i16:
6828 case AArch64::ADDv8i16:
6829 case AArch64::ADDv2i32:
6830 case AArch64::ADDv4i32:
6831 case AArch64::ADDv1i64:
6832 case AArch64::ADDv2i64:
6833 case AArch64::MULv8i8:
6834 case AArch64::MULv16i8:
6835 case AArch64::MULv4i16:
6836 case AArch64::MULv8i16:
6837 case AArch64::MULv2i32:
6838 case AArch64::MULv4i32:
6839 case AArch64::ANDv8i8:
6840 case AArch64::ANDv16i8:
6841 case AArch64::ORRv8i8:
6842 case AArch64::ORRv16i8:
6843 case AArch64::EORv8i8:
6844 case AArch64::EORv16i8:
6845 // -- SVE instructions --
6846 case AArch64::ADD_ZZZ_B:
6847 case AArch64::ADD_ZZZ_H:
6848 case AArch64::ADD_ZZZ_S:
6849 case AArch64::ADD_ZZZ_D:
6850 case AArch64::MUL_ZZZ_B:
6851 case AArch64::MUL_ZZZ_H:
6852 case AArch64::MUL_ZZZ_S:
6853 case AArch64::MUL_ZZZ_D:
6854 case AArch64::AND_ZZZ:
6855 case AArch64::ORR_ZZZ:
6856 case AArch64::EOR_ZZZ:
6857 return true;
6858
6859 default:
6860 return false;
6861 }
6862}
6863
6864/// Find instructions that can be turned into madd.
6866 SmallVectorImpl<unsigned> &Patterns) {
6867 unsigned Opc = Root.getOpcode();
6868 MachineBasicBlock &MBB = *Root.getParent();
6869 bool Found = false;
6870
6872 return false;
6874 int Cmp_NZCV =
6875 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6876 // When NZCV is live bail out.
6877 if (Cmp_NZCV == -1)
6878 return false;
6879 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6880 // When opcode can't change bail out.
6881 // CHECKME: do we miss any cases for opcode conversion?
6882 if (NewOpc == Opc)
6883 return false;
6884 Opc = NewOpc;
6885 }
6886
6887 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6888 unsigned Pattern) {
6889 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6890 Patterns.push_back(Pattern);
6891 Found = true;
6892 }
6893 };
6894
6895 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6896 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6897 Patterns.push_back(Pattern);
6898 Found = true;
6899 }
6900 };
6901
6903
6904 switch (Opc) {
6905 default:
6906 break;
6907 case AArch64::ADDWrr:
6908 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6909 "ADDWrr does not have register operands");
6910 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6911 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6912 break;
6913 case AArch64::ADDXrr:
6914 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6915 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6916 break;
6917 case AArch64::SUBWrr:
6918 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6919 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6920 break;
6921 case AArch64::SUBXrr:
6922 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6923 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6924 break;
6925 case AArch64::ADDWri:
6926 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6927 break;
6928 case AArch64::ADDXri:
6929 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6930 break;
6931 case AArch64::SUBWri:
6932 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6933 break;
6934 case AArch64::SUBXri:
6935 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6936 break;
6937 case AArch64::ADDv8i8:
6938 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6939 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6940 break;
6941 case AArch64::ADDv16i8:
6942 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6943 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6944 break;
6945 case AArch64::ADDv4i16:
6946 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6947 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6948 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6949 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6950 break;
6951 case AArch64::ADDv8i16:
6952 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6953 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6954 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6955 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6956 break;
6957 case AArch64::ADDv2i32:
6958 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6959 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6960 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6961 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6962 break;
6963 case AArch64::ADDv4i32:
6964 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6965 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6966 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6967 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6968 break;
6969 case AArch64::SUBv8i8:
6970 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6971 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6972 break;
6973 case AArch64::SUBv16i8:
6974 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6975 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6976 break;
6977 case AArch64::SUBv4i16:
6978 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6979 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6980 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6981 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6982 break;
6983 case AArch64::SUBv8i16:
6984 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6985 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6986 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6987 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6988 break;
6989 case AArch64::SUBv2i32:
6990 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6991 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6992 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6993 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6994 break;
6995 case AArch64::SUBv4i32:
6996 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6997 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6998 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6999 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7000 break;
7001 }
7002 return Found;
7003}
7004
7005bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7006 switch (Opcode) {
7007 default:
7008 break;
7009 case AArch64::UABALB_ZZZ_D:
7010 case AArch64::UABALB_ZZZ_H:
7011 case AArch64::UABALB_ZZZ_S:
7012 case AArch64::UABALT_ZZZ_D:
7013 case AArch64::UABALT_ZZZ_H:
7014 case AArch64::UABALT_ZZZ_S:
7015 case AArch64::SABALB_ZZZ_D:
7016 case AArch64::SABALB_ZZZ_S:
7017 case AArch64::SABALB_ZZZ_H:
7018 case AArch64::SABALT_ZZZ_D:
7019 case AArch64::SABALT_ZZZ_S:
7020 case AArch64::SABALT_ZZZ_H:
7021 case AArch64::UABALv16i8_v8i16:
7022 case AArch64::UABALv2i32_v2i64:
7023 case AArch64::UABALv4i16_v4i32:
7024 case AArch64::UABALv4i32_v2i64:
7025 case AArch64::UABALv8i16_v4i32:
7026 case AArch64::UABALv8i8_v8i16:
7027 case AArch64::UABAv16i8:
7028 case AArch64::UABAv2i32:
7029 case AArch64::UABAv4i16:
7030 case AArch64::UABAv4i32:
7031 case AArch64::UABAv8i16:
7032 case AArch64::UABAv8i8:
7033 case AArch64::SABALv16i8_v8i16:
7034 case AArch64::SABALv2i32_v2i64:
7035 case AArch64::SABALv4i16_v4i32:
7036 case AArch64::SABALv4i32_v2i64:
7037 case AArch64::SABALv8i16_v4i32:
7038 case AArch64::SABALv8i8_v8i16:
7039 case AArch64::SABAv16i8:
7040 case AArch64::SABAv2i32:
7041 case AArch64::SABAv4i16:
7042 case AArch64::SABAv4i32:
7043 case AArch64::SABAv8i16:
7044 case AArch64::SABAv8i8:
7045 return true;
7046 }
7047
7048 return false;
7049}
7050
7052 unsigned AccumulationOpcode) const {
7053 switch (AccumulationOpcode) {
7054 default:
7055 llvm_unreachable("Unsupported accumulation Opcode!");
7056 case AArch64::UABALB_ZZZ_D:
7057 return AArch64::UABDLB_ZZZ_D;
7058 case AArch64::UABALB_ZZZ_H:
7059 return AArch64::UABDLB_ZZZ_H;
7060 case AArch64::UABALB_ZZZ_S:
7061 return AArch64::UABDLB_ZZZ_S;
7062 case AArch64::UABALT_ZZZ_D:
7063 return AArch64::UABDLT_ZZZ_D;
7064 case AArch64::UABALT_ZZZ_H:
7065 return AArch64::UABDLT_ZZZ_H;
7066 case AArch64::UABALT_ZZZ_S:
7067 return AArch64::UABDLT_ZZZ_S;
7068 case AArch64::UABALv16i8_v8i16:
7069 return AArch64::UABDLv16i8_v8i16;
7070 case AArch64::UABALv2i32_v2i64:
7071 return AArch64::UABDLv2i32_v2i64;
7072 case AArch64::UABALv4i16_v4i32:
7073 return AArch64::UABDLv4i16_v4i32;
7074 case AArch64::UABALv4i32_v2i64:
7075 return AArch64::UABDLv4i32_v2i64;
7076 case AArch64::UABALv8i16_v4i32:
7077 return AArch64::UABDLv8i16_v4i32;
7078 case AArch64::UABALv8i8_v8i16:
7079 return AArch64::UABDLv8i8_v8i16;
7080 case AArch64::UABAv16i8:
7081 return AArch64::UABDv16i8;
7082 case AArch64::UABAv2i32:
7083 return AArch64::UABDv2i32;
7084 case AArch64::UABAv4i16:
7085 return AArch64::UABDv4i16;
7086 case AArch64::UABAv4i32:
7087 return AArch64::UABDv4i32;
7088 case AArch64::UABAv8i16:
7089 return AArch64::UABDv8i16;
7090 case AArch64::UABAv8i8:
7091 return AArch64::UABDv8i8;
7092 case AArch64::SABALB_ZZZ_D:
7093 return AArch64::SABDLB_ZZZ_D;
7094 case AArch64::SABALB_ZZZ_S:
7095 return AArch64::SABDLB_ZZZ_S;
7096 case AArch64::SABALB_ZZZ_H:
7097 return AArch64::SABDLB_ZZZ_H;
7098 case AArch64::SABALT_ZZZ_D:
7099 return AArch64::SABDLT_ZZZ_D;
7100 case AArch64::SABALT_ZZZ_S:
7101 return AArch64::SABDLT_ZZZ_S;
7102 case AArch64::SABALT_ZZZ_H:
7103 return AArch64::SABDLT_ZZZ_H;
7104 case AArch64::SABALv16i8_v8i16:
7105 return AArch64::SABDLv16i8_v8i16;
7106 case AArch64::SABALv2i32_v2i64:
7107 return AArch64::SABDLv2i32_v2i64;
7108 case AArch64::SABALv4i16_v4i32:
7109 return AArch64::SABDLv4i16_v4i32;
7110 case AArch64::SABALv4i32_v2i64:
7111 return AArch64::SABDLv4i32_v2i64;
7112 case AArch64::SABALv8i16_v4i32:
7113 return AArch64::SABDLv8i16_v4i32;
7114 case AArch64::SABALv8i8_v8i16:
7115 return AArch64::SABDLv8i8_v8i16;
7116 case AArch64::SABAv16i8:
7117 return AArch64::SABDv16i8;
7118 case AArch64::SABAv2i32:
7119 return AArch64::SABAv2i32;
7120 case AArch64::SABAv4i16:
7121 return AArch64::SABDv4i16;
7122 case AArch64::SABAv4i32:
7123 return AArch64::SABDv4i32;
7124 case AArch64::SABAv8i16:
7125 return AArch64::SABDv8i16;
7126 case AArch64::SABAv8i8:
7127 return AArch64::SABDv8i8;
7128 }
7129}
7130
7131/// Floating-Point Support
7132
7133/// Find instructions that can be turned into madd.
7135 SmallVectorImpl<unsigned> &Patterns) {
7136
7137 if (!isCombineInstrCandidateFP(Root))
7138 return false;
7139
7140 MachineBasicBlock &MBB = *Root.getParent();
7141 bool Found = false;
7142
7143 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7144 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7145 Patterns.push_back(Pattern);
7146 return true;
7147 }
7148 return false;
7149 };
7150
7152
7153 switch (Root.getOpcode()) {
7154 default:
7155 assert(false && "Unsupported FP instruction in combiner\n");
7156 break;
7157 case AArch64::FADDHrr:
7158 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7159 "FADDHrr does not have register operands");
7160
7161 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7162 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7163 break;
7164 case AArch64::FADDSrr:
7165 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7166 "FADDSrr does not have register operands");
7167
7168 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7169 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7170
7171 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7172 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7173 break;
7174 case AArch64::FADDDrr:
7175 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7176 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7177
7178 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7179 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7180 break;
7181 case AArch64::FADDv4f16:
7182 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7183 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7184
7185 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7186 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7187 break;
7188 case AArch64::FADDv8f16:
7189 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7190 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7191
7192 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7193 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7194 break;
7195 case AArch64::FADDv2f32:
7196 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7197 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7198
7199 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7200 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7201 break;
7202 case AArch64::FADDv2f64:
7203 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7204 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7205
7206 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7207 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7208 break;
7209 case AArch64::FADDv4f32:
7210 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7211 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7212
7213 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7214 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7215 break;
7216 case AArch64::FSUBHrr:
7217 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7218 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7219 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7220 break;
7221 case AArch64::FSUBSrr:
7222 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7223
7224 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7225 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7226
7227 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7228 break;
7229 case AArch64::FSUBDrr:
7230 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7231
7232 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7233 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7234
7235 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7236 break;
7237 case AArch64::FSUBv4f16:
7238 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7239 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7240
7241 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7242 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7243 break;
7244 case AArch64::FSUBv8f16:
7245 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7246 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7247
7248 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7249 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7250 break;
7251 case AArch64::FSUBv2f32:
7252 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7253 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7254
7255 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7256 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7257 break;
7258 case AArch64::FSUBv2f64:
7259 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7260 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7261
7262 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7263 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7264 break;
7265 case AArch64::FSUBv4f32:
7266 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7267 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7268
7269 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7270 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7271 break;
7272 }
7273 return Found;
7274}
7275
7277 SmallVectorImpl<unsigned> &Patterns) {
7278 MachineBasicBlock &MBB = *Root.getParent();
7279 bool Found = false;
7280
7281 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7283 MachineOperand &MO = Root.getOperand(Operand);
7284 MachineInstr *MI = nullptr;
7285 if (MO.isReg() && MO.getReg().isVirtual())
7286 MI = MRI.getUniqueVRegDef(MO.getReg());
7287 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7288 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7289 MI->getOperand(1).getReg().isVirtual())
7290 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7291 if (MI && MI->getOpcode() == Opcode) {
7292 Patterns.push_back(Pattern);
7293 return true;
7294 }
7295 return false;
7296 };
7297
7299
7300 switch (Root.getOpcode()) {
7301 default:
7302 return false;
7303 case AArch64::FMULv2f32:
7304 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7305 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7306 break;
7307 case AArch64::FMULv2f64:
7308 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7309 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7310 break;
7311 case AArch64::FMULv4f16:
7312 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7313 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7314 break;
7315 case AArch64::FMULv4f32:
7316 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7317 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7318 break;
7319 case AArch64::FMULv8f16:
7320 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7321 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7322 break;
7323 }
7324
7325 return Found;
7326}
7327
7329 SmallVectorImpl<unsigned> &Patterns) {
7330 unsigned Opc = Root.getOpcode();
7331 MachineBasicBlock &MBB = *Root.getParent();
7333
7334 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7335 MachineOperand &MO = Root.getOperand(1);
7336 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7337 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7338 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7342 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7343 Patterns.push_back(Pattern);
7344 return true;
7345 }
7346 return false;
7347 };
7348
7349 switch (Opc) {
7350 default:
7351 break;
7352 case AArch64::FNEGDr:
7353 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7354 case AArch64::FNEGSr:
7355 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7356 }
7357
7358 return false;
7359}
7360
7361/// Return true when a code sequence can improve throughput. It
7362/// should be called only for instructions in loops.
7363/// \param Pattern - combiner pattern
7365 switch (Pattern) {
7366 default:
7367 break;
7473 return true;
7474 } // end switch (Pattern)
7475 return false;
7476}
7477
7478/// Find other MI combine patterns.
7480 SmallVectorImpl<unsigned> &Patterns) {
7481 // A - (B + C) ==> (A - B) - C or (A - C) - B
7482 unsigned Opc = Root.getOpcode();
7483 MachineBasicBlock &MBB = *Root.getParent();
7484
7485 switch (Opc) {
7486 case AArch64::SUBWrr:
7487 case AArch64::SUBSWrr:
7488 case AArch64::SUBXrr:
7489 case AArch64::SUBSXrr:
7490 // Found candidate root.
7491 break;
7492 default:
7493 return false;
7494 }
7495
7497 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7498 -1)
7499 return false;
7500
7501 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7502 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7503 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7504 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7507 return true;
7508 }
7509
7510 return false;
7511}
7512
7513/// Check if the given instruction forms a gather load pattern that can be
7514/// optimized for better Memory-Level Parallelism (MLP). This function
7515/// identifies chains of NEON lane load instructions that load data from
7516/// different memory addresses into individual lanes of a 128-bit vector
7517/// register, then attempts to split the pattern into parallel loads to break
7518/// the serial dependency between instructions.
7519///
7520/// Pattern Matched:
7521/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7522/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7523///
7524/// Transformed Into:
7525/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7526/// to combine the results, enabling better memory-level parallelism.
7527///
7528/// Supported Element Types:
7529/// - 32-bit elements (LD1i32, 4 lanes total)
7530/// - 16-bit elements (LD1i16, 8 lanes total)
7531/// - 8-bit elements (LD1i8, 16 lanes total)
7533 SmallVectorImpl<unsigned> &Patterns,
7534 unsigned LoadLaneOpCode, unsigned NumLanes) {
7535 const MachineFunction *MF = Root.getMF();
7536
7537 // Early exit if optimizing for size.
7538 if (MF->getFunction().hasMinSize())
7539 return false;
7540
7541 const MachineRegisterInfo &MRI = MF->getRegInfo();
7543
7544 // The root of the pattern must load into the last lane of the vector.
7545 if (Root.getOperand(2).getImm() != NumLanes - 1)
7546 return false;
7547
7548 // Check that we have load into all lanes except lane 0.
7549 // For each load we also want to check that:
7550 // 1. It has a single non-debug use (since we will be replacing the virtual
7551 // register)
7552 // 2. That the addressing mode only uses a single pointer operand
7553 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7554 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7555 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7557 while (!RemainingLanes.empty() && CurrInstr &&
7558 CurrInstr->getOpcode() == LoadLaneOpCode &&
7559 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7560 CurrInstr->getNumOperands() == 4) {
7561 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7562 LoadInstrs.push_back(CurrInstr);
7563 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7564 }
7565
7566 // Check that we have found a match for lanes N-1.. 1.
7567 if (!RemainingLanes.empty())
7568 return false;
7569
7570 // Match the SUBREG_TO_REG sequence.
7571 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7572 return false;
7573
7574 // Verify that the subreg to reg loads an integer into the first lane.
7575 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7576 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7577 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7578 return false;
7579
7580 // Verify that it also has a single non debug use.
7581 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7582 return false;
7583
7584 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7585
7586 // If there is any chance of aliasing, do not apply the pattern.
7587 // Walk backward through the MBB starting from Root.
7588 // Exit early if we've encountered all load instructions or hit the search
7589 // limit.
7590 auto MBBItr = Root.getIterator();
7591 unsigned RemainingSteps = GatherOptSearchLimit;
7592 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7593 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7594 const MachineBasicBlock *MBB = Root.getParent();
7595
7596 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7597 !RemainingLoadInstrs.empty();
7598 --MBBItr, --RemainingSteps) {
7599 const MachineInstr &CurrInstr = *MBBItr;
7600
7601 // Remove this instruction from remaining loads if it's one we're tracking.
7602 RemainingLoadInstrs.erase(&CurrInstr);
7603
7604 // Check for potential aliasing with any of the load instructions to
7605 // optimize.
7606 if (CurrInstr.isLoadFoldBarrier())
7607 return false;
7608 }
7609
7610 // If we hit the search limit without finding all load instructions,
7611 // don't match the pattern.
7612 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7613 return false;
7614
7615 switch (NumLanes) {
7616 case 4:
7618 break;
7619 case 8:
7621 break;
7622 case 16:
7624 break;
7625 default:
7626 llvm_unreachable("Got bad number of lanes for gather pattern.");
7627 }
7628
7629 return true;
7630}
7631
7632/// Search for patterns of LD instructions we can optimize.
7634 SmallVectorImpl<unsigned> &Patterns) {
7635
7636 // The pattern searches for loads into single lanes.
7637 switch (Root.getOpcode()) {
7638 case AArch64::LD1i32:
7639 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7640 case AArch64::LD1i16:
7641 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7642 case AArch64::LD1i8:
7643 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7644 default:
7645 return false;
7646 }
7647}
7648
7649/// Generate optimized instruction sequence for gather load patterns to improve
7650/// Memory-Level Parallelism (MLP). This function transforms a chain of
7651/// sequential NEON lane loads into parallel vector loads that can execute
7652/// concurrently.
7653static void
7657 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7658 unsigned Pattern, unsigned NumLanes) {
7659 MachineFunction &MF = *Root.getParent()->getParent();
7662
7663 // Gather the initial load instructions to build the pattern.
7664 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7665 MachineInstr *CurrInstr = &Root;
7666 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7667 LoadToLaneInstrs.push_back(CurrInstr);
7668 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7669 }
7670
7671 // Sort the load instructions according to the lane.
7672 llvm::sort(LoadToLaneInstrs,
7673 [](const MachineInstr *A, const MachineInstr *B) {
7674 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7675 });
7676
7677 MachineInstr *SubregToReg = CurrInstr;
7678 LoadToLaneInstrs.push_back(
7679 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7680 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7681
7682 const TargetRegisterClass *FPR128RegClass =
7683 MRI.getRegClass(Root.getOperand(0).getReg());
7684
7685 // Helper lambda to create a LD1 instruction.
7686 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7687 Register SrcRegister, unsigned Lane,
7688 Register OffsetRegister,
7689 bool OffsetRegisterKillState) {
7690 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7691 MachineInstrBuilder LoadIndexIntoRegister =
7692 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7693 NewRegister)
7694 .addReg(SrcRegister)
7695 .addImm(Lane)
7696 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
7697 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7698 InsInstrs.push_back(LoadIndexIntoRegister);
7699 return NewRegister;
7700 };
7701
7702 // Helper to create load instruction based on the NumLanes in the NEON
7703 // register we are rewriting.
7704 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
7705 Register OffsetReg,
7706 bool KillState) -> MachineInstrBuilder {
7707 unsigned Opcode;
7708 switch (NumLanes) {
7709 case 4:
7710 Opcode = AArch64::LDRSui;
7711 break;
7712 case 8:
7713 Opcode = AArch64::LDRHui;
7714 break;
7715 case 16:
7716 Opcode = AArch64::LDRBui;
7717 break;
7718 default:
7720 "Got unsupported number of lanes in machine-combiner gather pattern");
7721 }
7722 // Immediate offset load
7723 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7724 .addReg(OffsetReg)
7725 .addImm(0);
7726 };
7727
7728 // Load the remaining lanes into register 0.
7729 auto LanesToLoadToReg0 =
7730 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7731 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7732 Register PrevReg = SubregToReg->getOperand(0).getReg();
7733 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7734 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7735 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7736 OffsetRegOperand.getReg(),
7737 OffsetRegOperand.isKill());
7738 DelInstrs.push_back(LoadInstr);
7739 }
7740 Register LastLoadReg0 = PrevReg;
7741
7742 // First load into register 1. Perform an integer load to zero out the upper
7743 // lanes in a single instruction.
7744 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7745 MachineInstr *OriginalSplitLoad =
7746 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7747 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
7748 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7749
7750 const MachineOperand &OriginalSplitToLoadOffsetOperand =
7751 OriginalSplitLoad->getOperand(3);
7752 MachineInstrBuilder MiddleIndexLoadInstr =
7753 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
7754 OriginalSplitToLoadOffsetOperand.getReg(),
7755 OriginalSplitToLoadOffsetOperand.isKill());
7756
7757 InstrIdxForVirtReg.insert(
7758 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7759 InsInstrs.push_back(MiddleIndexLoadInstr);
7760 DelInstrs.push_back(OriginalSplitLoad);
7761
7762 // Subreg To Reg instruction for register 1.
7763 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7764 unsigned SubregType;
7765 switch (NumLanes) {
7766 case 4:
7767 SubregType = AArch64::ssub;
7768 break;
7769 case 8:
7770 SubregType = AArch64::hsub;
7771 break;
7772 case 16:
7773 SubregType = AArch64::bsub;
7774 break;
7775 default:
7777 "Got invalid NumLanes for machine-combiner gather pattern");
7778 }
7779
7780 auto SubRegToRegInstr =
7781 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7782 DestRegForSubregToReg)
7783 .addImm(0)
7784 .addReg(DestRegForMiddleIndex, getKillRegState(true))
7785 .addImm(SubregType);
7786 InstrIdxForVirtReg.insert(
7787 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7788 InsInstrs.push_back(SubRegToRegInstr);
7789
7790 // Load remaining lanes into register 1.
7791 auto LanesToLoadToReg1 =
7792 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7793 LoadToLaneInstrsAscending.end());
7794 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7795 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7796 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7797 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7798 OffsetRegOperand.getReg(),
7799 OffsetRegOperand.isKill());
7800
7801 // Do not add the last reg to DelInstrs - it will be removed later.
7802 if (Index == NumLanes / 2 - 2) {
7803 break;
7804 }
7805 DelInstrs.push_back(LoadInstr);
7806 }
7807 Register LastLoadReg1 = PrevReg;
7808
7809 // Create the final zip instruction to combine the results.
7810 MachineInstrBuilder ZipInstr =
7811 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7812 Root.getOperand(0).getReg())
7813 .addReg(LastLoadReg0)
7814 .addReg(LastLoadReg1);
7815 InsInstrs.push_back(ZipInstr);
7816}
7817
7820 switch (Pattern) {
7827 default:
7829 }
7830}
7831
7832/// Return true when there is potentially a faster code sequence for an
7833/// instruction chain ending in \p Root. All potential patterns are listed in
7834/// the \p Pattern vector. Pattern should be sorted in priority order since the
7835/// pattern evaluator stops checking as soon as it finds a faster sequence.
7836
7838 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7839 bool DoRegPressureReduce) const {
7840 // Integer patterns
7841 if (getMaddPatterns(Root, Patterns))
7842 return true;
7843 // Floating point patterns
7844 if (getFMULPatterns(Root, Patterns))
7845 return true;
7846 if (getFMAPatterns(Root, Patterns))
7847 return true;
7848 if (getFNEGPatterns(Root, Patterns))
7849 return true;
7850
7851 // Other patterns
7852 if (getMiscPatterns(Root, Patterns))
7853 return true;
7854
7855 // Load patterns
7856 if (getLoadPatterns(Root, Patterns))
7857 return true;
7858
7859 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7860 DoRegPressureReduce);
7861}
7862
7864/// genFusedMultiply - Generate fused multiply instructions.
7865/// This function supports both integer and floating point instructions.
7866/// A typical example:
7867/// F|MUL I=A,B,0
7868/// F|ADD R,I,C
7869/// ==> F|MADD R,A,B,C
7870/// \param MF Containing MachineFunction
7871/// \param MRI Register information
7872/// \param TII Target information
7873/// \param Root is the F|ADD instruction
7874/// \param [out] InsInstrs is a vector of machine instructions and will
7875/// contain the generated madd instruction
7876/// \param IdxMulOpd is index of operand in Root that is the result of
7877/// the F|MUL. In the example above IdxMulOpd is 1.
7878/// \param MaddOpc the opcode fo the f|madd instruction
7879/// \param RC Register class of operands
7880/// \param kind of fma instruction (addressing mode) to be generated
7881/// \param ReplacedAddend is the result register from the instruction
7882/// replacing the non-combined operand, if any.
7883static MachineInstr *
7885 const TargetInstrInfo *TII, MachineInstr &Root,
7886 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7887 unsigned MaddOpc, const TargetRegisterClass *RC,
7888 FMAInstKind kind = FMAInstKind::Default,
7889 const Register *ReplacedAddend = nullptr) {
7890 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7891
7892 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7893 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7894 Register ResultReg = Root.getOperand(0).getReg();
7895 Register SrcReg0 = MUL->getOperand(1).getReg();
7896 bool Src0IsKill = MUL->getOperand(1).isKill();
7897 Register SrcReg1 = MUL->getOperand(2).getReg();
7898 bool Src1IsKill = MUL->getOperand(2).isKill();
7899
7900 Register SrcReg2;
7901 bool Src2IsKill;
7902 if (ReplacedAddend) {
7903 // If we just generated a new addend, we must be it's only use.
7904 SrcReg2 = *ReplacedAddend;
7905 Src2IsKill = true;
7906 } else {
7907 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7908 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7909 }
7910
7911 if (ResultReg.isVirtual())
7912 MRI.constrainRegClass(ResultReg, RC);
7913 if (SrcReg0.isVirtual())
7914 MRI.constrainRegClass(SrcReg0, RC);
7915 if (SrcReg1.isVirtual())
7916 MRI.constrainRegClass(SrcReg1, RC);
7917 if (SrcReg2.isVirtual())
7918 MRI.constrainRegClass(SrcReg2, RC);
7919
7921 if (kind == FMAInstKind::Default)
7922 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7923 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7924 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7925 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7926 else if (kind == FMAInstKind::Indexed)
7927 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7928 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7929 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7930 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7931 .addImm(MUL->getOperand(3).getImm());
7932 else if (kind == FMAInstKind::Accumulator)
7933 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7934 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7935 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7936 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7937 else
7938 assert(false && "Invalid FMA instruction kind \n");
7939 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
7940 InsInstrs.push_back(MIB);
7941 return MUL;
7942}
7943
7944static MachineInstr *
7946 const TargetInstrInfo *TII, MachineInstr &Root,
7948 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7949
7950 unsigned Opc = 0;
7951 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
7952 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7953 Opc = AArch64::FNMADDSrrr;
7954 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7955 Opc = AArch64::FNMADDDrrr;
7956 else
7957 return nullptr;
7958
7959 Register ResultReg = Root.getOperand(0).getReg();
7960 Register SrcReg0 = MAD->getOperand(1).getReg();
7961 Register SrcReg1 = MAD->getOperand(2).getReg();
7962 Register SrcReg2 = MAD->getOperand(3).getReg();
7963 bool Src0IsKill = MAD->getOperand(1).isKill();
7964 bool Src1IsKill = MAD->getOperand(2).isKill();
7965 bool Src2IsKill = MAD->getOperand(3).isKill();
7966 if (ResultReg.isVirtual())
7967 MRI.constrainRegClass(ResultReg, RC);
7968 if (SrcReg0.isVirtual())
7969 MRI.constrainRegClass(SrcReg0, RC);
7970 if (SrcReg1.isVirtual())
7971 MRI.constrainRegClass(SrcReg1, RC);
7972 if (SrcReg2.isVirtual())
7973 MRI.constrainRegClass(SrcReg2, RC);
7974
7976 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
7977 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7978 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7979 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7980 InsInstrs.push_back(MIB);
7981
7982 return MAD;
7983}
7984
7985/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
7986static MachineInstr *
7989 unsigned IdxDupOp, unsigned MulOpc,
7991 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
7992 "Invalid index of FMUL operand");
7993
7994 MachineFunction &MF = *Root.getMF();
7996
7997 MachineInstr *Dup =
7998 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
7999
8000 if (Dup->getOpcode() == TargetOpcode::COPY)
8001 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8002
8003 Register DupSrcReg = Dup->getOperand(1).getReg();
8004 MRI.clearKillFlags(DupSrcReg);
8005 MRI.constrainRegClass(DupSrcReg, RC);
8006
8007 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8008
8009 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8010 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8011
8012 Register ResultReg = Root.getOperand(0).getReg();
8013
8015 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8016 .add(MulOp)
8017 .addReg(DupSrcReg)
8018 .addImm(DupSrcLane);
8019
8020 InsInstrs.push_back(MIB);
8021 return &Root;
8022}
8023
8024/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8025/// instructions.
8026///
8027/// \see genFusedMultiply
8031 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8032 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8033 FMAInstKind::Accumulator);
8034}
8035
8036/// genNeg - Helper to generate an intermediate negation of the second operand
8037/// of Root
8039 const TargetInstrInfo *TII, MachineInstr &Root,
8041 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8042 unsigned MnegOpc, const TargetRegisterClass *RC) {
8043 Register NewVR = MRI.createVirtualRegister(RC);
8045 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8046 .add(Root.getOperand(2));
8047 InsInstrs.push_back(MIB);
8048
8049 assert(InstrIdxForVirtReg.empty());
8050 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8051
8052 return NewVR;
8053}
8054
8055/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8056/// instructions with an additional negation of the accumulator
8060 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8061 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8062 assert(IdxMulOpd == 1);
8063
8064 Register NewVR =
8065 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8066 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8067 FMAInstKind::Accumulator, &NewVR);
8068}
8069
8070/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8071/// instructions.
8072///
8073/// \see genFusedMultiply
8077 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8078 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8079 FMAInstKind::Indexed);
8080}
8081
8082/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8083/// instructions with an additional negation of the accumulator
8087 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8088 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8089 assert(IdxMulOpd == 1);
8090
8091 Register NewVR =
8092 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8093
8094 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8095 FMAInstKind::Indexed, &NewVR);
8096}
8097
8098/// genMaddR - Generate madd instruction and combine mul and add using
8099/// an extra virtual register
8100/// Example - an ADD intermediate needs to be stored in a register:
8101/// MUL I=A,B,0
8102/// ADD R,I,Imm
8103/// ==> ORR V, ZR, Imm
8104/// ==> MADD R,A,B,V
8105/// \param MF Containing MachineFunction
8106/// \param MRI Register information
8107/// \param TII Target information
8108/// \param Root is the ADD instruction
8109/// \param [out] InsInstrs is a vector of machine instructions and will
8110/// contain the generated madd instruction
8111/// \param IdxMulOpd is index of operand in Root that is the result of
8112/// the MUL. In the example above IdxMulOpd is 1.
8113/// \param MaddOpc the opcode fo the madd instruction
8114/// \param VR is a virtual register that holds the value of an ADD operand
8115/// (V in the example above).
8116/// \param RC Register class of operands
8118 const TargetInstrInfo *TII, MachineInstr &Root,
8120 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8121 const TargetRegisterClass *RC) {
8122 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8123
8124 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8125 Register ResultReg = Root.getOperand(0).getReg();
8126 Register SrcReg0 = MUL->getOperand(1).getReg();
8127 bool Src0IsKill = MUL->getOperand(1).isKill();
8128 Register SrcReg1 = MUL->getOperand(2).getReg();
8129 bool Src1IsKill = MUL->getOperand(2).isKill();
8130
8131 if (ResultReg.isVirtual())
8132 MRI.constrainRegClass(ResultReg, RC);
8133 if (SrcReg0.isVirtual())
8134 MRI.constrainRegClass(SrcReg0, RC);
8135 if (SrcReg1.isVirtual())
8136 MRI.constrainRegClass(SrcReg1, RC);
8138 MRI.constrainRegClass(VR, RC);
8139
8141 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8142 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8143 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8144 .addReg(VR);
8145 // Insert the MADD
8146 InsInstrs.push_back(MIB);
8147 return MUL;
8148}
8149
8150/// Do the following transformation
8151/// A - (B + C) ==> (A - B) - C
8152/// A - (B + C) ==> (A - C) - B
8154 const TargetInstrInfo *TII, MachineInstr &Root,
8157 unsigned IdxOpd1,
8158 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8159 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8160 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8161 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8162
8163 Register ResultReg = Root.getOperand(0).getReg();
8164 Register RegA = Root.getOperand(1).getReg();
8165 bool RegAIsKill = Root.getOperand(1).isKill();
8166 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8167 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8168 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8169 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8170 Register NewVR =
8171 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8172
8173 unsigned Opcode = Root.getOpcode();
8174 if (Opcode == AArch64::SUBSWrr)
8175 Opcode = AArch64::SUBWrr;
8176 else if (Opcode == AArch64::SUBSXrr)
8177 Opcode = AArch64::SUBXrr;
8178 else
8179 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8180 "Unexpected instruction opcode.");
8181
8182 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8183 Flags &= ~MachineInstr::NoSWrap;
8184 Flags &= ~MachineInstr::NoUWrap;
8185
8186 MachineInstrBuilder MIB1 =
8187 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8188 .addReg(RegA, getKillRegState(RegAIsKill))
8189 .addReg(RegB, getKillRegState(RegBIsKill))
8190 .setMIFlags(Flags);
8191 MachineInstrBuilder MIB2 =
8192 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8193 .addReg(NewVR, getKillRegState(true))
8194 .addReg(RegC, getKillRegState(RegCIsKill))
8195 .setMIFlags(Flags);
8196
8197 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8198 InsInstrs.push_back(MIB1);
8199 InsInstrs.push_back(MIB2);
8200 DelInstrs.push_back(AddMI);
8201 DelInstrs.push_back(&Root);
8202}
8203
8205 unsigned int AccumulatorOpCode) const {
8206 switch (AccumulatorOpCode) {
8207 case AArch64::UABALB_ZZZ_D:
8208 case AArch64::SABALB_ZZZ_D:
8209 case AArch64::UABALT_ZZZ_D:
8210 case AArch64::SABALT_ZZZ_D:
8211 return AArch64::ADD_ZZZ_D;
8212 case AArch64::UABALB_ZZZ_H:
8213 case AArch64::SABALB_ZZZ_H:
8214 case AArch64::UABALT_ZZZ_H:
8215 case AArch64::SABALT_ZZZ_H:
8216 return AArch64::ADD_ZZZ_H;
8217 case AArch64::UABALB_ZZZ_S:
8218 case AArch64::SABALB_ZZZ_S:
8219 case AArch64::UABALT_ZZZ_S:
8220 case AArch64::SABALT_ZZZ_S:
8221 return AArch64::ADD_ZZZ_S;
8222 case AArch64::UABALv16i8_v8i16:
8223 case AArch64::SABALv8i8_v8i16:
8224 case AArch64::SABAv8i16:
8225 case AArch64::UABAv8i16:
8226 return AArch64::ADDv8i16;
8227 case AArch64::SABALv2i32_v2i64:
8228 case AArch64::UABALv2i32_v2i64:
8229 case AArch64::SABALv4i32_v2i64:
8230 return AArch64::ADDv2i64;
8231 case AArch64::UABALv4i16_v4i32:
8232 case AArch64::SABALv4i16_v4i32:
8233 case AArch64::SABALv8i16_v4i32:
8234 case AArch64::SABAv4i32:
8235 case AArch64::UABAv4i32:
8236 return AArch64::ADDv4i32;
8237 case AArch64::UABALv4i32_v2i64:
8238 return AArch64::ADDv2i64;
8239 case AArch64::UABALv8i16_v4i32:
8240 return AArch64::ADDv4i32;
8241 case AArch64::UABALv8i8_v8i16:
8242 case AArch64::SABALv16i8_v8i16:
8243 return AArch64::ADDv8i16;
8244 case AArch64::UABAv16i8:
8245 case AArch64::SABAv16i8:
8246 return AArch64::ADDv16i8;
8247 case AArch64::UABAv4i16:
8248 case AArch64::SABAv4i16:
8249 return AArch64::ADDv4i16;
8250 case AArch64::UABAv2i32:
8251 case AArch64::SABAv2i32:
8252 return AArch64::ADDv2i32;
8253 case AArch64::UABAv8i8:
8254 case AArch64::SABAv8i8:
8255 return AArch64::ADDv8i8;
8256 default:
8257 llvm_unreachable("Unknown accumulator opcode");
8258 }
8259}
8260
8261/// When getMachineCombinerPatterns() finds potential patterns,
8262/// this function generates the instructions that could replace the
8263/// original code sequence
8265 MachineInstr &Root, unsigned Pattern,
8268 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8269 MachineBasicBlock &MBB = *Root.getParent();
8271 MachineFunction &MF = *MBB.getParent();
8273
8274 MachineInstr *MUL = nullptr;
8275 const TargetRegisterClass *RC;
8276 unsigned Opc;
8277 switch (Pattern) {
8278 default:
8279 // Reassociate instructions.
8281 DelInstrs, InstrIdxForVirtReg);
8282 return;
8284 // A - (B + C)
8285 // ==> (A - B) - C
8286 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8287 InstrIdxForVirtReg);
8288 return;
8290 // A - (B + C)
8291 // ==> (A - C) - B
8292 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8293 InstrIdxForVirtReg);
8294 return;
8297 // MUL I=A,B,0
8298 // ADD R,I,C
8299 // ==> MADD R,A,B,C
8300 // --- Create(MADD);
8302 Opc = AArch64::MADDWrrr;
8303 RC = &AArch64::GPR32RegClass;
8304 } else {
8305 Opc = AArch64::MADDXrrr;
8306 RC = &AArch64::GPR64RegClass;
8307 }
8308 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8309 break;
8312 // MUL I=A,B,0
8313 // ADD R,C,I
8314 // ==> MADD R,A,B,C
8315 // --- Create(MADD);
8317 Opc = AArch64::MADDWrrr;
8318 RC = &AArch64::GPR32RegClass;
8319 } else {
8320 Opc = AArch64::MADDXrrr;
8321 RC = &AArch64::GPR64RegClass;
8322 }
8323 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8324 break;
8329 // MUL I=A,B,0
8330 // ADD/SUB R,I,Imm
8331 // ==> MOV V, Imm/-Imm
8332 // ==> MADD R,A,B,V
8333 // --- Create(MADD);
8334 const TargetRegisterClass *RC;
8335 unsigned BitSize, MovImm;
8338 MovImm = AArch64::MOVi32imm;
8339 RC = &AArch64::GPR32spRegClass;
8340 BitSize = 32;
8341 Opc = AArch64::MADDWrrr;
8342 RC = &AArch64::GPR32RegClass;
8343 } else {
8344 MovImm = AArch64::MOVi64imm;
8345 RC = &AArch64::GPR64spRegClass;
8346 BitSize = 64;
8347 Opc = AArch64::MADDXrrr;
8348 RC = &AArch64::GPR64RegClass;
8349 }
8350 Register NewVR = MRI.createVirtualRegister(RC);
8351 uint64_t Imm = Root.getOperand(2).getImm();
8352
8353 if (Root.getOperand(3).isImm()) {
8354 unsigned Val = Root.getOperand(3).getImm();
8355 Imm = Imm << Val;
8356 }
8359 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8360 // Check that the immediate can be composed via a single instruction.
8362 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8363 if (Insn.size() != 1)
8364 return;
8365 MachineInstrBuilder MIB1 =
8366 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8367 .addImm(IsSub ? -Imm : Imm);
8368 InsInstrs.push_back(MIB1);
8369 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8370 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8371 break;
8372 }
8375 // MUL I=A,B,0
8376 // SUB R,I, C
8377 // ==> SUB V, 0, C
8378 // ==> MADD R,A,B,V // = -C + A*B
8379 // --- Create(MADD);
8380 const TargetRegisterClass *SubRC;
8381 unsigned SubOpc, ZeroReg;
8383 SubOpc = AArch64::SUBWrr;
8384 SubRC = &AArch64::GPR32spRegClass;
8385 ZeroReg = AArch64::WZR;
8386 Opc = AArch64::MADDWrrr;
8387 RC = &AArch64::GPR32RegClass;
8388 } else {
8389 SubOpc = AArch64::SUBXrr;
8390 SubRC = &AArch64::GPR64spRegClass;
8391 ZeroReg = AArch64::XZR;
8392 Opc = AArch64::MADDXrrr;
8393 RC = &AArch64::GPR64RegClass;
8394 }
8395 Register NewVR = MRI.createVirtualRegister(SubRC);
8396 // SUB NewVR, 0, C
8397 MachineInstrBuilder MIB1 =
8398 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8399 .addReg(ZeroReg)
8400 .add(Root.getOperand(2));
8401 InsInstrs.push_back(MIB1);
8402 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8403 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8404 break;
8405 }
8408 // MUL I=A,B,0
8409 // SUB R,C,I
8410 // ==> MSUB R,A,B,C (computes C - A*B)
8411 // --- Create(MSUB);
8413 Opc = AArch64::MSUBWrrr;
8414 RC = &AArch64::GPR32RegClass;
8415 } else {
8416 Opc = AArch64::MSUBXrrr;
8417 RC = &AArch64::GPR64RegClass;
8418 }
8419 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8420 break;
8422 Opc = AArch64::MLAv8i8;
8423 RC = &AArch64::FPR64RegClass;
8424 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8425 break;
8427 Opc = AArch64::MLAv8i8;
8428 RC = &AArch64::FPR64RegClass;
8429 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8430 break;
8432 Opc = AArch64::MLAv16i8;
8433 RC = &AArch64::FPR128RegClass;
8434 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8435 break;
8437 Opc = AArch64::MLAv16i8;
8438 RC = &AArch64::FPR128RegClass;
8439 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8440 break;
8442 Opc = AArch64::MLAv4i16;
8443 RC = &AArch64::FPR64RegClass;
8444 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8445 break;
8447 Opc = AArch64::MLAv4i16;
8448 RC = &AArch64::FPR64RegClass;
8449 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8450 break;
8452 Opc = AArch64::MLAv8i16;
8453 RC = &AArch64::FPR128RegClass;
8454 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8455 break;
8457 Opc = AArch64::MLAv8i16;
8458 RC = &AArch64::FPR128RegClass;
8459 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8460 break;
8462 Opc = AArch64::MLAv2i32;
8463 RC = &AArch64::FPR64RegClass;
8464 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8465 break;
8467 Opc = AArch64::MLAv2i32;
8468 RC = &AArch64::FPR64RegClass;
8469 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8470 break;
8472 Opc = AArch64::MLAv4i32;
8473 RC = &AArch64::FPR128RegClass;
8474 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8475 break;
8477 Opc = AArch64::MLAv4i32;
8478 RC = &AArch64::FPR128RegClass;
8479 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8480 break;
8481
8483 Opc = AArch64::MLAv8i8;
8484 RC = &AArch64::FPR64RegClass;
8485 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8486 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8487 RC);
8488 break;
8490 Opc = AArch64::MLSv8i8;
8491 RC = &AArch64::FPR64RegClass;
8492 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8493 break;
8495 Opc = AArch64::MLAv16i8;
8496 RC = &AArch64::FPR128RegClass;
8497 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8498 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8499 RC);
8500 break;
8502 Opc = AArch64::MLSv16i8;
8503 RC = &AArch64::FPR128RegClass;
8504 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8505 break;
8507 Opc = AArch64::MLAv4i16;
8508 RC = &AArch64::FPR64RegClass;
8509 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8510 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8511 RC);
8512 break;
8514 Opc = AArch64::MLSv4i16;
8515 RC = &AArch64::FPR64RegClass;
8516 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8517 break;
8519 Opc = AArch64::MLAv8i16;
8520 RC = &AArch64::FPR128RegClass;
8521 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8522 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8523 RC);
8524 break;
8526 Opc = AArch64::MLSv8i16;
8527 RC = &AArch64::FPR128RegClass;
8528 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8529 break;
8531 Opc = AArch64::MLAv2i32;
8532 RC = &AArch64::FPR64RegClass;
8533 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8534 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8535 RC);
8536 break;
8538 Opc = AArch64::MLSv2i32;
8539 RC = &AArch64::FPR64RegClass;
8540 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8541 break;
8543 Opc = AArch64::MLAv4i32;
8544 RC = &AArch64::FPR128RegClass;
8545 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8546 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8547 RC);
8548 break;
8550 Opc = AArch64::MLSv4i32;
8551 RC = &AArch64::FPR128RegClass;
8552 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8553 break;
8554
8556 Opc = AArch64::MLAv4i16_indexed;
8557 RC = &AArch64::FPR64RegClass;
8558 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8559 break;
8561 Opc = AArch64::MLAv4i16_indexed;
8562 RC = &AArch64::FPR64RegClass;
8563 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8564 break;
8566 Opc = AArch64::MLAv8i16_indexed;
8567 RC = &AArch64::FPR128RegClass;
8568 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8569 break;
8571 Opc = AArch64::MLAv8i16_indexed;
8572 RC = &AArch64::FPR128RegClass;
8573 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8574 break;
8576 Opc = AArch64::MLAv2i32_indexed;
8577 RC = &AArch64::FPR64RegClass;
8578 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8579 break;
8581 Opc = AArch64::MLAv2i32_indexed;
8582 RC = &AArch64::FPR64RegClass;
8583 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8584 break;
8586 Opc = AArch64::MLAv4i32_indexed;
8587 RC = &AArch64::FPR128RegClass;
8588 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8589 break;
8591 Opc = AArch64::MLAv4i32_indexed;
8592 RC = &AArch64::FPR128RegClass;
8593 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8594 break;
8595
8597 Opc = AArch64::MLAv4i16_indexed;
8598 RC = &AArch64::FPR64RegClass;
8599 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8600 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8601 RC);
8602 break;
8604 Opc = AArch64::MLSv4i16_indexed;
8605 RC = &AArch64::FPR64RegClass;
8606 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8607 break;
8609 Opc = AArch64::MLAv8i16_indexed;
8610 RC = &AArch64::FPR128RegClass;
8611 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8612 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8613 RC);
8614 break;
8616 Opc = AArch64::MLSv8i16_indexed;
8617 RC = &AArch64::FPR128RegClass;
8618 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8619 break;
8621 Opc = AArch64::MLAv2i32_indexed;
8622 RC = &AArch64::FPR64RegClass;
8623 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8624 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8625 RC);
8626 break;
8628 Opc = AArch64::MLSv2i32_indexed;
8629 RC = &AArch64::FPR64RegClass;
8630 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8631 break;
8633 Opc = AArch64::MLAv4i32_indexed;
8634 RC = &AArch64::FPR128RegClass;
8635 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8636 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8637 RC);
8638 break;
8640 Opc = AArch64::MLSv4i32_indexed;
8641 RC = &AArch64::FPR128RegClass;
8642 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8643 break;
8644
8645 // Floating Point Support
8647 Opc = AArch64::FMADDHrrr;
8648 RC = &AArch64::FPR16RegClass;
8649 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8650 break;
8652 Opc = AArch64::FMADDSrrr;
8653 RC = &AArch64::FPR32RegClass;
8654 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8655 break;
8657 Opc = AArch64::FMADDDrrr;
8658 RC = &AArch64::FPR64RegClass;
8659 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8660 break;
8661
8663 Opc = AArch64::FMADDHrrr;
8664 RC = &AArch64::FPR16RegClass;
8665 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8666 break;
8668 Opc = AArch64::FMADDSrrr;
8669 RC = &AArch64::FPR32RegClass;
8670 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8671 break;
8673 Opc = AArch64::FMADDDrrr;
8674 RC = &AArch64::FPR64RegClass;
8675 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8676 break;
8677
8679 Opc = AArch64::FMLAv1i32_indexed;
8680 RC = &AArch64::FPR32RegClass;
8681 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8682 FMAInstKind::Indexed);
8683 break;
8685 Opc = AArch64::FMLAv1i32_indexed;
8686 RC = &AArch64::FPR32RegClass;
8687 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8688 FMAInstKind::Indexed);
8689 break;
8690
8692 Opc = AArch64::FMLAv1i64_indexed;
8693 RC = &AArch64::FPR64RegClass;
8694 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8695 FMAInstKind::Indexed);
8696 break;
8698 Opc = AArch64::FMLAv1i64_indexed;
8699 RC = &AArch64::FPR64RegClass;
8700 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8701 FMAInstKind::Indexed);
8702 break;
8703
8705 RC = &AArch64::FPR64RegClass;
8706 Opc = AArch64::FMLAv4i16_indexed;
8707 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8708 FMAInstKind::Indexed);
8709 break;
8711 RC = &AArch64::FPR64RegClass;
8712 Opc = AArch64::FMLAv4f16;
8713 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8714 FMAInstKind::Accumulator);
8715 break;
8717 RC = &AArch64::FPR64RegClass;
8718 Opc = AArch64::FMLAv4i16_indexed;
8719 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8720 FMAInstKind::Indexed);
8721 break;
8723 RC = &AArch64::FPR64RegClass;
8724 Opc = AArch64::FMLAv4f16;
8725 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8726 FMAInstKind::Accumulator);
8727 break;
8728
8731 RC = &AArch64::FPR64RegClass;
8733 Opc = AArch64::FMLAv2i32_indexed;
8734 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8735 FMAInstKind::Indexed);
8736 } else {
8737 Opc = AArch64::FMLAv2f32;
8738 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8739 FMAInstKind::Accumulator);
8740 }
8741 break;
8744 RC = &AArch64::FPR64RegClass;
8746 Opc = AArch64::FMLAv2i32_indexed;
8747 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8748 FMAInstKind::Indexed);
8749 } else {
8750 Opc = AArch64::FMLAv2f32;
8751 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8752 FMAInstKind::Accumulator);
8753 }
8754 break;
8755
8757 RC = &AArch64::FPR128RegClass;
8758 Opc = AArch64::FMLAv8i16_indexed;
8759 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8760 FMAInstKind::Indexed);
8761 break;
8763 RC = &AArch64::FPR128RegClass;
8764 Opc = AArch64::FMLAv8f16;
8765 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8766 FMAInstKind::Accumulator);
8767 break;
8769 RC = &AArch64::FPR128RegClass;
8770 Opc = AArch64::FMLAv8i16_indexed;
8771 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8772 FMAInstKind::Indexed);
8773 break;
8775 RC = &AArch64::FPR128RegClass;
8776 Opc = AArch64::FMLAv8f16;
8777 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8778 FMAInstKind::Accumulator);
8779 break;
8780
8783 RC = &AArch64::FPR128RegClass;
8785 Opc = AArch64::FMLAv2i64_indexed;
8786 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8787 FMAInstKind::Indexed);
8788 } else {
8789 Opc = AArch64::FMLAv2f64;
8790 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8791 FMAInstKind::Accumulator);
8792 }
8793 break;
8796 RC = &AArch64::FPR128RegClass;
8798 Opc = AArch64::FMLAv2i64_indexed;
8799 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8800 FMAInstKind::Indexed);
8801 } else {
8802 Opc = AArch64::FMLAv2f64;
8803 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8804 FMAInstKind::Accumulator);
8805 }
8806 break;
8807
8810 RC = &AArch64::FPR128RegClass;
8812 Opc = AArch64::FMLAv4i32_indexed;
8813 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8814 FMAInstKind::Indexed);
8815 } else {
8816 Opc = AArch64::FMLAv4f32;
8817 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8818 FMAInstKind::Accumulator);
8819 }
8820 break;
8821
8824 RC = &AArch64::FPR128RegClass;
8826 Opc = AArch64::FMLAv4i32_indexed;
8827 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8828 FMAInstKind::Indexed);
8829 } else {
8830 Opc = AArch64::FMLAv4f32;
8831 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8832 FMAInstKind::Accumulator);
8833 }
8834 break;
8835
8837 Opc = AArch64::FNMSUBHrrr;
8838 RC = &AArch64::FPR16RegClass;
8839 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8840 break;
8842 Opc = AArch64::FNMSUBSrrr;
8843 RC = &AArch64::FPR32RegClass;
8844 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8845 break;
8847 Opc = AArch64::FNMSUBDrrr;
8848 RC = &AArch64::FPR64RegClass;
8849 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8850 break;
8851
8853 Opc = AArch64::FNMADDHrrr;
8854 RC = &AArch64::FPR16RegClass;
8855 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8856 break;
8858 Opc = AArch64::FNMADDSrrr;
8859 RC = &AArch64::FPR32RegClass;
8860 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8861 break;
8863 Opc = AArch64::FNMADDDrrr;
8864 RC = &AArch64::FPR64RegClass;
8865 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8866 break;
8867
8869 Opc = AArch64::FMSUBHrrr;
8870 RC = &AArch64::FPR16RegClass;
8871 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8872 break;
8874 Opc = AArch64::FMSUBSrrr;
8875 RC = &AArch64::FPR32RegClass;
8876 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8877 break;
8879 Opc = AArch64::FMSUBDrrr;
8880 RC = &AArch64::FPR64RegClass;
8881 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8882 break;
8883
8885 Opc = AArch64::FMLSv1i32_indexed;
8886 RC = &AArch64::FPR32RegClass;
8887 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8888 FMAInstKind::Indexed);
8889 break;
8890
8892 Opc = AArch64::FMLSv1i64_indexed;
8893 RC = &AArch64::FPR64RegClass;
8894 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8895 FMAInstKind::Indexed);
8896 break;
8897
8900 RC = &AArch64::FPR64RegClass;
8901 Register NewVR = MRI.createVirtualRegister(RC);
8902 MachineInstrBuilder MIB1 =
8903 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8904 .add(Root.getOperand(2));
8905 InsInstrs.push_back(MIB1);
8906 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8908 Opc = AArch64::FMLAv4f16;
8909 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8910 FMAInstKind::Accumulator, &NewVR);
8911 } else {
8912 Opc = AArch64::FMLAv4i16_indexed;
8913 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8914 FMAInstKind::Indexed, &NewVR);
8915 }
8916 break;
8917 }
8919 RC = &AArch64::FPR64RegClass;
8920 Opc = AArch64::FMLSv4f16;
8921 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8922 FMAInstKind::Accumulator);
8923 break;
8925 RC = &AArch64::FPR64RegClass;
8926 Opc = AArch64::FMLSv4i16_indexed;
8927 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8928 FMAInstKind::Indexed);
8929 break;
8930
8933 RC = &AArch64::FPR64RegClass;
8935 Opc = AArch64::FMLSv2i32_indexed;
8936 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8937 FMAInstKind::Indexed);
8938 } else {
8939 Opc = AArch64::FMLSv2f32;
8940 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8941 FMAInstKind::Accumulator);
8942 }
8943 break;
8944
8947 RC = &AArch64::FPR128RegClass;
8948 Register NewVR = MRI.createVirtualRegister(RC);
8949 MachineInstrBuilder MIB1 =
8950 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
8951 .add(Root.getOperand(2));
8952 InsInstrs.push_back(MIB1);
8953 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8955 Opc = AArch64::FMLAv8f16;
8956 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8957 FMAInstKind::Accumulator, &NewVR);
8958 } else {
8959 Opc = AArch64::FMLAv8i16_indexed;
8960 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8961 FMAInstKind::Indexed, &NewVR);
8962 }
8963 break;
8964 }
8966 RC = &AArch64::FPR128RegClass;
8967 Opc = AArch64::FMLSv8f16;
8968 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8969 FMAInstKind::Accumulator);
8970 break;
8972 RC = &AArch64::FPR128RegClass;
8973 Opc = AArch64::FMLSv8i16_indexed;
8974 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8975 FMAInstKind::Indexed);
8976 break;
8977
8980 RC = &AArch64::FPR128RegClass;
8982 Opc = AArch64::FMLSv2i64_indexed;
8983 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8984 FMAInstKind::Indexed);
8985 } else {
8986 Opc = AArch64::FMLSv2f64;
8987 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8988 FMAInstKind::Accumulator);
8989 }
8990 break;
8991
8994 RC = &AArch64::FPR128RegClass;
8996 Opc = AArch64::FMLSv4i32_indexed;
8997 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8998 FMAInstKind::Indexed);
8999 } else {
9000 Opc = AArch64::FMLSv4f32;
9001 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9002 FMAInstKind::Accumulator);
9003 }
9004 break;
9007 RC = &AArch64::FPR64RegClass;
9008 Register NewVR = MRI.createVirtualRegister(RC);
9009 MachineInstrBuilder MIB1 =
9010 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9011 .add(Root.getOperand(2));
9012 InsInstrs.push_back(MIB1);
9013 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9015 Opc = AArch64::FMLAv2i32_indexed;
9016 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9017 FMAInstKind::Indexed, &NewVR);
9018 } else {
9019 Opc = AArch64::FMLAv2f32;
9020 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9021 FMAInstKind::Accumulator, &NewVR);
9022 }
9023 break;
9024 }
9027 RC = &AArch64::FPR128RegClass;
9028 Register NewVR = MRI.createVirtualRegister(RC);
9029 MachineInstrBuilder MIB1 =
9030 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9031 .add(Root.getOperand(2));
9032 InsInstrs.push_back(MIB1);
9033 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9035 Opc = AArch64::FMLAv4i32_indexed;
9036 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9037 FMAInstKind::Indexed, &NewVR);
9038 } else {
9039 Opc = AArch64::FMLAv4f32;
9040 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9041 FMAInstKind::Accumulator, &NewVR);
9042 }
9043 break;
9044 }
9047 RC = &AArch64::FPR128RegClass;
9048 Register NewVR = MRI.createVirtualRegister(RC);
9049 MachineInstrBuilder MIB1 =
9050 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9051 .add(Root.getOperand(2));
9052 InsInstrs.push_back(MIB1);
9053 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9055 Opc = AArch64::FMLAv2i64_indexed;
9056 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9057 FMAInstKind::Indexed, &NewVR);
9058 } else {
9059 Opc = AArch64::FMLAv2f64;
9060 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9061 FMAInstKind::Accumulator, &NewVR);
9062 }
9063 break;
9064 }
9067 unsigned IdxDupOp =
9069 : 2;
9070 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9071 &AArch64::FPR128RegClass, MRI);
9072 break;
9073 }
9076 unsigned IdxDupOp =
9078 : 2;
9079 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9080 &AArch64::FPR128RegClass, MRI);
9081 break;
9082 }
9085 unsigned IdxDupOp =
9087 : 2;
9088 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9089 &AArch64::FPR128_loRegClass, MRI);
9090 break;
9091 }
9094 unsigned IdxDupOp =
9096 : 2;
9097 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9098 &AArch64::FPR128RegClass, MRI);
9099 break;
9100 }
9103 unsigned IdxDupOp =
9105 : 2;
9106 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9107 &AArch64::FPR128_loRegClass, MRI);
9108 break;
9109 }
9111 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9112 break;
9113 }
9115 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9116 Pattern, 4);
9117 break;
9118 }
9120 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9121 Pattern, 8);
9122 break;
9123 }
9125 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9126 Pattern, 16);
9127 break;
9128 }
9129
9130 } // end switch (Pattern)
9131 // Record MUL and ADD/SUB for deletion
9132 if (MUL)
9133 DelInstrs.push_back(MUL);
9134 DelInstrs.push_back(&Root);
9135
9136 // Set the flags on the inserted instructions to be the merged flags of the
9137 // instructions that we have combined.
9138 uint32_t Flags = Root.getFlags();
9139 if (MUL)
9140 Flags = Root.mergeFlagsWith(*MUL);
9141 for (auto *MI : InsInstrs)
9142 MI->setFlags(Flags);
9143}
9144
9145/// Replace csincr-branch sequence by simple conditional branch
9146///
9147/// Examples:
9148/// 1. \code
9149/// csinc w9, wzr, wzr, <condition code>
9150/// tbnz w9, #0, 0x44
9151/// \endcode
9152/// to
9153/// \code
9154/// b.<inverted condition code>
9155/// \endcode
9156///
9157/// 2. \code
9158/// csinc w9, wzr, wzr, <condition code>
9159/// tbz w9, #0, 0x44
9160/// \endcode
9161/// to
9162/// \code
9163/// b.<condition code>
9164/// \endcode
9165///
9166/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9167/// compare's constant operand is power of 2.
9168///
9169/// Examples:
9170/// \code
9171/// and w8, w8, #0x400
9172/// cbnz w8, L1
9173/// \endcode
9174/// to
9175/// \code
9176/// tbnz w8, #10, L1
9177/// \endcode
9178///
9179/// \param MI Conditional Branch
9180/// \return True when the simple conditional branch is generated
9181///
9183 bool IsNegativeBranch = false;
9184 bool IsTestAndBranch = false;
9185 unsigned TargetBBInMI = 0;
9186 switch (MI.getOpcode()) {
9187 default:
9188 llvm_unreachable("Unknown branch instruction?");
9189 case AArch64::Bcc:
9190 case AArch64::CBWPri:
9191 case AArch64::CBXPri:
9192 case AArch64::CBWPrr:
9193 case AArch64::CBXPrr:
9194 return false;
9195 case AArch64::CBZW:
9196 case AArch64::CBZX:
9197 TargetBBInMI = 1;
9198 break;
9199 case AArch64::CBNZW:
9200 case AArch64::CBNZX:
9201 TargetBBInMI = 1;
9202 IsNegativeBranch = true;
9203 break;
9204 case AArch64::TBZW:
9205 case AArch64::TBZX:
9206 TargetBBInMI = 2;
9207 IsTestAndBranch = true;
9208 break;
9209 case AArch64::TBNZW:
9210 case AArch64::TBNZX:
9211 TargetBBInMI = 2;
9212 IsNegativeBranch = true;
9213 IsTestAndBranch = true;
9214 break;
9215 }
9216 // So we increment a zero register and test for bits other
9217 // than bit 0? Conservatively bail out in case the verifier
9218 // missed this case.
9219 if (IsTestAndBranch && MI.getOperand(1).getImm())
9220 return false;
9221
9222 // Find Definition.
9223 assert(MI.getParent() && "Incomplete machine instruction\n");
9224 MachineBasicBlock *MBB = MI.getParent();
9225 MachineFunction *MF = MBB->getParent();
9227 Register VReg = MI.getOperand(0).getReg();
9228 if (!VReg.isVirtual())
9229 return false;
9230
9231 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9232
9233 // Look through COPY instructions to find definition.
9234 while (DefMI->isCopy()) {
9235 Register CopyVReg = DefMI->getOperand(1).getReg();
9236 if (!MRI->hasOneNonDBGUse(CopyVReg))
9237 return false;
9238 if (!MRI->hasOneDef(CopyVReg))
9239 return false;
9240 DefMI = MRI->getVRegDef(CopyVReg);
9241 }
9242
9243 switch (DefMI->getOpcode()) {
9244 default:
9245 return false;
9246 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9247 case AArch64::ANDWri:
9248 case AArch64::ANDXri: {
9249 if (IsTestAndBranch)
9250 return false;
9251 if (DefMI->getParent() != MBB)
9252 return false;
9253 if (!MRI->hasOneNonDBGUse(VReg))
9254 return false;
9255
9256 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9258 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9259 if (!isPowerOf2_64(Mask))
9260 return false;
9261
9263 Register NewReg = MO.getReg();
9264 if (!NewReg.isVirtual())
9265 return false;
9266
9267 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9268
9269 MachineBasicBlock &RefToMBB = *MBB;
9270 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9271 DebugLoc DL = MI.getDebugLoc();
9272 unsigned Imm = Log2_64(Mask);
9273 unsigned Opc = (Imm < 32)
9274 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9275 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9276 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9277 .addReg(NewReg)
9278 .addImm(Imm)
9279 .addMBB(TBB);
9280 // Register lives on to the CBZ now.
9281 MO.setIsKill(false);
9282
9283 // For immediate smaller than 32, we need to use the 32-bit
9284 // variant (W) in all cases. Indeed the 64-bit variant does not
9285 // allow to encode them.
9286 // Therefore, if the input register is 64-bit, we need to take the
9287 // 32-bit sub-part.
9288 if (!Is32Bit && Imm < 32)
9289 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9290 MI.eraseFromParent();
9291 return true;
9292 }
9293 // Look for CSINC
9294 case AArch64::CSINCWr:
9295 case AArch64::CSINCXr: {
9296 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9297 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9298 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9299 DefMI->getOperand(2).getReg() == AArch64::XZR))
9300 return false;
9301
9302 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9303 true) != -1)
9304 return false;
9305
9307 // Convert only when the condition code is not modified between
9308 // the CSINC and the branch. The CC may be used by other
9309 // instructions in between.
9311 return false;
9312 MachineBasicBlock &RefToMBB = *MBB;
9313 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9314 DebugLoc DL = MI.getDebugLoc();
9315 if (IsNegativeBranch)
9317 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9318 MI.eraseFromParent();
9319 return true;
9320 }
9321 }
9322}
9323
9324std::pair<unsigned, unsigned>
9326 const unsigned Mask = AArch64II::MO_FRAGMENT;
9327 return std::make_pair(TF & Mask, TF & ~Mask);
9328}
9329
9332 using namespace AArch64II;
9333
9334 static const std::pair<unsigned, const char *> TargetFlags[] = {
9335 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9336 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9337 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9338 {MO_HI12, "aarch64-hi12"}};
9339 return ArrayRef(TargetFlags);
9340}
9341
9344 using namespace AArch64II;
9345
9346 static const std::pair<unsigned, const char *> TargetFlags[] = {
9347 {MO_COFFSTUB, "aarch64-coffstub"},
9348 {MO_GOT, "aarch64-got"},
9349 {MO_NC, "aarch64-nc"},
9350 {MO_S, "aarch64-s"},
9351 {MO_TLS, "aarch64-tls"},
9352 {MO_DLLIMPORT, "aarch64-dllimport"},
9353 {MO_PREL, "aarch64-prel"},
9354 {MO_TAGGED, "aarch64-tagged"},
9355 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9356 };
9357 return ArrayRef(TargetFlags);
9358}
9359
9362 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9363 {{MOSuppressPair, "aarch64-suppress-pair"},
9364 {MOStridedAccess, "aarch64-strided-access"}};
9365 return ArrayRef(TargetFlags);
9366}
9367
9368/// Constants defining how certain sequences should be outlined.
9369/// This encompasses how an outlined function should be called, and what kind of
9370/// frame should be emitted for that outlined function.
9371///
9372/// \p MachineOutlinerDefault implies that the function should be called with
9373/// a save and restore of LR to the stack.
9374///
9375/// That is,
9376///
9377/// I1 Save LR OUTLINED_FUNCTION:
9378/// I2 --> BL OUTLINED_FUNCTION I1
9379/// I3 Restore LR I2
9380/// I3
9381/// RET
9382///
9383/// * Call construction overhead: 3 (save + BL + restore)
9384/// * Frame construction overhead: 1 (ret)
9385/// * Requires stack fixups? Yes
9386///
9387/// \p MachineOutlinerTailCall implies that the function is being created from
9388/// a sequence of instructions ending in a return.
9389///
9390/// That is,
9391///
9392/// I1 OUTLINED_FUNCTION:
9393/// I2 --> B OUTLINED_FUNCTION I1
9394/// RET I2
9395/// RET
9396///
9397/// * Call construction overhead: 1 (B)
9398/// * Frame construction overhead: 0 (Return included in sequence)
9399/// * Requires stack fixups? No
9400///
9401/// \p MachineOutlinerNoLRSave implies that the function should be called using
9402/// a BL instruction, but doesn't require LR to be saved and restored. This
9403/// happens when LR is known to be dead.
9404///
9405/// That is,
9406///
9407/// I1 OUTLINED_FUNCTION:
9408/// I2 --> BL OUTLINED_FUNCTION I1
9409/// I3 I2
9410/// I3
9411/// RET
9412///
9413/// * Call construction overhead: 1 (BL)
9414/// * Frame construction overhead: 1 (RET)
9415/// * Requires stack fixups? No
9416///
9417/// \p MachineOutlinerThunk implies that the function is being created from
9418/// a sequence of instructions ending in a call. The outlined function is
9419/// called with a BL instruction, and the outlined function tail-calls the
9420/// original call destination.
9421///
9422/// That is,
9423///
9424/// I1 OUTLINED_FUNCTION:
9425/// I2 --> BL OUTLINED_FUNCTION I1
9426/// BL f I2
9427/// B f
9428/// * Call construction overhead: 1 (BL)
9429/// * Frame construction overhead: 0
9430/// * Requires stack fixups? No
9431///
9432/// \p MachineOutlinerRegSave implies that the function should be called with a
9433/// save and restore of LR to an available register. This allows us to avoid
9434/// stack fixups. Note that this outlining variant is compatible with the
9435/// NoLRSave case.
9436///
9437/// That is,
9438///
9439/// I1 Save LR OUTLINED_FUNCTION:
9440/// I2 --> BL OUTLINED_FUNCTION I1
9441/// I3 Restore LR I2
9442/// I3
9443/// RET
9444///
9445/// * Call construction overhead: 3 (save + BL + restore)
9446/// * Frame construction overhead: 1 (ret)
9447/// * Requires stack fixups? No
9449 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9450 MachineOutlinerTailCall, /// Only emit a branch.
9451 MachineOutlinerNoLRSave, /// Emit a call and return.
9452 MachineOutlinerThunk, /// Emit a call and tail-call.
9453 MachineOutlinerRegSave /// Same as default, but save to a register.
9455
9459 UnsafeRegsDead = 0x8
9461
9463AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9464 MachineFunction *MF = C.getMF();
9466 const AArch64RegisterInfo *ARI =
9467 static_cast<const AArch64RegisterInfo *>(&TRI);
9468 // Check if there is an available register across the sequence that we can
9469 // use.
9470 for (unsigned Reg : AArch64::GPR64RegClass) {
9471 if (!ARI->isReservedReg(*MF, Reg) &&
9472 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9473 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9474 Reg != AArch64::X17 && // Ditto for X17.
9475 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9476 C.isAvailableInsideSeq(Reg, TRI))
9477 return Reg;
9478 }
9479 return Register();
9480}
9481
9482static bool
9484 const outliner::Candidate &b) {
9485 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9486 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9487
9488 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
9489 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
9490}
9491
9492static bool
9494 const outliner::Candidate &b) {
9495 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9496 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9497
9498 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9499}
9500
9502 const outliner::Candidate &b) {
9503 const AArch64Subtarget &SubtargetA =
9505 const AArch64Subtarget &SubtargetB =
9506 b.getMF()->getSubtarget<AArch64Subtarget>();
9507 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9508}
9509
9510std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9512 const MachineModuleInfo &MMI,
9513 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9514 unsigned MinRepeats) const {
9515 unsigned SequenceSize = 0;
9516 for (auto &MI : RepeatedSequenceLocs[0])
9517 SequenceSize += getInstSizeInBytes(MI);
9518
9519 unsigned NumBytesToCreateFrame = 0;
9520
9521 // We only allow outlining for functions having exactly matching return
9522 // address signing attributes, i.e., all share the same value for the
9523 // attribute "sign-return-address" and all share the same type of key they
9524 // are signed with.
9525 // Additionally we require all functions to simultaneously either support
9526 // v8.3a features or not. Otherwise an outlined function could get signed
9527 // using dedicated v8.3 instructions and a call from a function that doesn't
9528 // support v8.3 instructions would therefore be invalid.
9529 if (std::adjacent_find(
9530 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9531 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9532 // Return true if a and b are non-equal w.r.t. return address
9533 // signing or support of v8.3a features
9534 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9535 outliningCandidatesSigningKeyConsensus(a, b) &&
9536 outliningCandidatesV8_3OpsConsensus(a, b)) {
9537 return false;
9538 }
9539 return true;
9540 }) != RepeatedSequenceLocs.end()) {
9541 return std::nullopt;
9542 }
9543
9544 // Since at this point all candidates agree on their return address signing
9545 // picking just one is fine. If the candidate functions potentially sign their
9546 // return addresses, the outlined function should do the same. Note that in
9547 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9548 // not certainly true that the outlined function will have to sign its return
9549 // address but this decision is made later, when the decision to outline
9550 // has already been made.
9551 // The same holds for the number of additional instructions we need: On
9552 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9553 // necessary. However, at this point we don't know if the outlined function
9554 // will have a RET instruction so we assume the worst.
9555 const TargetRegisterInfo &TRI = getRegisterInfo();
9556 // Performing a tail call may require extra checks when PAuth is enabled.
9557 // If PAuth is disabled, set it to zero for uniformity.
9558 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9559 if (RepeatedSequenceLocs[0]
9560 .getMF()
9561 ->getInfo<AArch64FunctionInfo>()
9562 ->shouldSignReturnAddress(true)) {
9563 // One PAC and one AUT instructions
9564 NumBytesToCreateFrame += 8;
9565
9566 // PAuth is enabled - set extra tail call cost, if any.
9567 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9568 *RepeatedSequenceLocs[0].getMF());
9569 NumBytesToCheckLRInTCEpilogue =
9571 // Checking the authenticated LR value may significantly impact
9572 // SequenceSize, so account for it for more precise results.
9573 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9574 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9575
9576 // We have to check if sp modifying instructions would get outlined.
9577 // If so we only allow outlining if sp is unchanged overall, so matching
9578 // sub and add instructions are okay to outline, all other sp modifications
9579 // are not
9580 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9581 int SPValue = 0;
9582 for (auto &MI : C) {
9583 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9584 switch (MI.getOpcode()) {
9585 case AArch64::ADDXri:
9586 case AArch64::ADDWri:
9587 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9588 assert(MI.getOperand(2).isImm() &&
9589 "Expected operand to be immediate");
9590 assert(MI.getOperand(1).isReg() &&
9591 "Expected operand to be a register");
9592 // Check if the add just increments sp. If so, we search for
9593 // matching sub instructions that decrement sp. If not, the
9594 // modification is illegal
9595 if (MI.getOperand(1).getReg() == AArch64::SP)
9596 SPValue += MI.getOperand(2).getImm();
9597 else
9598 return true;
9599 break;
9600 case AArch64::SUBXri:
9601 case AArch64::SUBWri:
9602 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9603 assert(MI.getOperand(2).isImm() &&
9604 "Expected operand to be immediate");
9605 assert(MI.getOperand(1).isReg() &&
9606 "Expected operand to be a register");
9607 // Check if the sub just decrements sp. If so, we search for
9608 // matching add instructions that increment sp. If not, the
9609 // modification is illegal
9610 if (MI.getOperand(1).getReg() == AArch64::SP)
9611 SPValue -= MI.getOperand(2).getImm();
9612 else
9613 return true;
9614 break;
9615 default:
9616 return true;
9617 }
9618 }
9619 }
9620 if (SPValue)
9621 return true;
9622 return false;
9623 };
9624 // Remove candidates with illegal stack modifying instructions
9625 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9626
9627 // If the sequence doesn't have enough candidates left, then we're done.
9628 if (RepeatedSequenceLocs.size() < MinRepeats)
9629 return std::nullopt;
9630 }
9631
9632 // Properties about candidate MBBs that hold for all of them.
9633 unsigned FlagsSetInAll = 0xF;
9634
9635 // Compute liveness information for each candidate, and set FlagsSetInAll.
9636 for (outliner::Candidate &C : RepeatedSequenceLocs)
9637 FlagsSetInAll &= C.Flags;
9638
9639 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9640
9641 // Helper lambda which sets call information for every candidate.
9642 auto SetCandidateCallInfo =
9643 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9644 for (outliner::Candidate &C : RepeatedSequenceLocs)
9645 C.setCallInfo(CallID, NumBytesForCall);
9646 };
9647
9648 unsigned FrameID = MachineOutlinerDefault;
9649 NumBytesToCreateFrame += 4;
9650
9651 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9652 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9653 });
9654
9655 // We check to see if CFI Instructions are present, and if they are
9656 // we find the number of CFI Instructions in the candidates.
9657 unsigned CFICount = 0;
9658 for (auto &I : RepeatedSequenceLocs[0]) {
9659 if (I.isCFIInstruction())
9660 CFICount++;
9661 }
9662
9663 // We compare the number of found CFI Instructions to the number of CFI
9664 // instructions in the parent function for each candidate. We must check this
9665 // since if we outline one of the CFI instructions in a function, we have to
9666 // outline them all for correctness. If we do not, the address offsets will be
9667 // incorrect between the two sections of the program.
9668 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9669 std::vector<MCCFIInstruction> CFIInstructions =
9670 C.getMF()->getFrameInstructions();
9671
9672 if (CFICount > 0 && CFICount != CFIInstructions.size())
9673 return std::nullopt;
9674 }
9675
9676 // Returns true if an instructions is safe to fix up, false otherwise.
9677 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9678 if (MI.isCall())
9679 return true;
9680
9681 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
9682 !MI.readsRegister(AArch64::SP, &TRI))
9683 return true;
9684
9685 // Any modification of SP will break our code to save/restore LR.
9686 // FIXME: We could handle some instructions which add a constant
9687 // offset to SP, with a bit more work.
9688 if (MI.modifiesRegister(AArch64::SP, &TRI))
9689 return false;
9690
9691 // At this point, we have a stack instruction that we might need to
9692 // fix up. We'll handle it if it's a load or store.
9693 if (MI.mayLoadOrStore()) {
9694 const MachineOperand *Base; // Filled with the base operand of MI.
9695 int64_t Offset; // Filled with the offset of MI.
9696 bool OffsetIsScalable;
9697
9698 // Does it allow us to offset the base operand and is the base the
9699 // register SP?
9700 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
9701 !Base->isReg() || Base->getReg() != AArch64::SP)
9702 return false;
9703
9704 // Fixe-up code below assumes bytes.
9705 if (OffsetIsScalable)
9706 return false;
9707
9708 // Find the minimum/maximum offset for this instruction and check
9709 // if fixing it up would be in range.
9710 int64_t MinOffset,
9711 MaxOffset; // Unscaled offsets for the instruction.
9712 // The scale to multiply the offsets by.
9713 TypeSize Scale(0U, false), DummyWidth(0U, false);
9714 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
9715
9716 Offset += 16; // Update the offset to what it would be if we outlined.
9717 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9718 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9719 return false;
9720
9721 // It's in range, so we can outline it.
9722 return true;
9723 }
9724
9725 // FIXME: Add handling for instructions like "add x0, sp, #8".
9726
9727 // We can't fix it up, so don't outline it.
9728 return false;
9729 };
9730
9731 // True if it's possible to fix up each stack instruction in this sequence.
9732 // Important for frames/call variants that modify the stack.
9733 bool AllStackInstrsSafe =
9734 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
9735
9736 // If the last instruction in any candidate is a terminator, then we should
9737 // tail call all of the candidates.
9738 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9739 FrameID = MachineOutlinerTailCall;
9740 NumBytesToCreateFrame = 0;
9741 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9742 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9743 }
9744
9745 else if (LastInstrOpcode == AArch64::BL ||
9746 ((LastInstrOpcode == AArch64::BLR ||
9747 LastInstrOpcode == AArch64::BLRNoIP) &&
9748 !HasBTI)) {
9749 // FIXME: Do we need to check if the code after this uses the value of LR?
9750 FrameID = MachineOutlinerThunk;
9751 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9752 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9753 }
9754
9755 else {
9756 // We need to decide how to emit calls + frames. We can always emit the same
9757 // frame if we don't need to save to the stack. If we have to save to the
9758 // stack, then we need a different frame.
9759 unsigned NumBytesNoStackCalls = 0;
9760 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9761
9762 // Check if we have to save LR.
9763 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9764 bool LRAvailable =
9765 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
9766 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9767 : true;
9768 // If we have a noreturn caller, then we're going to be conservative and
9769 // say that we have to save LR. If we don't have a ret at the end of the
9770 // block, then we can't reason about liveness accurately.
9771 //
9772 // FIXME: We can probably do better than always disabling this in
9773 // noreturn functions by fixing up the liveness info.
9774 bool IsNoReturn =
9775 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9776
9777 // Is LR available? If so, we don't need a save.
9778 if (LRAvailable && !IsNoReturn) {
9779 NumBytesNoStackCalls += 4;
9780 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9781 CandidatesWithoutStackFixups.push_back(C);
9782 }
9783
9784 // Is an unused register available? If so, we won't modify the stack, so
9785 // we can outline with the same frame type as those that don't save LR.
9786 else if (findRegisterToSaveLRTo(C)) {
9787 NumBytesNoStackCalls += 12;
9788 C.setCallInfo(MachineOutlinerRegSave, 12);
9789 CandidatesWithoutStackFixups.push_back(C);
9790 }
9791
9792 // Is SP used in the sequence at all? If not, we don't have to modify
9793 // the stack, so we are guaranteed to get the same frame.
9794 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9795 NumBytesNoStackCalls += 12;
9796 C.setCallInfo(MachineOutlinerDefault, 12);
9797 CandidatesWithoutStackFixups.push_back(C);
9798 }
9799
9800 // If we outline this, we need to modify the stack. Pretend we don't
9801 // outline this by saving all of its bytes.
9802 else {
9803 NumBytesNoStackCalls += SequenceSize;
9804 }
9805 }
9806
9807 // If there are no places where we have to save LR, then note that we
9808 // don't have to update the stack. Otherwise, give every candidate the
9809 // default call type, as long as it's safe to do so.
9810 if (!AllStackInstrsSafe ||
9811 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9812 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9813 FrameID = MachineOutlinerNoLRSave;
9814 if (RepeatedSequenceLocs.size() < MinRepeats)
9815 return std::nullopt;
9816 } else {
9817 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9818
9819 // Bugzilla ID: 46767
9820 // TODO: Check if fixing up the stack more than once is safe so we can
9821 // outline these.
9822 //
9823 // An outline resulting in a caller that requires stack fixups at the
9824 // callsite to a callee that also requires stack fixups can happen when
9825 // there are no available registers at the candidate callsite for a
9826 // candidate that itself also has calls.
9827 //
9828 // In other words if function_containing_sequence in the following pseudo
9829 // assembly requires that we save LR at the point of the call, but there
9830 // are no available registers: in this case we save using SP and as a
9831 // result the SP offsets requires stack fixups by multiples of 16.
9832 //
9833 // function_containing_sequence:
9834 // ...
9835 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9836 // call OUTLINED_FUNCTION_N
9837 // restore LR from SP
9838 // ...
9839 //
9840 // OUTLINED_FUNCTION_N:
9841 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9842 // ...
9843 // bl foo
9844 // restore LR from SP
9845 // ret
9846 //
9847 // Because the code to handle more than one stack fixup does not
9848 // currently have the proper checks for legality, these cases will assert
9849 // in the AArch64 MachineOutliner. This is because the code to do this
9850 // needs more hardening, testing, better checks that generated code is
9851 // legal, etc and because it is only verified to handle a single pass of
9852 // stack fixup.
9853 //
9854 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9855 // these cases until they are known to be handled. Bugzilla 46767 is
9856 // referenced in comments at the assert site.
9857 //
9858 // To avoid asserting (or generating non-legal code on noassert builds)
9859 // we remove all candidates which would need more than one stack fixup by
9860 // pruning the cases where the candidate has calls while also having no
9861 // available LR and having no available general purpose registers to copy
9862 // LR to (ie one extra stack save/restore).
9863 //
9864 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9865 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9866 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9867 return (llvm::any_of(C, IsCall)) &&
9868 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9869 !findRegisterToSaveLRTo(C));
9870 });
9871 }
9872 }
9873
9874 // If we dropped all of the candidates, bail out here.
9875 if (RepeatedSequenceLocs.size() < MinRepeats)
9876 return std::nullopt;
9877 }
9878
9879 // Does every candidate's MBB contain a call? If so, then we might have a call
9880 // in the range.
9881 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9882 // Check if the range contains a call. These require a save + restore of the
9883 // link register.
9884 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9885 bool ModStackToSaveLR = false;
9886 if (any_of(drop_end(FirstCand),
9887 [](const MachineInstr &MI) { return MI.isCall(); }))
9888 ModStackToSaveLR = true;
9889
9890 // Handle the last instruction separately. If this is a tail call, then the
9891 // last instruction is a call. We don't want to save + restore in this case.
9892 // However, it could be possible that the last instruction is a call without
9893 // it being valid to tail call this sequence. We should consider this as
9894 // well.
9895 else if (FrameID != MachineOutlinerThunk &&
9896 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9897 ModStackToSaveLR = true;
9898
9899 if (ModStackToSaveLR) {
9900 // We can't fix up the stack. Bail out.
9901 if (!AllStackInstrsSafe)
9902 return std::nullopt;
9903
9904 // Save + restore LR.
9905 NumBytesToCreateFrame += 8;
9906 }
9907 }
9908
9909 // If we have CFI instructions, we can only outline if the outlined section
9910 // can be a tail call
9911 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9912 return std::nullopt;
9913
9914 return std::make_unique<outliner::OutlinedFunction>(
9915 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9916}
9917
9919 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9920 // If a bunch of candidates reach this point they must agree on their return
9921 // address signing. It is therefore enough to just consider the signing
9922 // behaviour of one of them
9923 const auto &CFn = Candidates.front().getMF()->getFunction();
9924
9925 if (CFn.hasFnAttribute("ptrauth-returns"))
9926 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9927 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9928 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9929 // Since all candidates belong to the same module, just copy the
9930 // function-level attributes of an arbitrary function.
9931 if (CFn.hasFnAttribute("sign-return-address"))
9932 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9933 if (CFn.hasFnAttribute("sign-return-address-key"))
9934 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9935
9936 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9937}
9938
9940 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9941 const Function &F = MF.getFunction();
9942
9943 // Can F be deduplicated by the linker? If it can, don't outline from it.
9944 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9945 return false;
9946
9947 // Don't outline from functions with section markings; the program could
9948 // expect that all the code is in the named section.
9949 // FIXME: Allow outlining from multiple functions with the same section
9950 // marking.
9951 if (F.hasSection())
9952 return false;
9953
9954 // Outlining from functions with redzones is unsafe since the outliner may
9955 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9956 // outline from it.
9958 if (!AFI || AFI->hasRedZone().value_or(true))
9959 return false;
9960
9961 // FIXME: Determine whether it is safe to outline from functions which contain
9962 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9963 // outlined together and ensure it is safe to outline with async unwind info,
9964 // required for saving & restoring VG around calls.
9965 if (AFI->hasStreamingModeChanges())
9966 return false;
9967
9968 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9970 return false;
9971
9972 // It's safe to outline from MF.
9973 return true;
9974}
9975
9978 unsigned &Flags) const {
9980 "Must track liveness!");
9982 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9983 Ranges;
9984 // According to the AArch64 Procedure Call Standard, the following are
9985 // undefined on entry/exit from a function call:
9986 //
9987 // * Registers x16, x17, (and thus w16, w17)
9988 // * Condition codes (and thus the NZCV register)
9989 //
9990 // If any of these registers are used inside or live across an outlined
9991 // function, then they may be modified later, either by the compiler or
9992 // some other tool (like the linker).
9993 //
9994 // To avoid outlining in these situations, partition each block into ranges
9995 // where these registers are dead. We will only outline from those ranges.
9997 auto AreAllUnsafeRegsDead = [&LRU]() {
9998 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
9999 LRU.available(AArch64::NZCV);
10000 };
10001
10002 // We need to know if LR is live across an outlining boundary later on in
10003 // order to decide how we'll create the outlined call, frame, etc.
10004 //
10005 // It's pretty expensive to check this for *every candidate* within a block.
10006 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10007 // to compute liveness from the end of the block for O(n) candidates within
10008 // the block.
10009 //
10010 // So, to improve the average case, let's keep track of liveness from the end
10011 // of the block to the beginning of *every outlinable range*. If we know that
10012 // LR is available in every range we could outline from, then we know that
10013 // we don't need to check liveness for any candidate within that range.
10014 bool LRAvailableEverywhere = true;
10015 // Compute liveness bottom-up.
10016 LRU.addLiveOuts(MBB);
10017 // Update flags that require info about the entire MBB.
10018 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10019 if (MI.isCall() && !MI.isTerminator())
10020 Flags |= MachineOutlinerMBBFlags::HasCalls;
10021 };
10022 // Range: [RangeBegin, RangeEnd)
10023 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10024 unsigned RangeLen;
10025 auto CreateNewRangeStartingAt =
10026 [&RangeBegin, &RangeEnd,
10027 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10028 RangeBegin = NewBegin;
10029 RangeEnd = std::next(RangeBegin);
10030 RangeLen = 0;
10031 };
10032 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10033 // At least one unsafe register is not dead. We do not want to outline at
10034 // this point. If it is long enough to outline from and does not cross a
10035 // bundle boundary, save the range [RangeBegin, RangeEnd).
10036 if (RangeLen <= 1)
10037 return;
10038 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10039 return;
10040 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10041 return;
10042 Ranges.emplace_back(RangeBegin, RangeEnd);
10043 };
10044 // Find the first point where all unsafe registers are dead.
10045 // FIND: <safe instr> <-- end of first potential range
10046 // SKIP: <unsafe def>
10047 // SKIP: ... everything between ...
10048 // SKIP: <unsafe use>
10049 auto FirstPossibleEndPt = MBB.instr_rbegin();
10050 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10051 LRU.stepBackward(*FirstPossibleEndPt);
10052 // Update flags that impact how we outline across the entire block,
10053 // regardless of safety.
10054 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10055 if (AreAllUnsafeRegsDead())
10056 break;
10057 }
10058 // If we exhausted the entire block, we have no safe ranges to outline.
10059 if (FirstPossibleEndPt == MBB.instr_rend())
10060 return Ranges;
10061 // Current range.
10062 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10063 // StartPt points to the first place where all unsafe registers
10064 // are dead (if there is any such point). Begin partitioning the MBB into
10065 // ranges.
10066 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10067 LRU.stepBackward(MI);
10068 UpdateWholeMBBFlags(MI);
10069 if (!AreAllUnsafeRegsDead()) {
10070 SaveRangeIfNonEmpty();
10071 CreateNewRangeStartingAt(MI.getIterator());
10072 continue;
10073 }
10074 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10075 RangeBegin = MI.getIterator();
10076 ++RangeLen;
10077 }
10078 // Above loop misses the last (or only) range. If we are still safe, then
10079 // let's save the range.
10080 if (AreAllUnsafeRegsDead())
10081 SaveRangeIfNonEmpty();
10082 if (Ranges.empty())
10083 return Ranges;
10084 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
10085 // the order.
10086 std::reverse(Ranges.begin(), Ranges.end());
10087 // If there is at least one outlinable range where LR is unavailable
10088 // somewhere, remember that.
10089 if (!LRAvailableEverywhere)
10090 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
10091 return Ranges;
10092}
10093
10097 unsigned Flags) const {
10098 MachineInstr &MI = *MIT;
10099
10100 // Don't outline anything used for return address signing. The outlined
10101 // function will get signed later if needed
10102 switch (MI.getOpcode()) {
10103 case AArch64::PACM:
10104 case AArch64::PACIASP:
10105 case AArch64::PACIBSP:
10106 case AArch64::PACIASPPC:
10107 case AArch64::PACIBSPPC:
10108 case AArch64::AUTIASP:
10109 case AArch64::AUTIBSP:
10110 case AArch64::AUTIASPPCi:
10111 case AArch64::AUTIASPPCr:
10112 case AArch64::AUTIBSPPCi:
10113 case AArch64::AUTIBSPPCr:
10114 case AArch64::RETAA:
10115 case AArch64::RETAB:
10116 case AArch64::RETAASPPCi:
10117 case AArch64::RETAASPPCr:
10118 case AArch64::RETABSPPCi:
10119 case AArch64::RETABSPPCr:
10120 case AArch64::EMITBKEY:
10121 case AArch64::PAUTH_PROLOGUE:
10122 case AArch64::PAUTH_EPILOGUE:
10124 }
10125
10126 // We can only outline these if we will tail call the outlined function, or
10127 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10128 // in a tail call.
10129 //
10130 // FIXME: If the proper fixups for the offset are implemented, this should be
10131 // possible.
10132 if (MI.isCFIInstruction())
10134
10135 // Is this a terminator for a basic block?
10136 if (MI.isTerminator())
10137 // TargetInstrInfo::getOutliningType has already filtered out anything
10138 // that would break this, so we can allow it here.
10140
10141 // Make sure none of the operands are un-outlinable.
10142 for (const MachineOperand &MOP : MI.operands()) {
10143 // A check preventing CFI indices was here before, but only CFI
10144 // instructions should have those.
10145 assert(!MOP.isCFIIndex());
10146
10147 // If it uses LR or W30 explicitly, then don't touch it.
10148 if (MOP.isReg() && !MOP.isImplicit() &&
10149 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10151 }
10152
10153 // Special cases for instructions that can always be outlined, but will fail
10154 // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
10155 // be outlined because they don't require a *specific* value to be in LR.
10156 if (MI.getOpcode() == AArch64::ADRP)
10158
10159 // If MI is a call we might be able to outline it. We don't want to outline
10160 // any calls that rely on the position of items on the stack. When we outline
10161 // something containing a call, we have to emit a save and restore of LR in
10162 // the outlined function. Currently, this always happens by saving LR to the
10163 // stack. Thus, if we outline, say, half the parameters for a function call
10164 // plus the call, then we'll break the callee's expectations for the layout
10165 // of the stack.
10166 //
10167 // FIXME: Allow calls to functions which construct a stack frame, as long
10168 // as they don't access arguments on the stack.
10169 // FIXME: Figure out some way to analyze functions defined in other modules.
10170 // We should be able to compute the memory usage based on the IR calling
10171 // convention, even if we can't see the definition.
10172 if (MI.isCall()) {
10173 // Get the function associated with the call. Look at each operand and find
10174 // the one that represents the callee and get its name.
10175 const Function *Callee = nullptr;
10176 for (const MachineOperand &MOP : MI.operands()) {
10177 if (MOP.isGlobal()) {
10178 Callee = dyn_cast<Function>(MOP.getGlobal());
10179 break;
10180 }
10181 }
10182
10183 // Never outline calls to mcount. There isn't any rule that would require
10184 // this, but the Linux kernel's "ftrace" feature depends on it.
10185 if (Callee && Callee->getName() == "\01_mcount")
10187
10188 // If we don't know anything about the callee, assume it depends on the
10189 // stack layout of the caller. In that case, it's only legal to outline
10190 // as a tail-call. Explicitly list the call instructions we know about so we
10191 // don't get unexpected results with call pseudo-instructions.
10192 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10193 if (MI.getOpcode() == AArch64::BLR ||
10194 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10195 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10196
10197 if (!Callee)
10198 return UnknownCallOutlineType;
10199
10200 // We have a function we have information about. Check it if it's something
10201 // can safely outline.
10202 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10203
10204 // We don't know what's going on with the callee at all. Don't touch it.
10205 if (!CalleeMF)
10206 return UnknownCallOutlineType;
10207
10208 // Check if we know anything about the callee saves on the function. If we
10209 // don't, then don't touch it, since that implies that we haven't
10210 // computed anything about its stack frame yet.
10211 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10212 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10213 MFI.getNumObjects() > 0)
10214 return UnknownCallOutlineType;
10215
10216 // At this point, we can say that CalleeMF ought to not pass anything on the
10217 // stack. Therefore, we can outline it.
10219 }
10220
10221 // Don't touch the link register or W30.
10222 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10223 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10225
10226 // Don't outline BTI instructions, because that will prevent the outlining
10227 // site from being indirectly callable.
10228 if (hasBTISemantics(MI))
10230
10232}
10233
10234void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10235 for (MachineInstr &MI : MBB) {
10236 const MachineOperand *Base;
10237 TypeSize Width(0, false);
10238 int64_t Offset;
10239 bool OffsetIsScalable;
10240
10241 // Is this a load or store with an immediate offset with SP as the base?
10242 if (!MI.mayLoadOrStore() ||
10243 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10244 &RI) ||
10245 (Base->isReg() && Base->getReg() != AArch64::SP))
10246 continue;
10247
10248 // It is, so we have to fix it up.
10249 TypeSize Scale(0U, false);
10250 int64_t Dummy1, Dummy2;
10251
10253 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10254 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10255 assert(Scale != 0 && "Unexpected opcode!");
10256 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10257
10258 // We've pushed the return address to the stack, so add 16 to the offset.
10259 // This is safe, since we already checked if it would overflow when we
10260 // checked if this instruction was legal to outline.
10261 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10262 StackOffsetOperand.setImm(NewImm);
10263 }
10264}
10265
10267 const AArch64InstrInfo *TII,
10268 bool ShouldSignReturnAddr) {
10269 if (!ShouldSignReturnAddr)
10270 return;
10271
10272 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10275 TII->get(AArch64::PAUTH_EPILOGUE))
10277}
10278
10281 const outliner::OutlinedFunction &OF) const {
10282
10284
10285 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10286 FI->setOutliningStyle("Tail Call");
10287 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10288 // For thunk outlining, rewrite the last instruction from a call to a
10289 // tail-call.
10290 MachineInstr *Call = &*--MBB.instr_end();
10291 unsigned TailOpcode;
10292 if (Call->getOpcode() == AArch64::BL) {
10293 TailOpcode = AArch64::TCRETURNdi;
10294 } else {
10295 assert(Call->getOpcode() == AArch64::BLR ||
10296 Call->getOpcode() == AArch64::BLRNoIP);
10297 TailOpcode = AArch64::TCRETURNriALL;
10298 }
10299 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10300 .add(Call->getOperand(0))
10301 .addImm(0);
10302 MBB.insert(MBB.end(), TC);
10303 Call->eraseFromParent();
10304
10305 FI->setOutliningStyle("Thunk");
10306 }
10307
10308 bool IsLeafFunction = true;
10309
10310 // Is there a call in the outlined range?
10311 auto IsNonTailCall = [](const MachineInstr &MI) {
10312 return MI.isCall() && !MI.isReturn();
10313 };
10314
10315 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10316 // Fix up the instructions in the range, since we're going to modify the
10317 // stack.
10318
10319 // Bugzilla ID: 46767
10320 // TODO: Check if fixing up twice is safe so we can outline these.
10321 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10322 "Can only fix up stack references once");
10323 fixupPostOutline(MBB);
10324
10325 IsLeafFunction = false;
10326
10327 // LR has to be a live in so that we can save it.
10328 if (!MBB.isLiveIn(AArch64::LR))
10329 MBB.addLiveIn(AArch64::LR);
10330
10333
10334 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10335 OF.FrameConstructionID == MachineOutlinerThunk)
10336 Et = std::prev(MBB.end());
10337
10338 // Insert a save before the outlined region
10339 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10340 .addReg(AArch64::SP, RegState::Define)
10341 .addReg(AArch64::LR)
10342 .addReg(AArch64::SP)
10343 .addImm(-16);
10344 It = MBB.insert(It, STRXpre);
10345
10348
10349 // Add a CFI saying the stack was moved 16 B down.
10350 CFIBuilder.buildDefCFAOffset(16);
10351
10352 // Add a CFI saying that the LR that we want to find is now 16 B higher
10353 // than before.
10354 CFIBuilder.buildOffset(AArch64::LR, -16);
10355 }
10356
10357 // Insert a restore before the terminator for the function.
10358 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10359 .addReg(AArch64::SP, RegState::Define)
10360 .addReg(AArch64::LR, RegState::Define)
10361 .addReg(AArch64::SP)
10362 .addImm(16);
10363 Et = MBB.insert(Et, LDRXpost);
10364 }
10365
10366 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
10367
10368 // If this is a tail call outlined function, then there's already a return.
10369 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10370 OF.FrameConstructionID == MachineOutlinerThunk) {
10371 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10372 return;
10373 }
10374
10375 // It's not a tail call, so we have to insert the return ourselves.
10376
10377 // LR has to be a live in so that we can return to it.
10378 if (!MBB.isLiveIn(AArch64::LR))
10379 MBB.addLiveIn(AArch64::LR);
10380
10381 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10382 .addReg(AArch64::LR);
10383 MBB.insert(MBB.end(), ret);
10384
10385 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10386
10387 FI->setOutliningStyle("Function");
10388
10389 // Did we have to modify the stack by saving the link register?
10390 if (OF.FrameConstructionID != MachineOutlinerDefault)
10391 return;
10392
10393 // We modified the stack.
10394 // Walk over the basic block and fix up all the stack accesses.
10395 fixupPostOutline(MBB);
10396}
10397
10401
10402 // Are we tail calling?
10403 if (C.CallConstructionID == MachineOutlinerTailCall) {
10404 // If yes, then we can just branch to the label.
10405 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10406 .addGlobalAddress(M.getNamedValue(MF.getName()))
10407 .addImm(0));
10408 return It;
10409 }
10410
10411 // Are we saving the link register?
10412 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10413 C.CallConstructionID == MachineOutlinerThunk) {
10414 // No, so just insert the call.
10415 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10416 .addGlobalAddress(M.getNamedValue(MF.getName())));
10417 return It;
10418 }
10419
10420 // We want to return the spot where we inserted the call.
10422
10423 // Instructions for saving and restoring LR around the call instruction we're
10424 // going to insert.
10425 MachineInstr *Save;
10426 MachineInstr *Restore;
10427 // Can we save to a register?
10428 if (C.CallConstructionID == MachineOutlinerRegSave) {
10429 // FIXME: This logic should be sunk into a target-specific interface so that
10430 // we don't have to recompute the register.
10431 Register Reg = findRegisterToSaveLRTo(C);
10432 assert(Reg && "No callee-saved register available?");
10433
10434 // LR has to be a live in so that we can save it.
10435 if (!MBB.isLiveIn(AArch64::LR))
10436 MBB.addLiveIn(AArch64::LR);
10437
10438 // Save and restore LR from Reg.
10439 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10440 .addReg(AArch64::XZR)
10441 .addReg(AArch64::LR)
10442 .addImm(0);
10443 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10444 .addReg(AArch64::XZR)
10445 .addReg(Reg)
10446 .addImm(0);
10447 } else {
10448 // We have the default case. Save and restore from SP.
10449 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10450 .addReg(AArch64::SP, RegState::Define)
10451 .addReg(AArch64::LR)
10452 .addReg(AArch64::SP)
10453 .addImm(-16);
10454 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10455 .addReg(AArch64::SP, RegState::Define)
10456 .addReg(AArch64::LR, RegState::Define)
10457 .addReg(AArch64::SP)
10458 .addImm(16);
10459 }
10460
10461 It = MBB.insert(It, Save);
10462 It++;
10463
10464 // Insert the call.
10465 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10466 .addGlobalAddress(M.getNamedValue(MF.getName())));
10467 CallPt = It;
10468 It++;
10469
10470 It = MBB.insert(It, Restore);
10471 return CallPt;
10472}
10473
10475 MachineFunction &MF) const {
10476 return MF.getFunction().hasMinSize();
10477}
10478
10481 DebugLoc &DL,
10482 bool AllowSideEffects) const {
10483 const MachineFunction &MF = *MBB.getParent();
10485 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10486
10487 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10488 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10489 } else if (STI.isSVEorStreamingSVEAvailable()) {
10490 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10491 .addImm(0)
10492 .addImm(0);
10493 } else if (STI.isNeonAvailable()) {
10494 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10495 .addImm(0);
10496 } else {
10497 // This is a streaming-compatible function without SVE. We don't have full
10498 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10499 // So given `movi v..` would be illegal use `fmov d..` instead.
10500 assert(STI.hasNEON() && "Expected to have NEON.");
10501 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10502 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10503 }
10504}
10505
10506std::optional<DestSourcePair>
10508
10509 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
10510 // and zero immediate operands used as an alias for mov instruction.
10511 if (((MI.getOpcode() == AArch64::ORRWrs &&
10512 MI.getOperand(1).getReg() == AArch64::WZR &&
10513 MI.getOperand(3).getImm() == 0x0) ||
10514 (MI.getOpcode() == AArch64::ORRWrr &&
10515 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10516 // Check that the w->w move is not a zero-extending w->x mov.
10517 (!MI.getOperand(0).getReg().isVirtual() ||
10518 MI.getOperand(0).getSubReg() == 0) &&
10519 (!MI.getOperand(0).getReg().isPhysical() ||
10520 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10521 /*TRI=*/nullptr) == -1))
10522 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10523
10524 if (MI.getOpcode() == AArch64::ORRXrs &&
10525 MI.getOperand(1).getReg() == AArch64::XZR &&
10526 MI.getOperand(3).getImm() == 0x0)
10527 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10528
10529 return std::nullopt;
10530}
10531
10532std::optional<DestSourcePair>
10534 if ((MI.getOpcode() == AArch64::ORRWrs &&
10535 MI.getOperand(1).getReg() == AArch64::WZR &&
10536 MI.getOperand(3).getImm() == 0x0) ||
10537 (MI.getOpcode() == AArch64::ORRWrr &&
10538 MI.getOperand(1).getReg() == AArch64::WZR))
10539 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10540 return std::nullopt;
10541}
10542
10543std::optional<RegImmPair>
10545 int Sign = 1;
10546 int64_t Offset = 0;
10547
10548 // TODO: Handle cases where Reg is a super- or sub-register of the
10549 // destination register.
10550 const MachineOperand &Op0 = MI.getOperand(0);
10551 if (!Op0.isReg() || Reg != Op0.getReg())
10552 return std::nullopt;
10553
10554 switch (MI.getOpcode()) {
10555 default:
10556 return std::nullopt;
10557 case AArch64::SUBWri:
10558 case AArch64::SUBXri:
10559 case AArch64::SUBSWri:
10560 case AArch64::SUBSXri:
10561 Sign *= -1;
10562 [[fallthrough]];
10563 case AArch64::ADDSWri:
10564 case AArch64::ADDSXri:
10565 case AArch64::ADDWri:
10566 case AArch64::ADDXri: {
10567 // TODO: Third operand can be global address (usually some string).
10568 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10569 !MI.getOperand(2).isImm())
10570 return std::nullopt;
10571 int Shift = MI.getOperand(3).getImm();
10572 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10573 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10574 }
10575 }
10576 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10577}
10578
10579/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10580/// the destination register then, if possible, describe the value in terms of
10581/// the source register.
10582static std::optional<ParamLoadedValue>
10584 const TargetInstrInfo *TII,
10585 const TargetRegisterInfo *TRI) {
10586 auto DestSrc = TII->isCopyLikeInstr(MI);
10587 if (!DestSrc)
10588 return std::nullopt;
10589
10590 Register DestReg = DestSrc->Destination->getReg();
10591 Register SrcReg = DestSrc->Source->getReg();
10592
10593 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10594
10595 // If the described register is the destination, just return the source.
10596 if (DestReg == DescribedReg)
10597 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10598
10599 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10600 if (MI.getOpcode() == AArch64::ORRWrs &&
10601 TRI->isSuperRegister(DestReg, DescribedReg))
10602 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10603
10604 // We may need to describe the lower part of a ORRXrs move.
10605 if (MI.getOpcode() == AArch64::ORRXrs &&
10606 TRI->isSubRegister(DestReg, DescribedReg)) {
10607 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10608 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10609 }
10610
10611 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10612 "Unhandled ORR[XW]rs copy case");
10613
10614 return std::nullopt;
10615}
10616
10618 // Functions cannot be split to different sections on AArch64 if they have
10619 // a red zone. This is because relaxing a cross-section branch may require
10620 // incrementing the stack pointer to spill a register, which would overwrite
10621 // the red zone.
10622 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10623 return false;
10624
10626}
10627
10629 const MachineBasicBlock &MBB) const {
10630 // Asm Goto blocks can contain conditional branches to goto labels, which can
10631 // get moved out of range of the branch instruction.
10632 auto isAsmGoto = [](const MachineInstr &MI) {
10633 return MI.getOpcode() == AArch64::INLINEASM_BR;
10634 };
10635 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10636 return false;
10637
10638 // Because jump tables are label-relative instead of table-relative, they all
10639 // must be in the same section or relocation fixup handling will fail.
10640
10641 // Check if MBB is a jump table target
10643 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10644 return llvm::is_contained(JTE.MBBs, &MBB);
10645 };
10646 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10647 return false;
10648
10649 // Check if MBB contains a jump table lookup
10650 for (const MachineInstr &MI : MBB) {
10651 switch (MI.getOpcode()) {
10652 case TargetOpcode::G_BRJT:
10653 case AArch64::JumpTableDest32:
10654 case AArch64::JumpTableDest16:
10655 case AArch64::JumpTableDest8:
10656 return false;
10657 default:
10658 continue;
10659 }
10660 }
10661
10662 // MBB isn't a special case, so it's safe to be split to the cold section.
10663 return true;
10664}
10665
10666std::optional<ParamLoadedValue>
10668 Register Reg) const {
10669 const MachineFunction *MF = MI.getMF();
10671 switch (MI.getOpcode()) {
10672 case AArch64::MOVZWi:
10673 case AArch64::MOVZXi: {
10674 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10675 // 64-bit parameters, so we need to consider super-registers.
10676 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10677 return std::nullopt;
10678
10679 if (!MI.getOperand(1).isImm())
10680 return std::nullopt;
10681 int64_t Immediate = MI.getOperand(1).getImm();
10682 int Shift = MI.getOperand(2).getImm();
10683 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
10684 nullptr);
10685 }
10686 case AArch64::ORRWrs:
10687 case AArch64::ORRXrs:
10688 return describeORRLoadedValue(MI, Reg, this, TRI);
10689 }
10690
10692}
10693
10695 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10696 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10697 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10698 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10699
10700 // Anyexts are nops.
10701 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10702 return true;
10703
10704 Register DefReg = ExtMI.getOperand(0).getReg();
10705 if (!MRI.hasOneNonDBGUse(DefReg))
10706 return false;
10707
10708 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10709 // addressing mode.
10710 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
10711 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10712}
10713
10715 return get(Opc).TSFlags & AArch64::ElementSizeMask;
10716}
10717
10719 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10720}
10721
10723 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
10724}
10725
10726unsigned int
10728 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10729}
10730
10731bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10732 unsigned Scale) const {
10733 if (Offset && Scale)
10734 return false;
10735
10736 // Check Reg + Imm
10737 if (!Scale) {
10738 // 9-bit signed offset
10739 if (isInt<9>(Offset))
10740 return true;
10741
10742 // 12-bit unsigned offset
10743 unsigned Shift = Log2_64(NumBytes);
10744 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10745 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10746 (Offset >> Shift) << Shift == Offset)
10747 return true;
10748 return false;
10749 }
10750
10751 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10752 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10753}
10754
10756 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10757 return AArch64::BLRNoIP;
10758 else
10759 return AArch64::BLR;
10760}
10761
10764 Register TargetReg, bool FrameSetup) const {
10765 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10766
10768 MachineFunction &MF = *MBB.getParent();
10769 const AArch64InstrInfo *TII =
10770 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10771 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10773
10774 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10775 MachineBasicBlock *LoopTestMBB =
10777 MF.insert(MBBInsertPoint, LoopTestMBB);
10778 MachineBasicBlock *LoopBodyMBB =
10780 MF.insert(MBBInsertPoint, LoopBodyMBB);
10782 MF.insert(MBBInsertPoint, ExitMBB);
10783 MachineInstr::MIFlag Flags =
10785
10786 // LoopTest:
10787 // SUB SP, SP, #ProbeSize
10788 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10789 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10790
10791 // CMP SP, TargetReg
10792 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10793 AArch64::XZR)
10794 .addReg(AArch64::SP)
10795 .addReg(TargetReg)
10797 .setMIFlags(Flags);
10798
10799 // B.<Cond> LoopExit
10800 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10802 .addMBB(ExitMBB)
10803 .setMIFlags(Flags);
10804
10805 // STR XZR, [SP]
10806 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10807 .addReg(AArch64::XZR)
10808 .addReg(AArch64::SP)
10809 .addImm(0)
10810 .setMIFlags(Flags);
10811
10812 // B loop
10813 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10814 .addMBB(LoopTestMBB)
10815 .setMIFlags(Flags);
10816
10817 // LoopExit:
10818 // MOV SP, TargetReg
10819 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10820 .addReg(TargetReg)
10821 .addImm(0)
10823 .setMIFlags(Flags);
10824
10825 // LDR XZR, [SP]
10826 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10827 .addReg(AArch64::XZR, RegState::Define)
10828 .addReg(AArch64::SP)
10829 .addImm(0)
10830 .setMIFlags(Flags);
10831
10832 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10834
10835 LoopTestMBB->addSuccessor(ExitMBB);
10836 LoopTestMBB->addSuccessor(LoopBodyMBB);
10837 LoopBodyMBB->addSuccessor(LoopTestMBB);
10838 MBB.addSuccessor(LoopTestMBB);
10839
10840 // Update liveins.
10841 if (MF.getRegInfo().reservedRegsFrozen())
10842 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10843
10844 return ExitMBB->begin();
10845}
10846
10847namespace {
10848class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10849 MachineFunction *MF;
10850 const TargetInstrInfo *TII;
10851 const TargetRegisterInfo *TRI;
10853
10854 /// The block of the loop
10855 MachineBasicBlock *LoopBB;
10856 /// The conditional branch of the loop
10857 MachineInstr *CondBranch;
10858 /// The compare instruction for loop control
10859 MachineInstr *Comp;
10860 /// The number of the operand of the loop counter value in Comp
10861 unsigned CompCounterOprNum;
10862 /// The instruction that updates the loop counter value
10863 MachineInstr *Update;
10864 /// The number of the operand of the loop counter value in Update
10865 unsigned UpdateCounterOprNum;
10866 /// The initial value of the loop counter
10867 Register Init;
10868 /// True iff Update is a predecessor of Comp
10869 bool IsUpdatePriorComp;
10870
10871 /// The normalized condition used by createTripCountGreaterCondition()
10873
10874public:
10875 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10876 MachineInstr *Comp, unsigned CompCounterOprNum,
10877 MachineInstr *Update, unsigned UpdateCounterOprNum,
10878 Register Init, bool IsUpdatePriorComp,
10880 : MF(Comp->getParent()->getParent()),
10881 TII(MF->getSubtarget().getInstrInfo()),
10882 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10883 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10884 CompCounterOprNum(CompCounterOprNum), Update(Update),
10885 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10886 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10887
10888 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10889 // Make the instructions for loop control be placed in stage 0.
10890 // The predecessors of Comp are considered by the caller.
10891 return MI == Comp;
10892 }
10893
10894 std::optional<bool> createTripCountGreaterCondition(
10895 int TC, MachineBasicBlock &MBB,
10896 SmallVectorImpl<MachineOperand> &CondParam) override {
10897 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10898 // Cond is normalized for such use.
10899 // The predecessors of the branch are assumed to have already been inserted.
10900 CondParam = Cond;
10901 return {};
10902 }
10903
10904 void createRemainingIterationsGreaterCondition(
10906 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10907
10908 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10909
10910 void adjustTripCount(int TripCountAdjust) override {}
10911
10912 bool isMVEExpanderSupported() override { return true; }
10913};
10914} // namespace
10915
10916/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
10917/// is replaced by ReplaceReg. The output register is newly created.
10918/// The other operands are unchanged from MI.
10919static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10920 Register ReplaceReg, MachineBasicBlock &MBB,
10921 MachineBasicBlock::iterator InsertTo) {
10924 const TargetRegisterInfo *TRI =
10927 Register Result = 0;
10928 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10929 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10930 Result = MRI.createVirtualRegister(
10931 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10932 NewMI->getOperand(I).setReg(Result);
10933 } else if (I == ReplaceOprNum) {
10934 MRI.constrainRegClass(
10935 ReplaceReg,
10936 TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
10937 NewMI->getOperand(I).setReg(ReplaceReg);
10938 }
10939 }
10940 MBB.insert(InsertTo, NewMI);
10941 return Result;
10942}
10943
10944void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10947 // Create and accumulate conditions for next TC iterations.
10948 // Example:
10949 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10950 // # iteration of the kernel
10951 //
10952 // # insert the following instructions
10953 // cond = CSINCXr 0, 0, C, implicit $nzcv
10954 // counter = ADDXri counter, 1 # clone from this->Update
10955 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10956 // cond = CSINCXr cond, cond, C, implicit $nzcv
10957 // ... (repeat TC times)
10958 // SUBSXri cond, 0, implicit-def $nzcv
10959
10960 assert(CondBranch->getOpcode() == AArch64::Bcc);
10961 // CondCode to exit the loop
10963 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
10964 if (CondBranch->getOperand(1).getMBB() == LoopBB)
10966
10967 // Accumulate conditions to exit the loop
10968 Register AccCond = AArch64::XZR;
10969
10970 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10971 auto AccumulateCond = [&](Register CurCond,
10973 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
10974 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
10975 .addReg(NewCond, RegState::Define)
10976 .addReg(CurCond)
10977 .addReg(CurCond)
10979 return NewCond;
10980 };
10981
10982 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
10983 // Update and Comp for I==0 are already exists in MBB
10984 // (MBB is an unrolled kernel)
10985 Register Counter;
10986 for (int I = 0; I <= TC; ++I) {
10987 Register NextCounter;
10988 if (I != 0)
10989 NextCounter =
10990 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10991
10992 AccCond = AccumulateCond(AccCond, CC);
10993
10994 if (I != TC) {
10995 if (I == 0) {
10996 if (Update != Comp && IsUpdatePriorComp) {
10997 Counter =
10998 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10999 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11000 MBB.end());
11001 } else {
11002 // can use already calculated value
11003 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11004 }
11005 } else if (Update != Comp) {
11006 NextCounter =
11007 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11008 }
11009 }
11010 Counter = NextCounter;
11011 }
11012 } else {
11013 Register Counter;
11014 if (LastStage0Insts.empty()) {
11015 // use initial counter value (testing if the trip count is sufficient to
11016 // be executed by pipelined code)
11017 Counter = Init;
11018 if (IsUpdatePriorComp)
11019 Counter =
11020 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11021 } else {
11022 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11023 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11024 }
11025
11026 for (int I = 0; I <= TC; ++I) {
11027 Register NextCounter;
11028 NextCounter =
11029 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11030 AccCond = AccumulateCond(AccCond, CC);
11031 if (I != TC && Update != Comp)
11032 NextCounter =
11033 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11034 Counter = NextCounter;
11035 }
11036 }
11037
11038 // If AccCond == 0, the remainder is greater than TC.
11039 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11040 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11041 .addReg(AccCond)
11042 .addImm(0)
11043 .addImm(0);
11044 Cond.clear();
11046}
11047
11048static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11049 Register &RegMBB, Register &RegOther) {
11050 assert(Phi.getNumOperands() == 5);
11051 if (Phi.getOperand(2).getMBB() == MBB) {
11052 RegMBB = Phi.getOperand(1).getReg();
11053 RegOther = Phi.getOperand(3).getReg();
11054 } else {
11055 assert(Phi.getOperand(4).getMBB() == MBB);
11056 RegMBB = Phi.getOperand(3).getReg();
11057 RegOther = Phi.getOperand(1).getReg();
11058 }
11059}
11060
11061static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11062 if (!Reg.isVirtual())
11063 return false;
11064 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11065 return MRI.getVRegDef(Reg)->getParent() != BB;
11066}
11067
11068/// If Reg is an induction variable, return true and set some parameters
11069static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11070 MachineInstr *&UpdateInst,
11071 unsigned &UpdateCounterOprNum, Register &InitReg,
11072 bool &IsUpdatePriorComp) {
11073 // Example:
11074 //
11075 // Preheader:
11076 // InitReg = ...
11077 // LoopBB:
11078 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11079 // Reg = COPY Reg0 ; COPY is ignored.
11080 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11081 // ; Reg is the value calculated in the previous
11082 // ; iteration, so IsUpdatePriorComp == false.
11083
11084 if (LoopBB->pred_size() != 2)
11085 return false;
11086 if (!Reg.isVirtual())
11087 return false;
11088 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11089 UpdateInst = nullptr;
11090 UpdateCounterOprNum = 0;
11091 InitReg = 0;
11092 IsUpdatePriorComp = true;
11093 Register CurReg = Reg;
11094 while (true) {
11095 MachineInstr *Def = MRI.getVRegDef(CurReg);
11096 if (Def->getParent() != LoopBB)
11097 return false;
11098 if (Def->isCopy()) {
11099 // Ignore copy instructions unless they contain subregisters
11100 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11101 return false;
11102 CurReg = Def->getOperand(1).getReg();
11103 } else if (Def->isPHI()) {
11104 if (InitReg != 0)
11105 return false;
11106 if (!UpdateInst)
11107 IsUpdatePriorComp = false;
11108 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11109 } else {
11110 if (UpdateInst)
11111 return false;
11112 switch (Def->getOpcode()) {
11113 case AArch64::ADDSXri:
11114 case AArch64::ADDSWri:
11115 case AArch64::SUBSXri:
11116 case AArch64::SUBSWri:
11117 case AArch64::ADDXri:
11118 case AArch64::ADDWri:
11119 case AArch64::SUBXri:
11120 case AArch64::SUBWri:
11121 UpdateInst = Def;
11122 UpdateCounterOprNum = 1;
11123 break;
11124 case AArch64::ADDSXrr:
11125 case AArch64::ADDSWrr:
11126 case AArch64::SUBSXrr:
11127 case AArch64::SUBSWrr:
11128 case AArch64::ADDXrr:
11129 case AArch64::ADDWrr:
11130 case AArch64::SUBXrr:
11131 case AArch64::SUBWrr:
11132 UpdateInst = Def;
11133 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11134 UpdateCounterOprNum = 1;
11135 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11136 UpdateCounterOprNum = 2;
11137 else
11138 return false;
11139 break;
11140 default:
11141 return false;
11142 }
11143 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11144 }
11145
11146 if (!CurReg.isVirtual())
11147 return false;
11148 if (Reg == CurReg)
11149 break;
11150 }
11151
11152 if (!UpdateInst)
11153 return false;
11154
11155 return true;
11156}
11157
11158std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11160 // Accept loops that meet the following conditions
11161 // * The conditional branch is BCC
11162 // * The compare instruction is ADDS/SUBS/WHILEXX
11163 // * One operand of the compare is an induction variable and the other is a
11164 // loop invariant value
11165 // * The induction variable is incremented/decremented by a single instruction
11166 // * Does not contain CALL or instructions which have unmodeled side effects
11167
11168 for (MachineInstr &MI : *LoopBB)
11169 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11170 // This instruction may use NZCV, which interferes with the instruction to
11171 // be inserted for loop control.
11172 return nullptr;
11173
11174 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11176 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11177 return nullptr;
11178
11179 // Infinite loops are not supported
11180 if (TBB == LoopBB && FBB == LoopBB)
11181 return nullptr;
11182
11183 // Must be conditional branch
11184 if (TBB != LoopBB && FBB == nullptr)
11185 return nullptr;
11186
11187 assert((TBB == LoopBB || FBB == LoopBB) &&
11188 "The Loop must be a single-basic-block loop");
11189
11190 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11192
11193 if (CondBranch->getOpcode() != AArch64::Bcc)
11194 return nullptr;
11195
11196 // Normalization for createTripCountGreaterCondition()
11197 if (TBB == LoopBB)
11199
11200 MachineInstr *Comp = nullptr;
11201 unsigned CompCounterOprNum = 0;
11202 for (MachineInstr &MI : reverse(*LoopBB)) {
11203 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11204 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11205 // operands is a loop invariant value
11206
11207 switch (MI.getOpcode()) {
11208 case AArch64::SUBSXri:
11209 case AArch64::SUBSWri:
11210 case AArch64::ADDSXri:
11211 case AArch64::ADDSWri:
11212 Comp = &MI;
11213 CompCounterOprNum = 1;
11214 break;
11215 case AArch64::ADDSWrr:
11216 case AArch64::ADDSXrr:
11217 case AArch64::SUBSWrr:
11218 case AArch64::SUBSXrr:
11219 Comp = &MI;
11220 break;
11221 default:
11222 if (isWhileOpcode(MI.getOpcode())) {
11223 Comp = &MI;
11224 break;
11225 }
11226 return nullptr;
11227 }
11228
11229 if (CompCounterOprNum == 0) {
11230 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11231 CompCounterOprNum = 2;
11232 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11233 CompCounterOprNum = 1;
11234 else
11235 return nullptr;
11236 }
11237 break;
11238 }
11239 }
11240 if (!Comp)
11241 return nullptr;
11242
11243 MachineInstr *Update = nullptr;
11244 Register Init;
11245 bool IsUpdatePriorComp;
11246 unsigned UpdateCounterOprNum;
11247 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11248 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11249 return nullptr;
11250
11251 return std::make_unique<AArch64PipelinerLoopInfo>(
11252 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11253 Init, IsUpdatePriorComp, Cond);
11254}
11255
11256/// verifyInstruction - Perform target specific instruction verification.
11257bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11258 StringRef &ErrInfo) const {
11259
11260 // Verify that immediate offsets on load/store instructions are within range.
11261 // Stack objects with an FI operand are excluded as they can be fixed up
11262 // during PEI.
11263 TypeSize Scale(0U, false), Width(0U, false);
11264 int64_t MinOffset, MaxOffset;
11265 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11266 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11267 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11268 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11269 if (Imm < MinOffset || Imm > MaxOffset) {
11270 ErrInfo = "Unexpected immediate on load/store instruction";
11271 return false;
11272 }
11273 }
11274 }
11275 return true;
11276}
11277
11278#define GET_INSTRINFO_HELPERS
11279#define GET_INSTRMAP_INFO
11280#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition: LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
bool isAccumulationOpcode(unsigned Opcode) const override
Returns true if \P Opcode is an instruction which performs accumulation into a destination register.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instructions is a SEH instruction used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
unsigned getAccumulationStartOpcode(unsigned Opcode) const override
Returns an opcode which defines the accumulator used by \P Opcode.
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
unsigned getReduceOpcodeForAccumulator(unsigned int AccumulatorOpCode) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if it's results...
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
unsigned getSVEVectorSizeInBits() const
AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod(const MachineFunction &MF) const
Choose a method of checking LR before performing a tail call.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
Helper class for creating CFI instructions and inserting them into MIR.
void buildDefCFAOffset(int64_t Offset, MCSymbol *Label=nullptr) const
void buildOffset(MCRegister Reg, int64_t Offset) const
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
bool empty() const
Definition: DenseMap.h:119
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:31
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:117
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:697
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:199
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:76
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1565
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:948
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:409
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:590
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:584
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:780
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:404
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
MI-level patchpoint operands.
Definition: StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:105
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:102
constexpr bool isValid() const
Definition: Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:78
Represents a location in source code.
Definition: SMLoc.h:23
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
bool empty() const
Definition: SmallSet.h:169
bool erase(const T &V)
Definition: SmallSet.h:198
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
MI-level stackmap operands.
Definition: StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:34
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:50
int64_t getScalable() const
Returns the scalable component of the stack.
Definition: TypeSize.h:53
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:45
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:43
MI-level Statepoint operands.
Definition: StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:349
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
self_iterator getIterator()
Definition: ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:359
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
LocationAtom
Definition: Dwarf.h:137
constexpr double e
Definition: MathExtras.h:47
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:67
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
Definition: SPIRVUtils.cpp:976
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ GATHER_LANE_i16
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ GATHER_LANE_i8
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ GATHER_LANE_i32
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:82
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:257
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:577
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:225
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.