//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the following peephole optimizations at the MIR level.
//
// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
//    MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The mov pseudo instruction could be expanded to multiple mov instructions
//    later. In this case, we could try to split the constant operand of the
//    mov instruction into two immediates which can be directly encoded into
//    *Wri/*Xri instructions. This makes two AND/ADD/SUB instructions instead
//    of multiple `mov` + `and/add/sub` instructions.
//
// 4. Remove the redundant ORRWrs generated by zero-extension.
//
//      %3:gpr32 = ORRWrs $wzr, %2, 0
//      %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If AArch64's 32-bit form of an instruction defines the source operand of
//    the ORRWrs, we can remove the ORRWrs because the upper 32 bits of the
//    source operand are set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//      ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
// 6. %intermediate:gpr32 = COPY %src:fpr128
//    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
//      ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
//
//    In cases where a source FPR is copied to a GPR in order to be copied
//    to a destination FPR, we can directly copy the values between the FPRs,
//    eliminating the use of the integer unit. When we match a pattern of
//    INSvi[X]gpr that is preceded by a chain of COPY instructions from an FPR
//    source, we use INSvi[X]lane to replace the COPY & INSvi[X]gpr
//    instructions.
//
// 7. If an MI implicitly sets the high 64 bits to zero, remove the `mov 0`
//    for the high 64 bits. For example,
//
//      %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//      %2:fpr64 = MOVID 0
//      %4:fpr128 = IMPLICIT_DEF
//      %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
//      %6:fpr128 = IMPLICIT_DEF
//      %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//      %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
//    ==>
//      %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//      %6:fpr128 = IMPLICIT_DEF
//      %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//
// 8. Remove redundant CSELs that select between identical registers, by
//    replacing them with unconditional moves.
//
// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a
//    32-bit LSR or LSL alias of UBFM.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-mi-peephole-opt"

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {}

  const AArch64InstrInfo *TII;
  const AArch64RegisterInfo *TRI;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  using OpcodePair = std::pair<unsigned, unsigned>;
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;

  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use splitTwoPartImm to handle the
  /// optimization.
  ///
  /// To implement, the following function types must be passed to
  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
  /// splitting the immediate is valid and returns the associated new opcode. A
  /// BuildMIFunc must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///   %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///   %tmp = <Instr>ri %src (encode half IMM) [...]
  ///   %dst = <Instr>ri %tmp (encode half IMM) [...]
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);

  // Strategy used to split logical immediate bitmasks.
  enum class SplitStrategy {
    Intersect,
    Disjoint,
  };
  template <typename T>
  bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                          SplitStrategy Strategy, unsigned OtherOpc = 0);
  bool visitORR(MachineInstr &MI);
  bool visitCSEL(MachineInstr &MI);
  bool visitINSERT(MachineInstr &MI);
  bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
  bool visitINSvi64lane(MachineInstr &MI);
  bool visitFMOVDr(MachineInstr &MI);
  bool visitUBFMXri(MachineInstr &MI);
  bool visitCopy(MachineInstr &MI);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");

  // The bitmask immediate consists of consecutive ones. Let's say there is
  // constant 0b00000000001000000000010000000000 which does not consist of
  // consecutive ones. We can split it into two bitmask immediates like
  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
  // ANDing these two bitmask immediates reproduces the original constant.
  unsigned LowestBitSet = llvm::countr_zero(UImm);
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask which is filled with one from the position of lowest bit set
  // to the position of highest bit set.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask which is filled with one outside the position of lowest bit
  // set and the position of highest bit set.
  T NewImm2 = UImm | ~NewImm1;

  // If the split value is not a valid bitmask immediate, do not split this
  // constant.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}
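
// As an illustrative worked example of the split above: the constant
// 0x00200400 (bits 10 and 21 set) splits into 0x003ffc00 (a run of ones from
// bit 10 to bit 21) and 0xffe007ff (a rotated run covering bits 21..31 and
// 0..10). A MOVi32imm of 0x00200400 feeding an ANDWrr thus becomes `and` with
// #0x3ffc00 followed by `and` with #0xffe007ff, both valid bitmask immediates
// whose intersection is the original constant.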

template <typename T>
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
                                    T &Imm2Enc) {
  assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");

  // Try to split a bitmask of the form 0b00000000011000000000011110000000 into
  // two disjoint masks such as 0b00000000011000000000000000000000 and
  // 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
  // new masks matches the original mask. Disjointness matters for EOR: any bit
  // set in both masks would be flipped twice and cancel out.
  unsigned LowestBitSet = llvm::countr_zero(Imm);
  unsigned LowestGapBitUnset =
      LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);

  // Create a mask for the least significant group of consecutive ones.
  assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
  T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a disjoint mask for the remaining ones.
  T NewImm2 = Imm & ~NewImm1;

  // Do not split if NewImm2 is not a valid bitmask immediate.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                                              SplitStrategy Strategy,
                                              unsigned OtherOpc) {
  // Try the below transformations.
  //
  // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
  // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two bitmask immediates based on the given split strategy. It makes only
  // two logical instructions instead of multiple mov + logic instructions.
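  //
  // For flag-setting variants, only the final instruction must set the flags,
  // so the caller passes, e.g., ANDWri as Opc and ANDSWri as OtherOpc; the
  // emitted pair is then ANDWri + ANDSWri.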

  return splitTwoPartImm<T>(
      MI,
      [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
                                T &Imm1) -> std::optional<OpcodePair> {
        // If this immediate is already a suitable bitmask, don't split it.
        // TODO: Should we just combine the two instructions in this case?
        if (AArch64_AM::isLogicalImmediate(Imm, RegSize))
          return std::nullopt;

        // If this immediate can be handled by one instruction, don't split it.
        SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
        AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
        if (Insn.size() == 1)
          return std::nullopt;

        bool SplitSucc = false;
        switch (Strategy) {
        case SplitStrategy::Intersect:
          SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
          break;
        case SplitStrategy::Disjoint:
          SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
          break;
        }
        if (SplitSucc)
          return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1);
      });
}

bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Check this ORR comes from the below zero-extend pattern.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of an instruction defines the source operand of
  // the zero-extend, we do not need the zero-extend. Check that the MI's
  // opcode is a real AArch64 instruction; if it is not, conservatively do not
  // process it.
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become an FMOVSWr, so do so now so that we know
    // that the upper bits are zero.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
          RC != &AArch64::ZPRRegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc;
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    } else {
      CpySrc = SrcMI->getOperand(1).getReg();
    }
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  // Replace CSEL with MOV when both inputs are the same register.
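  //
  // For example (illustrative):
  //   %0:gpr32 = CSELWr %1:gpr32, %1:gpr32, 0, implicit $nzcv
  // ==>
  //   %0:gpr32 = ORRWrs $wzr, %1:gpr32, 0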
  if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
    return false;

  auto ZeroReg =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
  auto OrOpcode =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
      .addReg(MI.getOperand(0).getReg(), RegState::Define)
      .addReg(ZeroReg)
      .addReg(MI.getOperand(1).getReg())
      .addImm(0);

  MI.eraseFromParent();
  return true;
}

bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // Check this INSERT_SUBREG comes from the below zero-extend pattern.
  //
  // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
  // To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
  //
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of an instruction defines the source operand of
  // the zero-extend, we do not need the zero-extend. Check that the MI's
  // opcode is a real AArch64 instruction; if it is not, conservatively do not
  // process it.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << " replaced by:\n: " << *SubregMI << "\n");
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
  // imm0 and imm1 are non-zero 12-bit unsigned ints.
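  //
  // For example (illustrative), Imm = 0x123456 splits into Imm0 = 0x123 and
  // Imm1 = 0x456, so an ADDXrr of this constant becomes
  //   add x0, x1, #0x123, lsl #12
  //   add x0, x0, #0x456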
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;

  // The immediate cannot be composed via a single instruction.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // Split Imm into (Imm0 << 12) + Imm1.
  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
  // Try the below transformations.
  //
  // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
  // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
  //
  // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
  // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two legal add/sub immediates. It makes only two ADD/SUB instructions
  // instead of multiple `mov` + `add/sub` instructions.

  // We can sometimes have ADDWrr WZR, MOVi32imm that has not been constant
  // folded. Make sure that we don't generate invalid instructions that use XZR
  // in those cases.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
  // Try the same transformation as ADDSUB, but with the additional requirement
  // that the condition code usages are only for Equal and Not Equal.

  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check the conditional uses last, since scanning the succeeding
        // instructions is expensive.
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

// Checks if the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Check whether the current MBB is in a loop and the instruction is loop
  // invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the current MI's operand is a MOV with immediate.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // If it is SUBREG_TO_REG, check its operand.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because it causes
  // more instructions.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;
  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // It is OK to perform this peephole optimization.
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against the current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate into Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32-bit form of the instruction, the upper 32 bits of the
  // destination register are set to zero. If there is a SUBREG_TO_REG, set the
  // upper 32 bits of Imm to zero. This is essential if the immediate value was
  // a negative number, since it was sign-extended when we assigned it to the
  // 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. The opcodes might
  // differ for flag-setting operations that should only set flags on the
  // second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine register classes for destinations and register operands.
  MachineFunction *MF = MI.getMF();
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Get the old destination register and create the new ones.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // In the situation that DstReg is not virtual (likely WZR or XZR), we want
  // to reuse that same destination register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain the registers based on their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instructions.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register. Keep it in SSA form until
  // MI is deleted; this is only needed if we made a new destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Remove the MIs that are no longer needed.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  // Check if this INSvi[X]gpr comes from a COPY of a source FPR128.
  //
  // From
  //   %intermediate1:gpr64 = COPY %src:fpr128
  //   %intermediate2:gpr32 = COPY %intermediate1:gpr64
  //   %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
  // To
  //   %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
  //   src_index
  // where src_index = 0, X = [8|16|32|64]

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());

  // For a chain of COPY instructions, find the initial source register
  // and check if it's an FPR128.
  while (true) {
    if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
      return false;

    if (!SrcMI->getOperand(1).getReg().isVirtual())
      return false;

    if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
        &AArch64::FPR128RegClass) {
      break;
    }
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
  }

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = SrcMI->getOperand(1).getReg();
  MachineInstr *INSvilaneMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
          .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
          .addImm(0);

  LLVM_DEBUG(dbgs() << MI << " replaced by:\n: " << *INSvilaneMI << "\n");
  (void)INSvilaneMI;
  MI.eraseFromParent();
  return true;
}

// All instructions that set an FPR64 will implicitly zero the top bits of the
// register.
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) {
  if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
    return false;
  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  if (RC != &AArch64::FPR64RegClass)
    return false;
  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}
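
// As an illustrative example, the FCVTNv4i16 in the pattern below qualifies:
// it is a real (non-generic) target opcode defining an fpr64, and writing a
// 64-bit FPR implicitly zeroes bits [127:64] of the full vector register.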

bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
  // Check that the MI defining the low 64 bits implicitly sets the high
  // 64 bits to zero. We are expecting the below case.
  //
  //   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
  //   %6:fpr128 = IMPLICIT_DEF
  //   %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
  //   %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Check there is a `mov 0` MI for the high 64 bits.
  // We are expecting the below cases.
  //
  //   %2:fpr64 = MOVID 0
  //   %4:fpr128 = IMPLICIT_DEF
  //   %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
  //   %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  // or
  //   %5:fpr128 = MOVIv2d_ns 0
  //   %6:fpr64 = COPY %5.dsub:fpr128
  //   %8:fpr128 = IMPLICIT_DEF
  //   %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
  //   %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
  MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
    High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))
    return false;
  if (High64MI->getOperand(1).getImm() != 0)
    return false;

  // Let's remove the MIs for the high 64 bits.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
  // An FMOVDr sets the high 64 bits to zero implicitly, similar to ORR for GPR.
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Let's remove the MIs for the high 64 bits.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
  MRI->clearKillFlags(OldDef);
  MRI->clearKillFlags(NewDef);
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  // Check if the instruction is equivalent to a 32-bit LSR or LSL alias of
  // UBFM, and replace the UBFMXri instruction with its 32-bit variant,
  // UBFMWri.
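  //
  // For example (illustrative), UBFMXri %x, 33, 0 places bit 0 of %x at bit
  // 31 of the result; after rebasing Immr by 32 below, this is UBFMWri with
  // Immr = 1, Imms = 0, i.e. the `lsl w, w, #31` alias, once the operands are
  // narrowed to 32 bits.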
  int64_t Immr = MI.getOperand(2).getImm();
  int64_t Imms = MI.getOperand(3).getImm();

  bool IsLSR = Imms == 31 && Immr <= Imms;
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)
    return false;

  if (IsLSL) {
    Immr -= 32;
  }

  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF());
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF());
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY),
          SrcReg32)
      .addReg(SrcReg64, 0, AArch64::sub_32);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
          DstReg32)
      .addReg(SrcReg32)
      .addImm(Immr)
      .addImm(Imms);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
          TII->get(AArch64::SUBREG_TO_REG), DstReg64)
      .addImm(0)
      .addReg(DstReg32)
      .addImm(AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}

// Across a basic block we might have an i32 extract from a value that only
// operates on the upper bits (for example a sxtw). We can replace the COPY
// with a new version skipping the sxtw.
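//
// For example (illustrative):
//   %1:gpr64 = SBFMXri %0:gpr64, 0, 31   ; sxtw, leaves bits [31:0] unchanged
//   %2:gpr32 = COPY %1.sub_32:gpr64
// ==>
//   %2:gpr32 = COPY %0.sub_32:gpr64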
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(SrcMI);
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    DeadInstrs.insert(SrcMI);
  }

  if (!SrcMI)
    return false;

  // Look for SXTW(X) and return Reg.
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(2).getImm() != 0 ||
        SrcMI->getOperand(3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(1).getReg();
  };
  // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32))).
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    DeadInstrs.insert(Orr);
    return Cpy->getOperand(1).getReg();
  };

  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
  MI.getOperand(1).setReg(SrcReg);
  LLVM_DEBUG(dbgs() << " to: " << MI);
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << " Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDSWrr:
        Changed |= trySplitLogicalImm<uint32_t>(
            AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri);
        break;
      case AArch64::ANDSXrr:
        Changed |= trySplitLogicalImm<uint64_t>(
            AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
        break;
      case AArch64::EORWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::EORXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::CSELWr:
      case AArch64::CSELXr:
        Changed |= visitCSEL(MI);
        break;
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::UBFMXri:
        Changed |= visitUBFMXri(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}