LLVM: lib/Target/AArch64/AArch64MIPeepholeOpt.cpp Source File

//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass performs below peephole optimizations on MIR level.

//

// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri

//    MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri

//

// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri

//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri

//

// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri

//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri

//

//    The mov pseudo instruction could be expanded to multiple mov instructions

//    later. In this case, we could try to split the constant  operand of mov

//    instruction into two immediates which can be directly encoded into

//    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of

//    multiple `mov` + `and/add/sub` instructions.

//

// 4. Remove redundant ORRWrs which is generated by zero-extend.

//

//    %3:gpr32 = ORRWrs $wzr, %2, 0

//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32

//

//    If AArch64's 32-bit form of instruction defines the source operand of

//    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source

//    operand are set to zero.

//

// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx

//     ==> %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx

//

// 6. %intermediate:gpr32 = COPY %src:fpr128

//    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32

//     ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0

//

//    In cases where a source FPR is copied to a GPR in order to be copied

//    to a destination FPR, we can directly copy the values between the FPRs,

//    eliminating the use of the Integer unit. When we match a pattern of

//    INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR

//    source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr

//    instructions.

//

// 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high

//    64-bits. For example,

//

//   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr

//   %2:fpr64 = MOVID 0

//   %4:fpr128 = IMPLICIT_DEF

//   %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub

//   %6:fpr128 = IMPLICIT_DEF

//   %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub

//   %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0

//   ==>

//   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr

//   %6:fpr128 = IMPLICIT_DEF

//   %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub

//

// 8. Remove redundant CSELs that select between identical registers, by

//    replacing them with unconditional moves.

//

// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit

//    LSR or LSL alias of UBFM.

//

//===----------------------------------------------------------------------===//


#include "AArch64ExpandImm.h"

#include "AArch64InstrInfo.h"

#include "MCTargetDesc/AArch64AddressingModes.h"

#include "llvm/CodeGen/MachineDominators.h"

#include "llvm/CodeGen/MachineLoopInfo.h"


using namespace llvm;


#define DEBUG_TYPE "aarch64-mi-peephole-opt"


namespace {


/// Machine-function pass that hosts the AArch64 MIR peephole rewrites
/// implemented by the visit*/trySplit*/split* members declared below.
struct AArch64MIPeepholeOpt : public MachineFunctionPass {

  /// Pass identification; defined right after the struct.
  static char ID;


  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {}


  // Cached analyses/target hooks used by the visitors.
  // NOTE(review): presumably assigned in runOnMachineFunction before any
  // visitor runs -- the definition is not in this part of the file; confirm.
  /// Instruction info; used to look up opcode descriptors for new MIs.
  const AArch64InstrInfo *TII;

  /// Register info; passed to register-class queries.
  const AArch64RegisterInfo *TRI;

  /// Loop info; queried by checkMovImmInstr.
  MachineLoopInfo *MLI;

  /// Virtual-register bookkeeping (defs, uses, classes).
  MachineRegisterInfo *MRI;


  /// Opcodes for the two emitted instructions: `first` builds the temporary,
  /// `second` builds the final destination.
  using OpcodePair = std::pair<unsigned, unsigned>;

  /// Callback (Imm, RegSize, Imm0 out, Imm1 out) that decides whether Imm can
  /// be split and, if so, which opcode pair to emit.
  template <typename T>

  using SplitAndOpcFunc =

      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;

  /// Callback (MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg) that
  /// emits the two replacement instructions.
  using BuildMIFunc =

      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,

                         Register, Register, Register)>;


  /// For instructions where an immediate operand could be split into two

  /// separate immediate instructions, use the splitTwoPartImm to handle the

  /// optimization.

  ///

  /// To implement, the following function types must be passed to

  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if

  /// splitting the immediate is valid and returns the associated new opcode. A

  /// BuildMIFunc must be implemented to build the two immediate instructions.

  ///

  /// Example Pattern (where IMM would require 2+ MOV instructions):

  ///     %dst = <Instr>rr %src IMM [...]

  /// becomes:

  ///     %tmp = <Instr>ri %src (encode half IMM) [...]

  ///     %dst = <Instr>ri %tmp (encode half IMM) [...]

  template <typename T>

  bool splitTwoPartImm(MachineInstr &MI,

                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);


  /// Checks whether operand 2 of \p MI is fed by a MOVi32imm/MOVi64imm
  /// (optionally through a SUBREG_TO_REG) that is suitable for splitting;
  /// on success \p MovMI and \p SubregToRegMI identify those instructions.
  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,

                        MachineInstr *&SubregToRegMI);


  /// Splits the immediate of an ADD/SUB into two ADD/SUB-immediate
  /// instructions; \p PosOpc / \p NegOpc are used for Imm / -Imm.
  template <typename T>

  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);

  /// Flag-setting variant of visitADDSUB; each OpcodePair gives the opcodes
  /// of the first and second emitted instruction.
  template <typename T>

  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);


  // Strategy used to split logical immediate bitmasks.

  enum class SplitStrategy {

    // Handled by splitBitmaskImm.
    Intersect,

    // Handled by splitDisjointBitmaskImm.
    Disjoint,

  };

  /// Splits the MOV-immediate operand of a logical instruction into two
  /// bitmask immediates. \p Opc is used for the first instruction and
  /// \p OtherOpc for the second; when \p OtherOpc is 0, \p Opc is used for
  /// both.
  template <typename T>

  bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,

                          SplitStrategy Strategy, unsigned OtherOpc = 0);

  // Per-opcode visitors; each returns true when it rewrote the instruction.
  bool visitORR(MachineInstr &MI);

  bool visitCSEL(MachineInstr &MI);

  bool visitINSERT(MachineInstr &MI);

  bool visitINSviGPR(MachineInstr &MI, unsigned Opc);

  bool visitINSvi64lane(MachineInstr &MI);

  bool visitFMOVDr(MachineInstr &MI);

  bool visitUBFMXri(MachineInstr &MI);

  bool visitCopy(MachineInstr &MI);

  bool runOnMachineFunction(MachineFunction &MF) override;


  StringRef getPassName() const override {

    return "AArch64 MI Peephole Optimization pass";

  }


  /// Declares that the pass preserves the CFG and requires loop info.
  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.setPreservesCFG();

    AU.addRequired<MachineLoopInfoWrapperPass>();

    MachineFunctionPass::getAnalysisUsage(AU);

  }

};


char AArch64MIPeepholeOpt::ID = 0;


} // end anonymous namespace


// Registers the pass under the argument name "aarch64-mi-peephole-opt" with
// the description "AArch64 MI Peephole Optimization". The trailing
// `false, false` are the macro's cfg-only and is-analysis flags.
INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",

                "AArch64 MI Peephole Optimization", false, false)


/// Try to express \p Imm as the bitwise AND of two logical immediates.
///
/// The first mask is the contiguous run of ones spanning the lowest to the
/// highest set bit of \p Imm; the second is \p Imm with every bit outside
/// that span forced to one. E.g. 0b00000000001000000000010000000000 becomes
/// 0b00000000001111111111110000000000 & 0b11111111111000000000011111111111.
///
/// \param Imm      Immediate to split; must be neither 0 nor all-ones.
/// \param RegSize  Register width handed to the logical-immediate helpers.
/// \param Imm1Enc  Receives the encoding of the span mask on success.
/// \param Imm2Enc  Receives the encoding of the outer mask on success.
/// \returns true if the outer mask is a valid logical immediate and both
///          outputs were written; false (outputs untouched) otherwise.
template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  const T Value = static_cast<T>(Imm);
  assert(Value && (Value != ~static_cast<T>(0)) && "Invalid immediate!");

  const unsigned LoBit = llvm::countr_zero(Value);
  const unsigned HiBit = Log2_64(Value);

  // Ones from LoBit through HiBit inclusive, zero elsewhere.
  const T AboveSpan = static_cast<T>(2) << HiBit;
  const T SpanStart = static_cast<T>(1) << LoBit;
  const T SpanMask = AboveSpan - SpanStart;

  // The original bits inside the span, ones everywhere outside it.
  const T OuterMask = Value | ~SpanMask;

  // Only the outer mask is checked before encoding.
  if (AArch64_AM::isLogicalImmediate(OuterMask, RegSize)) {
    Imm1Enc = AArch64_AM::encodeLogicalImmediate(SpanMask, RegSize);
    Imm2Enc = AArch64_AM::encodeLogicalImmediate(OuterMask, RegSize);
    return true;
  }
  return false;
}


/// Try to express \p Imm as two disjoint logical immediates whose
/// inclusive/exclusive OR equals \p Imm.
///
/// The first mask is the least significant group of consecutive ones; the
/// second holds the remaining set bits. E.g.
/// 0b00000000011000000000011110000000 becomes
/// 0b00000000000000000000011110000000 and 0b00000000011000000000000000000000.
///
/// \param Imm      Immediate to split; must be neither 0 nor all-ones.
/// \param RegSize  Register width handed to the logical-immediate helpers.
/// \param Imm1Enc  Receives the encoding of the low group on success.
/// \param Imm2Enc  Receives the encoding of the remaining bits on success.
/// \returns true if the remainder is a valid logical immediate and both
///          outputs were written; false (outputs untouched) otherwise.
template <typename T>
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
                                    T &Imm2Enc) {
  assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");

  // Locate the first run of ones: [RunBegin, RunEnd).
  const unsigned RunBegin = llvm::countr_zero(Imm);
  const unsigned RunLength = llvm::countr_one(Imm >> RunBegin);
  const unsigned RunEnd = RunBegin + RunLength;

  // Shifting by the full width of T below would be undefined.
  assert(RunEnd < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
  const T RunEndBit = static_cast<T>(1) << RunEnd;
  const T RunBeginBit = static_cast<T>(1) << RunBegin;
  const T LowGroup = RunEndBit - RunBeginBit;

  // Everything in Imm that is not part of the low group.
  const T Remainder = Imm & ~LowGroup;

  // Only the remainder is checked before encoding.
  if (AArch64_AM::isLogicalImmediate(Remainder, RegSize)) {
    Imm1Enc = AArch64_AM::encodeLogicalImmediate(LowGroup, RegSize);
    Imm2Enc = AArch64_AM::encodeLogicalImmediate(Remainder, RegSize);
    return true;
  }
  return false;
}


template <typename T>

bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,

                                              SplitStrategy Strategy,

                                              unsigned OtherOpc) {

  // Try below transformations.

  //

  // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri

  // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri

  //

  // The mov pseudo instruction could be expanded to multiple mov instructions

  // later. Let's try to split the constant operand of mov instruction into two

  // bitmask immediates based on the given split strategy. It makes only two

  // logical instructions instead of multiple mov + logic instructions.


  return splitTwoPartImm<T>(

      MI,

      [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,

                                T &Imm1) -> std::optional<OpcodePair> {

        // If this immediate is already a suitable bitmask, don't split it.

        // TODO: Should we just combine the two instructions in this case?

        if (AArch64_AM::isLogicalImmediate(Imm, RegSize))

          return std::nullopt;


        // If this immediate can be handled by one instruction, don't split it.

        SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;

        AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);

        if (Insn.size() == 1)

          return std::nullopt;


        bool SplitSucc = false;

        switch (Strategy) {

        case SplitStrategy::Intersect:

          SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);

          break;

        case SplitStrategy::Disjoint:

          SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);

          break;

        }

        if (SplitSucc)

          return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);

        return std::nullopt;

      },

      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,

                   unsigned Imm1, Register SrcReg, Register NewTmpReg,

                   Register NewDstReg) {

        DebugLoc DL = MI.getDebugLoc();

        MachineBasicBlock *MBB = MI.getParent();

        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)

            .addReg(SrcReg)

            .addImm(Imm0);

        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)

            .addReg(NewTmpReg)

            .addImm(Imm1);

      });

}


/// Remove an `ORRWrs $wzr, %src, 0` that only exists to zero-extend, as
/// produced by
///
///   def : Pat<(i64 (zext GPR32:$src)),
///             (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
///
/// \returns true if \p MI was erased and its result replaced by its source.
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Only the exact `ORRWrs WZR, %src, 0` shape qualifies.
  if (MI.getOperand(3).getImm() != 0 ||
      MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *DefMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!DefMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // So if a real AArch64 32-bit instruction defines the source, the
  // zero-extend is redundant. Generic opcodes are rejected conservatively,
  // except for the COPY-from-FPR case handled first.
  const bool IsCopyOfVReg = DefMI->getOpcode() == TargetOpcode::COPY &&
                            DefMI->getOperand(1).getReg().isVirtual();
  if (IsCopyOfVReg) {
    MachineOperand &CopySrcOp = DefMI->getOperand(1);
    const TargetRegisterClass *RC = MRI->getRegClass(CopySrcOp.getReg());
    const bool ReadsSSub = CopySrcOp.getSubReg() == AArch64::ssub;

    // Accept a plain FPR32 source, or the ssub lane of a wider FP/vector
    // register.
    const bool IsFPR32 = RC == &AArch64::FPR32RegClass;
    const bool IsWiderFPClass = RC == &AArch64::FPR64RegClass ||
                                RC == &AArch64::FPR128RegClass ||
                                RC == &AArch64::ZPRRegClass;
    if (!IsFPR32 && !(IsWiderFPClass && ReadsSSub))
      return false;

    // A COPY from an FPR will become a FMOVSWr, so emit it now so that we
    // know the upper bits are zero.
    MachineBasicBlock &DefBlock = *DefMI->getParent();
    const DebugLoc DefLoc = DefMI->getDebugLoc();
    Register FMovSrc = CopySrcOp.getReg();
    if (ReadsSSub) {
      // Pull the ssub lane into its own FPR32 first.
      FMovSrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(DefBlock, DefMI, DefLoc, TII->get(TargetOpcode::COPY), FMovSrc)
          .add(CopySrcOp);
    }
    BuildMI(DefBlock, DefMI, DefLoc, TII->get(AArch64::FMOVSWr),
            DefMI->getOperand(0).getReg())
        .addReg(FMovSrc);
    DefMI->eraseFromParent();
  } else if (DefMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) {
    return false;
  }

  // Forward the source to every user of the ORR result, then drop the ORR.
  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}


/// Replace a CSEL whose two inputs are the same register with an
/// `ORR <zero-reg>, <input>, 0`, inserted in front of \p MI.
///
/// \returns true if \p MI was replaced and erased.
bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  const Register Selected = MI.getOperand(1).getReg();
  if (Selected != MI.getOperand(2).getReg())
    return false;

  // Choose the 64-bit or 32-bit flavour to match the CSEL being replaced.
  const bool Is64Bit = MI.getOpcode() == AArch64::CSELXr;
  auto ZeroReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  auto OrOpcode = Is64Bit ? AArch64::ORRXrs : AArch64::ORRWrs;

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode),
          MI.getOperand(0).getReg())
      .addReg(ZeroReg)
      .addReg(Selected)
      .addImm(0);

  MI.eraseFromParent();
  return true;
}


/// Rewrite an INSERT_SUBREG that comes from a zero-extend pattern:
///
///   From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
///   To   %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
///
/// The first INSERT_SUBREG operand is assumed to be irrelevant because a
/// COPY would destroy the upper part of the register anyway.
///
/// \returns true if \p MI was replaced by a SUBREG_TO_REG and erased.
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  MachineInstr *InsertedDef = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!InsertedDef)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // The inserted value must therefore be defined by a real (non-generic)
  // instruction; generic opcodes are rejected conservatively.
  if (InsertedDef->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;
  // The destination must live in a subclass of GPR64all.
  if (!AArch64::GPR64allRegClass.hasSubClassEq(DstRC))
    return false;

  // Emit the SUBREG_TO_REG in front of MI, reusing MI's operands 2 and 3.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *SubregMI << "\n");
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}


template <typename T>

static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {

  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both

  // imm0 and imm1 are non-zero 12-bit unsigned int.

  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||

      (Imm & ~static_cast<T>(0xffffff)) != 0)

    return false;


  // The immediate can not be composed via a single instruction.

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;

  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);

  if (Insn.size() == 1)

    return false;


  // Split Imm into (Imm0 << 12) + Imm1;

  Imm0 = (Imm >> 12) & 0xfff;

  Imm1 = Imm & 0xfff;

  return true;

}


template <typename T>

bool AArch64MIPeepholeOpt::visitADDSUB(

    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {

  // Try below transformation.

  //

  // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri

  // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri

  //

  // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri

  // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri

  //

  // The mov pseudo instruction could be expanded to multiple mov instructions

  // later. Let's try to split the constant operand of mov instruction into two

  // legal add/sub immediates. It makes only two ADD/SUB instructions instead of

  // multiple `mov` + `and/sub` instructions.


  // We can sometimes have ADDWrr WZR, MULi32imm that have not been constant

  // folded. Make sure that we don't generate invalid instructions that use XZR

  // in those cases.

  if (MI.getOperand(1).getReg() == AArch64::XZR ||

      MI.getOperand(1).getReg() == AArch64::WZR)

    return false;


  return splitTwoPartImm<T>(

      MI,

      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,

                       T &Imm1) -> std::optional<OpcodePair> {

        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))

          return std::make_pair(PosOpc, PosOpc);

        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))

          return std::make_pair(NegOpc, NegOpc);

        return std::nullopt;

      },

      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,

                   unsigned Imm1, Register SrcReg, Register NewTmpReg,

                   Register NewDstReg) {

        DebugLoc DL = MI.getDebugLoc();

        MachineBasicBlock *MBB = MI.getParent();

        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)

            .addReg(SrcReg)

            .addImm(Imm0)

            .addImm(12);

        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)

            .addReg(NewTmpReg)

            .addImm(Imm1)

            .addImm(0);

      });

}


/// Flag-setting counterpart of visitADDSUB: split the MOV-immediate operand
/// of an ADDS/SUBS into two immediate-form instructions, with the additional
/// requirement that the condition code usages are only for Equal and Not
/// Equal (i.e. neither the C nor the V flag is consumed).
///
/// \param PosOpcs  Opcodes (first, second) used when the immediate itself
///                 splits.
/// \param NegOpcs  Opcodes (first, second) used when the negated immediate
///                 splits.
/// \param MI       The register-register ADDS/SUBS to rewrite.
/// \returns false if operand 1 is WZR/XZR, otherwise the result of
///          splitTwoPartImm.
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
  // Same zero-register restriction as visitADDSUB.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;

        // Check conditional uses last since scanning the following
        // instructions is expensive.
        //
        // getUniqueVRegDef returns null when the register has no unique
        // definition; every other caller in this file checks for that, so
        // refuse the transformation here instead of dereferencing null.
        MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        if (!SrcMI)
          return std::nullopt;
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(*SrcMI, MI, *TRI);
        // Give up if the flag usage is unknown or C / V is consumed.
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      // Emit `NewTmpReg = Opcode.first SrcReg, Imm0, lsl 12` followed by
      // `NewDstReg = Opcode.second NewTmpReg, Imm1, lsl 0` in front of MI.
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}


/// Decide whether the MOV-immediate feeding operand 2 of \p MI is suitable
/// for the split-immediate peepholes.
///
/// \param MI             Instruction whose operand 2 is inspected.
/// \param MovMI          Set to the defining MOVi32imm/MOVi64imm on success.
/// \param SubregToRegMI  Set to the SUBREG_TO_REG sitting between the MOV and
///                       \p MI, or nullptr when there is none.
/// \returns true if the transformation may go ahead. On a false return the
///          two out-parameters may have been partially updated.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Inside a loop, only proceed when MI is loop invariant.
  if (MachineLoop *Loop = MLI->getLoopFor(MI.getParent()))
    if (!Loop->isLoopInvariant(MI))
      return false;

  // Find what defines the second source operand.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // Look through a SUBREG_TO_REG to the instruction feeding it.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(SubregToRegMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  const unsigned MovOpc = MovMI->getOpcode();
  if (MovOpc != AArch64::MOVi32imm && MovOpc != AArch64::MOVi64imm)
    return false;

  // Splitting an immediate that has other users would only add instructions,
  // so both the MOV and any SUBREG_TO_REG must have exactly one use.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;
  return !SubregToRegMI ||
         MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg());
}


/// Shared driver for the split-immediate peepholes.
///
/// Validates the MOV-immediate feeding \p MI, asks \p SplitAndOpc to split
/// the constant and pick opcodes, creates the registers for
///
///   NewTmpReg = Opcode.first  SrcReg    Imm0
///   NewDstReg = Opcode.second NewTmpReg Imm1
///
/// lets \p BuildInstr emit the two instructions, and finally erases \p MI,
/// the MOV and any intermediate SUBREG_TO_REG.
///
/// \returns true if the rewrite was performed.
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  const unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Locate and validate the MOV (and optional SUBREG_TO_REG) feeding MI.
  MachineInstr *MovMI;
  MachineInstr *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  T Imm = static_cast<T>(MovMI->getOperand(1).getImm());
  // For the 32 bit form of instruction, the upper 32 bits of the destination
  // register are set to zero. With a SUBREG_TO_REG in between, clear the
  // upper 32 bits of Imm: a negative value would otherwise stay sign
  // extended after being assigned to the 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;

  // Let the callback split the immediate and choose the opcode pair.
  T Imm0, Imm1;
  std::optional<OpcodePair> Split = SplitAndOpc(Imm, RegSize, Imm0, Imm1);
  if (!Split)
    return false;
  const OpcodePair Opcode = *Split;

  // The opcodes may differ for flag-setting operations, where only the
  // second instruction should set flags. Look up the register classes each
  // instruction expects for its destination and first operand, reusing the
  // first lookup when both opcodes are the same.
  MachineFunction *MF = MI.getMF();
  const bool SameOpcode = Opcode.first == Opcode.second;
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      SameOpcode ? FirstInstrDstRC
                 : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      SameOpcode ? FirstInstrOperandRC
                 : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Existing registers, plus the fresh ones for the new instructions.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // A non-virtual DstReg (likely WZR or XZR) is reused as the destination.
  Register NewDstReg = DstReg;
  if (DstReg.isVirtual())
    NewDstReg = MRI->createVirtualRegister(SecondInstrDstRC);
  const bool HasNewDst = DstReg != NewDstReg;

  // Constrain registers based on their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (HasNewDst)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Delegate the actual instruction construction.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith also rewrites MI's own definition; restore it so SSA form
  // holds until MI is deleted. Only needed when a new destination was made.
  if (HasNewDst) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Remove the instructions that are now dead.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}


/// Replace an INSvi[X]gpr whose GPR input is just a (chain of) COPY from an
/// FPR128 with the lane-to-lane form given by \p Opc:
///
///   From
///    %intermediate1:gpr64 = COPY %src:fpr128
///    %intermediate2:gpr32 = COPY %intermediate1:gpr64
///    %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
///   To
///    %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
///    src_index
///   where src_index = 0, X = [8|16|32|64]
///
/// \returns true if \p MI was replaced and erased.
bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  // Walk up the COPY chain until a COPY reading an FPR128 is found. Any
  // non-COPY definition, missing definition or non-virtual source aborts.
  MachineInstr *CopyMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  for (;;) {
    if (!CopyMI || CopyMI->getOpcode() != TargetOpcode::COPY)
      return false;

    const Register CopiedFrom = CopyMI->getOperand(1).getReg();
    if (!CopiedFrom.isVirtual())
      return false;

    if (MRI->getRegClass(CopiedFrom) == &AArch64::FPR128RegClass)
      break;

    CopyMI = MRI->getUniqueVRegDef(CopiedFrom);
  }

  // Build the lane insert in front of MI, reading lane 0 of the FPR128.
  MachineOperand &VecSrcOp = CopyMI->getOperand(1);
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = VecSrcOp.getReg();
  MachineInstr *INSvilaneMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
          .addUse(SrcReg, getRegState(VecSrcOp))
          .addImm(0);

  LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *INSvilaneMI << "\n");
  (void)INSvilaneMI;
  MI.eraseFromParent();
  return true;
}


/// Returns true if \p MI defines an FPR64 register through operand 0 and is
/// a target (non-generic) instruction.
///
/// Rationale: all instructions that set a FPR64 will implicitly zero the top
/// bits of the register.
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) {
  const MachineOperand &FirstOp = MI->getOperand(0);
  if (!FirstOp.isReg() || !FirstOp.isDef())
    return false;

  // The defined register must be exactly of class FPR64.
  if (MRI->getRegClass(FirstOp.getReg()) != &AArch64::FPR64RegClass)
    return false;

  // Generic opcodes are excluded.
  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}


/// Remove an INSvi64lane that inserts an explicit zero when the instruction
/// defining the low 64 bits already zeroes the high 64 bits implicitly.
///
/// \returns true if \p MI was erased and its result replaced by operand 1.
bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
  // Check the MI for low 64-bits sets zero for high 64-bits implicitly.
  // We are expecting below case.
  //
  //  %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
  //  %6:fpr128 = IMPLICIT_DEF
  //  %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
  //  %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  // getUniqueVRegDef may return null; guard before dereferencing, exactly as
  // is done for High64MI below.
  if (!Low64MI || Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Check there is `mov 0` MI for high 64-bits.
  // We are expecting below cases.
  //
  //  %2:fpr64 = MOVID 0
  //  %4:fpr128 = IMPLICIT_DEF
  //  %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
  //  %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  // or
  //  %5:fpr128 = MOVIv2d_ns 0
  //  %6:fpr64 = COPY %5.dsub:fpr128
  //  %8:fpr128 = IMPLICIT_DEF
  //  %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
  //  %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
  MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
  // Look through one COPY (second pattern above).
  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
    High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))
    return false;
  // The materialised constant must be zero.
  if (High64MI->getOperand(1).getImm() != 0)
    return false;

  // Let's remove MIs for high 64-bits: forward MI's first input to all users
  // of its result, then erase MI.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}


/// Remove an FMOVDr whose source is already produced by an instruction that
/// zeroes the high 64 bits. An FMOVDr sets the high 64-bits to zero
/// implicitly, similar to ORR for GPR, so in that case it is redundant.
///
/// \returns true if \p MI was erased and its result replaced by its source.
bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
  const Register MovedFrom = MI.getOperand(1).getReg();
  MachineInstr *Producer = MRI->getUniqueVRegDef(MovedFrom);
  if (!Producer)
    return false;
  if (!is64bitDefwithZeroHigh64bit(Producer, MRI))
    return false;

  // Forward the source register to every user of the FMOVDr result.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MovedFrom;
  LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
  MRI->clearKillFlags(OldDef);
  MRI->clearKillFlags(NewDef);
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}


/// If \p MI (a UBFMXri) is equivalent to a 32 bit LSR or LSL alias of UBFM,
/// replace it with
///
///   %src32 = COPY %src64.sub_32
///   %dst32 = UBFMWri %src32, immr, imms
///   %dst64 = SUBREG_TO_REG 0, %dst32, sub_32
///
/// \returns true if \p MI was replaced and erased.
bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  int64_t Immr = MI.getOperand(2).getImm();
  const int64_t Imms = MI.getOperand(3).getImm();

  // Recognise the two immediate patterns that have a 32-bit equivalent.
  const bool Is32BitLSR = Imms == 31 && Immr <= Imms;
  const bool Is32BitLSL = Immr == Imms + 33;
  if (!(Is32BitLSR || Is32BitLSL))
    return false;

  // For the LSL form, immr is rebased by 32 for the 32-bit instruction.
  if (Is32BitLSL)
    Immr -= 32;

  // Derive the 32-bit register classes from those UBFMXri uses for its
  // destination (operand 0) and source (operand 1).
  const MCInstrDesc &Desc64 = TII->get(MI.getOpcode());
  MachineFunction &MF = *MI.getMF();

  const TargetRegisterClass *DstRC64 = TII->getRegClass(Desc64, 0, TRI, MF);
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 = TII->getRegClass(Desc64, 1, TRI, MF);
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

  MachineBasicBlock &Block = *MI.getParent();
  const DebugLoc Loc = MI.getDebugLoc();

  // Take the low 32 bits of the source.
  BuildMI(Block, MI, Loc, TII->get(AArch64::COPY), SrcReg32)
      .addReg(SrcReg64, 0, AArch64::sub_32);
  // Do the shift with the 32-bit instruction.
  BuildMI(Block, MI, Loc, TII->get(AArch64::UBFMWri), DstReg32)
      .addReg(SrcReg32)
      .addImm(Immr)
      .addImm(Imms);
  // Re-form the original 64-bit destination from the 32-bit result.
  BuildMI(Block, MI, Loc, TII->get(AArch64::SUBREG_TO_REG), DstReg64)
      .addImm(0)
      .addReg(DstReg32)
      .addImm(AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}


// Across a basic-block we might have an i32 extract from a value that only
// operates on upper bits (for example a sxtw). We can replace the COPY with a
// new version skipping the sxtw.
//
// Handles `MI = COPY %in.sub_32` where %in has a single non-debug use and is
// defined, possibly through a chain of single-use full copies, by either
//   SBFMXri %src, 0, 31, or
//   SUBREG_TO_REG(ORRWrr(WZR, COPY(%src.sub_32))).
// MI is rewritten to read %src.sub_32 directly and the bypassed instructions
// are erased. Returns true if MI was rewritten, false otherwise.
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  // Walk up through full copies whose source has a single non-debug use,
  // recording every instruction that becomes dead if the rewrite succeeds.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(SrcMI);
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    DeadInstrs.insert(SrcMI);
  }

  // No unique definition was found; DeadInstrs is discarded untouched.
  if (!SrcMI)
    return false;

  // Look for SXTW(X) and return Reg.
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(2).getImm() != 0 ||
        SrcMI->getOperand(3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(1).getReg();
  };
  // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32)))
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    // Only the ORR is queued for removal here; the inner COPY is left in
    // place and keeps reading the returned register.
    DeadInstrs.insert(Orr);
    return Cpy->getOperand(1).getReg();
  };

  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
  MI.getOperand(1).setReg(SrcReg);
  // MI is a new, later reader of SrcReg. SrcReg may have other uses (it is not
  // required to be single-use, and in the UXTW case the inner COPY survives),
  // so any kill flag on them could now be stale; drop them all.
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "        to: " << MI);
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << "  Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}


/// Pass entry point: caches the target/analysis handles for \p MF and runs the
/// matching peephole visitor on every instruction.
///
/// Returns true if any visitor reported a change.
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const auto &Subtarget = MF.getSubtarget();
  TII = static_cast<const AArch64InstrInfo *>(Subtarget.getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(Subtarget.getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  // Route one instruction to its visitor; opcodes without a visitor report
  // "no change".
  auto VisitInstr = [this](MachineInstr &MI) -> bool {
    switch (MI.getOpcode()) {
    case AArch64::INSERT_SUBREG:
      return visitINSERT(MI);

    // Logical operations with a register operand.
    case AArch64::ANDWrr:
      return trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI,
                                          SplitStrategy::Intersect);
    case AArch64::ANDXrr:
      return trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI,
                                          SplitStrategy::Intersect);
    case AArch64::ANDSWrr:
      return trySplitLogicalImm<uint32_t>(
          AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri);
    case AArch64::ANDSXrr:
      return trySplitLogicalImm<uint64_t>(
          AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
    case AArch64::EORWrr:
      return trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
                                          SplitStrategy::Disjoint);
    case AArch64::EORXrr:
      return trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
                                          SplitStrategy::Disjoint);
    case AArch64::ORRWrr:
      return trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
                                          SplitStrategy::Disjoint);
    case AArch64::ORRXrr:
      return trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
                                          SplitStrategy::Disjoint);
    case AArch64::ORRWrs:
      return visitORR(MI);

    // Add/sub with a register operand.
    case AArch64::ADDWrr:
      return visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
    case AArch64::SUBWrr:
      return visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
    case AArch64::ADDXrr:
      return visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
    case AArch64::SUBXrr:
      return visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);

    // Flag-setting add/sub with a register operand.
    case AArch64::ADDSWrr:
      return visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                     {AArch64::SUBWri, AArch64::SUBSWri}, MI);
    case AArch64::SUBSWrr:
      return visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                     {AArch64::ADDWri, AArch64::ADDSWri}, MI);
    case AArch64::ADDSXrr:
      return visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                     {AArch64::SUBXri, AArch64::SUBSXri}, MI);
    case AArch64::SUBSXrr:
      return visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                     {AArch64::ADDXri, AArch64::ADDSXri}, MI);

    case AArch64::CSELWr:
    case AArch64::CSELXr:
      return visitCSEL(MI);

    // Vector element inserts from a GPR, by element width.
    case AArch64::INSvi64gpr:
      return visitINSviGPR(MI, AArch64::INSvi64lane);
    case AArch64::INSvi32gpr:
      return visitINSviGPR(MI, AArch64::INSvi32lane);
    case AArch64::INSvi16gpr:
      return visitINSviGPR(MI, AArch64::INSvi16lane);
    case AArch64::INSvi8gpr:
      return visitINSviGPR(MI, AArch64::INSvi8lane);

    case AArch64::INSvi64lane:
      return visitINSvi64lane(MI);
    case AArch64::FMOVDr:
      return visitFMOVDr(MI);
    case AArch64::UBFMXri:
      return visitUBFMXri(MI);
    case AArch64::COPY:
      return visitCopy(MI);

    default:
      return false;
    }
  };

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    // Visitors may erase the instruction they are given, so the iterator is
    // advanced before each visit.
    for (MachineInstr &MI : make_early_inc_range(MBB))
      Changed |= VisitInstr(MI);
  }

  return Changed;
}


/// Factory for the AArch64 MI peephole optimization pass. Returns a newly
/// allocated pass object.
FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  FunctionPass *Pass = new AArch64MIPeepholeOpt();
  return Pass;
}

AArch64AddressingModes.h

MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:103

AArch64ExpandImm.h

AArch64InstrInfo.h

splitDisjointBitmaskImm
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc)
Definition: AArch64MIPeepholeOpt.cpp:196

HighestBitSet
unsigned HighestBitSet
Definition: AArch64MIPeepholeOpt.cpp:175

NewImm1
T NewImm1
Definition: AArch64MIPeepholeOpt.cpp:179

Imm2Enc
unsigned T T & Imm2Enc
Definition: AArch64MIPeepholeOpt.cpp:165

Imm1Enc
unsigned T & Imm1Enc
Definition: AArch64MIPeepholeOpt.cpp:165

NewImm2
T NewImm2
Definition: AArch64MIPeepholeOpt.cpp:183

RegSize
unsigned RegSize
Definition: AArch64MIPeepholeOpt.cpp:165

is64bitDefwithZeroHigh64bit
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI, MachineRegisterInfo *MRI)
Definition: AArch64MIPeepholeOpt.cpp:693

splitAddSubImm
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1)
Definition: AArch64MIPeepholeOpt.cpp:410

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

LowestBitSet
unsigned LowestBitSet
Definition: AArch64MIPeepholeOpt.cpp:174

MBB
MachineBasicBlock & MBB
Definition: ARMSLSHardening.cpp:71

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:118

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:110

MachineDominators.h

MachineLoopInfo.h

TRI
Register const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:2118

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56

Opc
auto Opc
Definition: RISCVRedundantCopyElimination.cpp:75

OP
#define OP(OPC)
Definition: Instruction.h:46

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition: Debug.h:119

T

llvm::AArch64InstrInfo
Definition: AArch64InstrInfo.h:180

llvm::AArch64RegisterInfo
Definition: AArch64RegisterInfo.h:26

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:48

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:76

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270

llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:124

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314

llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:122

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:31

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:184

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunction
Definition: MachineFunction.h:286

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:762

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:772

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:733

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:160

llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:253

llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:126

llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition: MachineInstrBuilder.h:152

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:72

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359

llvm::MachineInstr::isFullCopy
bool isFullCopy() const
Definition: MachineInstr.h:1435

llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511

llvm::MachineInstr::eraseFromParent
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:770

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595

llvm::MachineLoopInfoWrapperPass
Definition: MachineLoopInfo.h:161

llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:109

llvm::MachineLoop
Definition: MachineLoopInfo.h:48

llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:373

llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:556

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:368

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:53

llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::Register::isVirtual
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:79

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55

llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45

llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:99

llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.

unsigned

llvm::AArch64_AM::isLogicalImmediate
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
Definition: AArch64AddressingModes.h:275

llvm::AArch64_AM::encodeLogicalImmediate
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
Definition: AArch64AddressingModes.h:282

llvm::AArch64_IMM::expandMOVImm
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
Definition: AArch64ExpandImm.cpp:533

llvm::AMDGPU::Imm
@ Imm
Definition: AMDGPURegBankLegalizeRules.h:129

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::M68k::MemAddrModeKind::L
@ L

llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:47

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:369

llvm::createAArch64MIPeepholeOptPass
FunctionPass * createAArch64MIPeepholeOptPass()
Definition: AArch64MIPeepholeOpt.cpp:1012

llvm::countr_one
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:260

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663

llvm::Log2_64
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342

llvm::countr_zero
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:157

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207

llvm::examineCFlagsUse
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
Definition: AArch64InstrInfo.cpp:1859

llvm::getRegState
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
Definition: MachineInstrBuilder.h:563