AArch64InstructionSelector.cpp
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
117 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351 MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353 CmpInst::Predicate CC,
354 AArch64CC::CondCode Predicate,
355 AArch64CC::CondCode OutCC,
356 MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Width / 8);
415 }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// None when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
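For reference, the branches above work out to mappings like these (a sketch; the type values are hypothetical inputs):

  // GPR bank: s32 -> GPR32RegClass, s64 -> GPR64RegClass, 128 bits -> XSeqPairsClassRegClass.
  // FPR bank: s16 -> FPR16RegClass, s64 -> FPR64RegClass, v4s32 (128 bits) -> FPR128RegClass.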
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607 getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
619 if (SizeInBits <= 32)
620 return GetAllRegSet ? &AArch64::GPR32allRegClass
621 : &AArch64::GPR32RegClass;
622 if (SizeInBits == 64)
623 return GetAllRegSet ? &AArch64::GPR64allRegClass
624 : &AArch64::GPR64RegClass;
625 if (SizeInBits == 128)
626 return &AArch64::XSeqPairsClassRegClass;
627 }
628
629 if (RegBankID == AArch64::FPRRegBankID) {
630 if (SizeInBits.isScalable()) {
631 assert(SizeInBits == TypeSize::getScalable(128) &&
632 "Unexpected scalable register size");
633 return &AArch64::ZPRRegClass;
634 }
635
636 switch (SizeInBits) {
637 default:
638 return nullptr;
639 case 8:
640 return &AArch64::FPR8RegClass;
641 case 16:
642 return &AArch64::FPR16RegClass;
643 case 32:
644 return &AArch64::FPR32RegClass;
645 case 64:
646 return &AArch64::FPR64RegClass;
647 case 128:
648 return &AArch64::FPR128RegClass;
649 }
650 }
651
652 return nullptr;
653}
654
655/// Returns the correct subregister to use for a given register class.
656static bool getSubRegForClass(const TargetRegisterClass *RC,
657 const TargetRegisterInfo &TRI, unsigned &SubReg) {
658 switch (TRI.getRegSizeInBits(*RC)) {
659 case 8:
660 SubReg = AArch64::bsub;
661 break;
662 case 16:
663 SubReg = AArch64::hsub;
664 break;
665 case 32:
666 if (RC != &AArch64::FPR32RegClass)
667 SubReg = AArch64::sub_32;
668 else
669 SubReg = AArch64::ssub;
670 break;
671 case 64:
672 SubReg = AArch64::dsub;
673 break;
674 default:
675 LLVM_DEBUG(
676 dbgs() << "Couldn't find appropriate subregister for register class.");
677 return false;
678 }
679
680 return true;
681}
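A few concrete results of the switch above (illustrative only):

  // FPR16 -> hsub, FPR32 -> ssub, FPR64 -> dsub, and a 32-bit GPR class -> sub_32;
  // any size not listed (e.g. a 128-bit class) takes the debug + "return false" path.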
682
683/// Returns the minimum size the given register bank can hold.
684static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
685 switch (RB.getID()) {
686 case AArch64::GPRRegBankID:
687 return 32;
688 case AArch64::FPRRegBankID:
689 return 8;
690 default:
691 llvm_unreachable("Tried to get minimum size for unknown register bank.");
692 }
693}
694
695/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
696/// Helper function for functions like createDTuple and createQTuple.
697///
698/// \p RegClassIDs - The list of register class IDs available for some tuple of
699/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
700/// expected to contain between 2 and 4 tuple classes.
701///
702/// \p SubRegs - The list of subregister classes associated with each register
703/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
704/// subregister class. The index of each subregister class is expected to
705/// correspond with the index of each register class.
706///
707/// \returns Either the destination register of REG_SEQUENCE instruction that
708/// was created, or the 0th element of \p Regs if \p Regs contains a single
709/// element.
710static Register createTuple(ArrayRef<Register> Regs,
711 const unsigned RegClassIDs[],
712 const unsigned SubRegs[], MachineIRBuilder &MIB) {
713 unsigned NumRegs = Regs.size();
714 if (NumRegs == 1)
715 return Regs[0];
716 assert(NumRegs >= 2 && NumRegs <= 4 &&
717 "Only support between two and 4 registers in a tuple!");
718 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
719 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
720 auto RegSequence =
721 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
722 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
723 RegSequence.addUse(Regs[I]);
724 RegSequence.addImm(SubRegs[I]);
725 }
726 return RegSequence.getReg(0);
727}
728
729/// Create a tuple of D-registers using the registers in \p Regs.
730static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
731 static const unsigned RegClassIDs[] = {
732 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
733 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
734 AArch64::dsub2, AArch64::dsub3};
735 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
736}
737
738/// Create a tuple of Q-registers using the registers in \p Regs.
739static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
740 static const unsigned RegClassIDs[] = {
741 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
742 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
743 AArch64::qsub2, AArch64::qsub3};
744 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
745}
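For illustration (the vreg names are hypothetical): handing two Q registers to createQTuple yields a single REG_SEQUENCE constrained to the QQ class,

  //   %tuple:qq = REG_SEQUENCE %v0, qsub0, %v1, qsub1

while a one-element list is returned unchanged without emitting anything.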
746
747static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
748 auto &MI = *Root.getParent();
749 auto &MBB = *MI.getParent();
750 auto &MF = *MBB.getParent();
751 auto &MRI = MF.getRegInfo();
752 uint64_t Immed;
753 if (Root.isImm())
754 Immed = Root.getImm();
755 else if (Root.isCImm())
756 Immed = Root.getCImm()->getZExtValue();
757 else if (Root.isReg()) {
758 auto ValAndVReg =
759 getIConstantVRegValWithLookThrough(Root.getReg(), MRI);
760 if (!ValAndVReg)
761 return std::nullopt;
762 Immed = ValAndVReg->Value.getSExtValue();
763 } else
764 return std::nullopt;
765 return Immed;
766}
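A quick illustration of the helper above (hypothetical MIR):

  //   %c:gpr(s64) = G_CONSTANT i64 42
  // A root operand that is an immediate 42, a ConstantInt 42, or a vreg defined
  // (possibly through copies) by the G_CONSTANT above all yield 42; any other
  // register definition yields std::nullopt.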
767
768/// Check whether \p I is a currently unsupported binary operation:
769/// - it has an unsized type
770/// - an operand is not a vreg
771/// - all operands are not in the same bank
772/// These are checks that should someday live in the verifier, but right now,
773/// these are mostly limitations of the aarch64 selector.
774static bool unsupportedBinOp(const MachineInstr &I,
775 const AArch64RegisterBankInfo &RBI,
776 const MachineRegisterInfo &MRI,
777 const AArch64RegisterInfo &TRI) {
778 LLT Ty = MRI.getType(I.getOperand(0).getReg());
779 if (!Ty.isValid()) {
780 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
781 return true;
782 }
783
784 const RegisterBank *PrevOpBank = nullptr;
785 for (auto &MO : I.operands()) {
786 // FIXME: Support non-register operands.
787 if (!MO.isReg()) {
788 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
789 return true;
790 }
791
792 // FIXME: Can generic operations have physical registers operands? If
793 // so, this will need to be taught about that, and we'll need to get the
794 // bank out of the minimal class for the register.
795 // Either way, this needs to be documented (and possibly verified).
796 if (!MO.getReg().isVirtual()) {
797 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
798 return true;
799 }
800
801 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
802 if (!OpBank) {
803 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
804 return true;
805 }
806
807 if (PrevOpBank && OpBank != PrevOpBank) {
808 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
809 return true;
810 }
811 PrevOpBank = OpBank;
812 }
813 return false;
814}
815
816/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
817/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
818/// and of size \p OpSize.
819/// \returns \p GenericOpc if the combination is unsupported.
820static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
821 unsigned OpSize) {
822 switch (RegBankID) {
823 case AArch64::GPRRegBankID:
824 if (OpSize == 32) {
825 switch (GenericOpc) {
826 case TargetOpcode::G_SHL:
827 return AArch64::LSLVWr;
828 case TargetOpcode::G_LSHR:
829 return AArch64::LSRVWr;
830 case TargetOpcode::G_ASHR:
831 return AArch64::ASRVWr;
832 default:
833 return GenericOpc;
834 }
835 } else if (OpSize == 64) {
836 switch (GenericOpc) {
837 case TargetOpcode::G_PTR_ADD:
838 return AArch64::ADDXrr;
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVXr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVXr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVXr;
845 default:
846 return GenericOpc;
847 }
848 }
849 break;
850 case AArch64::FPRRegBankID:
851 switch (OpSize) {
852 case 32:
853 switch (GenericOpc) {
854 case TargetOpcode::G_FADD:
855 return AArch64::FADDSrr;
856 case TargetOpcode::G_FSUB:
857 return AArch64::FSUBSrr;
858 case TargetOpcode::G_FMUL:
859 return AArch64::FMULSrr;
860 case TargetOpcode::G_FDIV:
861 return AArch64::FDIVSrr;
862 default:
863 return GenericOpc;
864 }
865 case 64:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDDrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBDrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULDrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVDrr;
875 case TargetOpcode::G_OR:
876 return AArch64::ORRv8i8;
877 default:
878 return GenericOpc;
879 }
880 }
881 break;
882 }
883 return GenericOpc;
884}
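A minimal usage sketch (hypothetical call site; the results follow the switch above):

  // 32-bit variable shift on the GPR bank selects the W-register LSLV form.
  unsigned Opc = selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 32);
  // Opc == AArch64::LSLVWr; any combination not listed simply returns GenericOpc.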
885
886/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
887/// appropriate for the (value) register bank \p RegBankID and of memory access
888/// size \p OpSize. This returns the variant with the base+unsigned-immediate
889/// addressing mode (e.g., LDRXui).
890/// \returns \p GenericOpc if the combination is unsupported.
891static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
892 unsigned OpSize) {
893 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
894 switch (RegBankID) {
895 case AArch64::GPRRegBankID:
896 switch (OpSize) {
897 case 8:
898 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
899 case 16:
900 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
901 case 32:
902 return isStore ? AArch64::STRWui : AArch64::LDRWui;
903 case 64:
904 return isStore ? AArch64::STRXui : AArch64::LDRXui;
905 }
906 break;
907 case AArch64::FPRRegBankID:
908 switch (OpSize) {
909 case 8:
910 return isStore ? AArch64::STRBui : AArch64::LDRBui;
911 case 16:
912 return isStore ? AArch64::STRHui : AArch64::LDRHui;
913 case 32:
914 return isStore ? AArch64::STRSui : AArch64::LDRSui;
915 case 64:
916 return isStore ? AArch64::STRDui : AArch64::LDRDui;
917 case 128:
918 return isStore ? AArch64::STRQui : AArch64::LDRQui;
919 }
920 break;
921 }
922 return GenericOpc;
923}
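Likewise for the load/store helper (hypothetical call site):

  // 64-bit GPR load with the base + unsigned-immediate addressing mode.
  unsigned LdOpc =
      selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::GPRRegBankID, 64);
  // LdOpc == AArch64::LDRXui; a 128-bit FPR store would map to AArch64::STRQui.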
924
925/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
926/// to \p *To.
927///
928/// E.g "To = COPY SrcReg:SubReg"
929static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
930 const RegisterBankInfo &RBI, Register SrcReg,
931 const TargetRegisterClass *To, unsigned SubReg) {
932 assert(SrcReg.isValid() && "Expected a valid source register?");
933 assert(To && "Destination register class cannot be null");
934 assert(SubReg && "Expected a valid subregister");
935
936 MachineIRBuilder MIB(I);
937 auto SubRegCopy =
938 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
939 MachineOperand &RegOp = I.getOperand(1);
940 RegOp.setReg(SubRegCopy.getReg(0));
941
942 // It's possible that the destination register won't be constrained. Make
943 // sure that happens.
944 if (!I.getOperand(0).getReg().isPhysical())
945 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
946
947 return true;
948}
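Sketch of the rewrite performed above, for a hypothetical copy that narrows a 64-bit GPR into a 32-bit destination class:

  //   %dst:gpr32 = COPY %src:gpr64
  // becomes
  //   %tmp:gpr32 = COPY %src.sub_32
  //   %dst:gpr32 = COPY %tmp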
949
950/// Helper function to get the source and destination register classes for a
951/// copy. Returns a std::pair containing the source register class for the
952/// copy, and the destination register class for the copy. If a register class
953/// cannot be determined, then it will be nullptr.
954static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
955 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
956 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
957 const RegisterBankInfo &RBI) {
958 Register DstReg = I.getOperand(0).getReg();
959 Register SrcReg = I.getOperand(1).getReg();
960 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
961 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
962
963 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
964 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
965
966 // Special casing for cross-bank copies of s1s. We can technically represent
967 // a 1-bit value with any size of register. The minimum size for a GPR is 32
968 // bits. So, we need to put the FPR on 32 bits as well.
969 //
970 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
971 // then we can pull it into the helpers that get the appropriate class for a
972 // register bank. Or make a new helper that carries along some constraint
973 // information.
974 if (SrcRegBank != DstRegBank &&
975 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
976 SrcSize = DstSize = TypeSize::getFixed(32);
977
978 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
979 getMinClassForRegBank(DstRegBank, DstSize, true)};
980}
981
982// FIXME: We need some sort of API in RBI/TRI to allow generic code to
983// constrain operands of simple instructions given a TargetRegisterClass
984// and LLT
985static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
986 const RegisterBankInfo &RBI) {
987 for (MachineOperand &MO : I.operands()) {
988 if (!MO.isReg())
989 continue;
990 Register Reg = MO.getReg();
991 if (!Reg)
992 continue;
993 if (Reg.isPhysical())
994 continue;
995 LLT Ty = MRI.getType(Reg);
996 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
997 const TargetRegisterClass *RC =
998 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
999 if (!RC) {
1000 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1001 RC = getRegClassForTypeOnBank(Ty, RB);
1002 if (!RC) {
1003 LLVM_DEBUG(
1004 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1005 break;
1006 }
1007 }
1008 RBI.constrainGenericRegister(Reg, *RC, MRI);
1009 }
1010
1011 return true;
1012}
1013
1014static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1015 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1016 const RegisterBankInfo &RBI) {
1017 Register DstReg = I.getOperand(0).getReg();
1018 Register SrcReg = I.getOperand(1).getReg();
1019 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1020 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1021
1022 // Find the correct register classes for the source and destination registers.
1023 const TargetRegisterClass *SrcRC;
1024 const TargetRegisterClass *DstRC;
1025 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1026
1027 if (!DstRC) {
1028 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1029 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1030 return false;
1031 }
1032
1033 // Is this a copy? If so, then we may need to insert a subregister copy.
1034 if (I.isCopy()) {
1035 // Yes. Check if there's anything to fix up.
1036 if (!SrcRC) {
1037 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1038 return false;
1039 }
1040
1041 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1042 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1043 unsigned SubReg;
1044
1045 // If the source bank doesn't support a subregister copy small enough,
1046 // then we first need to copy to the destination bank.
1047 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1048 const TargetRegisterClass *DstTempRC =
1049 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1050 getSubRegForClass(DstRC, TRI, SubReg);
1051
1052 MachineIRBuilder MIB(I);
1053 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1054 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1055 } else if (SrcSize > DstSize) {
1056 // If the source register is bigger than the destination we need to
1057 // perform a subregister copy.
1058 const TargetRegisterClass *SubRegRC =
1059 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1060 getSubRegForClass(SubRegRC, TRI, SubReg);
1061 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1062 } else if (DstSize > SrcSize) {
1063 // If the destination register is bigger than the source we need to do
1064 // a promotion using SUBREG_TO_REG.
1065 const TargetRegisterClass *PromotionRC =
1066 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1067 getSubRegForClass(SrcRC, TRI, SubReg);
1068
1069 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1070 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1071 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1072 .addImm(0)
1073 .addUse(SrcReg)
1074 .addImm(SubReg);
1075 MachineOperand &RegOp = I.getOperand(1);
1076 RegOp.setReg(PromoteReg);
1077 }
1078
1079 // If the destination is a physical register, then there's nothing to
1080 // change, so we're done.
1081 if (DstReg.isPhysical())
1082 return true;
1083 }
1084
1085 // No need to constrain SrcReg. It will get constrained when we hit another
1086 // of its use or its defs. Copies do not have constraints.
1087 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1088 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1089 << " operand\n");
1090 return false;
1091 }
1092
1093 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1094 // The sizes will be mismatched with the source < 32b but that's ok.
1095 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1096 I.setDesc(TII.get(AArch64::COPY));
1097 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1098 return selectCopy(I, TII, MRI, TRI, RBI);
1099 }
1100
1101 I.setDesc(TII.get(AArch64::COPY));
1102 return true;
1103}
1104
1105MachineInstr *
1106AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1107 Register False, AArch64CC::CondCode CC,
1108 MachineIRBuilder &MIB) const {
1109 MachineRegisterInfo &MRI = *MIB.getMRI();
1110 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1111 RBI.getRegBank(True, MRI, TRI)->getID() &&
1112 "Expected both select operands to have the same regbank?");
1113 LLT Ty = MRI.getType(True);
1114 if (Ty.isVector())
1115 return nullptr;
1116 const unsigned Size = Ty.getSizeInBits();
1117 assert((Size == 32 || Size == 64) &&
1118 "Expected 32 bit or 64 bit select only?");
1119 const bool Is32Bit = Size == 32;
1120 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1121 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1122 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1123 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1124 return &*FCSel;
1125 }
1126
1127 // By default, we'll try and emit a CSEL.
1128 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1129 bool Optimized = false;
1130 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1131 &Optimized](Register &Reg, Register &OtherReg,
1132 bool Invert) {
1133 if (Optimized)
1134 return false;
1135
1136 // Attempt to fold:
1137 //
1138 // %sub = G_SUB 0, %x
1139 // %select = G_SELECT cc, %reg, %sub
1140 //
1141 // Into:
1142 // %select = CSNEG %reg, %x, cc
1143 Register MatchReg;
1144 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1145 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1146 Reg = MatchReg;
1147 if (Invert) {
1148 CC = AArch64CC::getInvertedCondCode(CC);
1149 std::swap(Reg, OtherReg);
1150 }
1151 return true;
1152 }
1153
1154 // Attempt to fold:
1155 //
1156 // %xor = G_XOR %x, -1
1157 // %select = G_SELECT cc, %reg, %xor
1158 //
1159 // Into:
1160 // %select = CSINV %reg, %x, cc
1161 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1162 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1163 Reg = MatchReg;
1164 if (Invert) {
1165 CC = AArch64CC::getInvertedCondCode(CC);
1166 std::swap(Reg, OtherReg);
1167 }
1168 return true;
1169 }
1170
1171 // Attempt to fold:
1172 //
1173 // %add = G_ADD %x, 1
1174 // %select = G_SELECT cc, %reg, %add
1175 //
1176 // Into:
1177 // %select = CSINC %reg, %x, cc
1178 if (mi_match(Reg, MRI,
1179 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1180 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1181 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1182 Reg = MatchReg;
1183 if (Invert) {
1184 CC = AArch64CC::getInvertedCondCode(CC);
1185 std::swap(Reg, OtherReg);
1186 }
1187 return true;
1188 }
1189
1190 return false;
1191 };
1192
1193 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1194 // true/false values are constants.
1195 // FIXME: All of these patterns already exist in tablegen. We should be
1196 // able to import these.
1197 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1198 &Optimized]() {
1199 if (Optimized)
1200 return false;
1201 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1202 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1203 if (!TrueCst && !FalseCst)
1204 return false;
1205
1206 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1207 if (TrueCst && FalseCst) {
1208 int64_t T = TrueCst->Value.getSExtValue();
1209 int64_t F = FalseCst->Value.getSExtValue();
1210
1211 if (T == 0 && F == 1) {
1212 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1213 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1214 True = ZReg;
1215 False = ZReg;
1216 return true;
1217 }
1218
1219 if (T == 0 && F == -1) {
1220 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1221 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1222 True = ZReg;
1223 False = ZReg;
1224 return true;
1225 }
1226 }
1227
1228 if (TrueCst) {
1229 int64_t T = TrueCst->Value.getSExtValue();
1230 if (T == 1) {
1231 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1232 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1233 True = False;
1234 False = ZReg;
1235 CC = AArch64CC::getInvertedCondCode(CC);
1236 return true;
1237 }
1238
1239 if (T == -1) {
1240 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1241 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1242 True = False;
1243 False = ZReg;
1244 CC = AArch64CC::getInvertedCondCode(CC);
1245 return true;
1246 }
1247 }
1248
1249 if (FalseCst) {
1250 int64_t F = FalseCst->Value.getSExtValue();
1251 if (F == 1) {
1252 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1253 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1254 False = ZReg;
1255 return true;
1256 }
1257
1258 if (F == -1) {
1259 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1260 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1261 False = ZReg;
1262 return true;
1263 }
1264 }
1265 return false;
1266 };
1267
1268 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1269 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1270 Optimized |= TryOptSelectCst();
1271 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1272 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1273 return &*SelectInst;
1274}
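Summarizing the folds above with hypothetical MIR (cc is the incoming condition code):

  //   %f = G_SUB 0, %x;  %d = G_SELECT cc, %t, %f   -->  CSNEG %d, %t, %x, cc
  //   %d = G_SELECT cc, 0, 1                        -->  CSINC %d, zr, zr, cc
  // Anything that does not match a fold becomes a plain CSEL, or FCSEL on the FPR bank.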
1275
1276static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P,
1277 Register RHS = Register(),
1278 MachineRegisterInfo *MRI = nullptr) {
1279 switch (P) {
1280 default:
1281 llvm_unreachable("Unknown condition code!");
1282 case CmpInst::ICMP_NE:
1283 return AArch64CC::NE;
1284 case CmpInst::ICMP_EQ:
1285 return AArch64CC::EQ;
1286 case CmpInst::ICMP_SGT:
1287 return AArch64CC::GT;
1288 case CmpInst::ICMP_SGE:
1289 if (RHS && MRI) {
1290 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1291 if (ValAndVReg && ValAndVReg->Value == 0)
1292 return AArch64CC::PL;
1293 }
1294 return AArch64CC::GE;
1295 case CmpInst::ICMP_SLT:
1296 if (RHS && MRI) {
1297 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1298 if (ValAndVReg && ValAndVReg->Value == 0)
1299 return AArch64CC::MI;
1300 }
1301 return AArch64CC::LT;
1302 case CmpInst::ICMP_SLE:
1303 return AArch64CC::LE;
1304 case CmpInst::ICMP_UGT:
1305 return AArch64CC::HI;
1306 case CmpInst::ICMP_UGE:
1307 return AArch64CC::HS;
1308 case CmpInst::ICMP_ULT:
1309 return AArch64CC::LO;
1310 case CmpInst::ICMP_ULE:
1311 return AArch64CC::LS;
1312 }
1313}
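A few sample mappings produced by the function above:

  //   ICMP_SGT -> GT          ICMP_ULT -> LO
  //   ICMP_SGE x, 0 -> PL     ICMP_SLT x, 0 -> MI    (plain sign-bit checks)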
1314
1315/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1316static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1317 AArch64CC::CondCode &CondCode,
1318 AArch64CC::CondCode &CondCode2) {
1319 CondCode2 = AArch64CC::AL;
1320 switch (CC) {
1321 default:
1322 llvm_unreachable("Unknown FP condition!");
1323 case CmpInst::FCMP_OEQ:
1324 CondCode = AArch64CC::EQ;
1325 break;
1326 case CmpInst::FCMP_OGT:
1327 CondCode = AArch64CC::GT;
1328 break;
1329 case CmpInst::FCMP_OGE:
1330 CondCode = AArch64CC::GE;
1331 break;
1332 case CmpInst::FCMP_OLT:
1333 CondCode = AArch64CC::MI;
1334 break;
1335 case CmpInst::FCMP_OLE:
1336 CondCode = AArch64CC::LS;
1337 break;
1338 case CmpInst::FCMP_ONE:
1339 CondCode = AArch64CC::MI;
1340 CondCode2 = AArch64CC::GT;
1341 break;
1342 case CmpInst::FCMP_ORD:
1343 CondCode = AArch64CC::VC;
1344 break;
1345 case CmpInst::FCMP_UNO:
1346 CondCode = AArch64CC::VS;
1347 break;
1348 case CmpInst::FCMP_UEQ:
1349 CondCode = AArch64CC::EQ;
1350 CondCode2 = AArch64CC::VS;
1351 break;
1352 case CmpInst::FCMP_UGT:
1353 CondCode = AArch64CC::HI;
1354 break;
1355 case CmpInst::FCMP_UGE:
1356 CondCode = AArch64CC::PL;
1357 break;
1358 case CmpInst::FCMP_ULT:
1359 CondCode = AArch64CC::LT;
1360 break;
1361 case CmpInst::FCMP_ULE:
1362 CondCode = AArch64CC::LE;
1363 break;
1364 case CmpInst::FCMP_UNE:
1365 CondCode = AArch64CC::NE;
1366 break;
1367 }
1368}
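For example, FCMP_OGT needs only GT and FCMP_UNO only VS, whereas FCMP_ONE is the OR of two conditions:

  //   FCMP_ONE -> MI (a olt b) || GT (a ogt b)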
1369
1370/// Convert an IR fp condition code to an AArch64 CC.
1371/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1372/// should be AND'ed instead of OR'ed.
1373static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1374 AArch64CC::CondCode &CondCode,
1375 AArch64CC::CondCode &CondCode2) {
1376 CondCode2 = AArch64CC::AL;
1377 switch (CC) {
1378 default:
1379 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1380 assert(CondCode2 == AArch64CC::AL);
1381 break;
1382 case CmpInst::FCMP_ONE:
1383 // (a one b)
1384 // == ((a olt b) || (a ogt b))
1385 // == ((a ord b) && (a une b))
1386 CondCode = AArch64CC::VC;
1387 CondCode2 = AArch64CC::NE;
1388 break;
1389 case CmpInst::FCMP_UEQ:
1390 // (a ueq b)
1391 // == ((a uno b) || (a oeq b))
1392 // == ((a ule b) && (a uge b))
1393 CondCode = AArch64CC::PL;
1394 CondCode2 = AArch64CC::LE;
1395 break;
1396 }
1397}
1398
1399/// Return a register which can be used as a bit to test in a TB(N)Z.
1400static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1401 MachineRegisterInfo &MRI) {
1402 assert(Reg.isValid() && "Expected valid register!");
1403 bool HasZext = false;
1404 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1405 unsigned Opc = MI->getOpcode();
1406
1407 if (!MI->getOperand(0).isReg() ||
1408 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1409 break;
1410
1411 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1412 //
1413 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1414 // on the truncated x is the same as the bit number on x.
1415 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1416 Opc == TargetOpcode::G_TRUNC) {
1417 if (Opc == TargetOpcode::G_ZEXT)
1418 HasZext = true;
1419
1420 Register NextReg = MI->getOperand(1).getReg();
1421 // Did we find something worth folding?
1422 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1423 break;
1424
1425 // NextReg is worth folding. Keep looking.
1426 Reg = NextReg;
1427 continue;
1428 }
1429
1430 // Attempt to find a suitable operation with a constant on one side.
1431 std::optional<uint64_t> C;
1432 Register TestReg;
1433 switch (Opc) {
1434 default:
1435 break;
1436 case TargetOpcode::G_AND:
1437 case TargetOpcode::G_XOR: {
1438 TestReg = MI->getOperand(1).getReg();
1439 Register ConstantReg = MI->getOperand(2).getReg();
1440 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1441 if (!VRegAndVal) {
1442 // AND commutes, check the other side for a constant.
1443 // FIXME: Can we canonicalize the constant so that it's always on the
1444 // same side at some point earlier?
1445 std::swap(ConstantReg, TestReg);
1446 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1447 }
1448 if (VRegAndVal) {
1449 if (HasZext)
1450 C = VRegAndVal->Value.getZExtValue();
1451 else
1452 C = VRegAndVal->Value.getSExtValue();
1453 }
1454 break;
1455 }
1456 case TargetOpcode::G_ASHR:
1457 case TargetOpcode::G_LSHR:
1458 case TargetOpcode::G_SHL: {
1459 TestReg = MI->getOperand(1).getReg();
1460 auto VRegAndVal =
1461 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1462 if (VRegAndVal)
1463 C = VRegAndVal->Value.getSExtValue();
1464 break;
1465 }
1466 }
1467
1468 // Didn't find a constant or viable register. Bail out of the loop.
1469 if (!C || !TestReg.isValid())
1470 break;
1471
1472 // We found a suitable instruction with a constant. Check to see if we can
1473 // walk through the instruction.
1474 Register NextReg;
1475 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1476 switch (Opc) {
1477 default:
1478 break;
1479 case TargetOpcode::G_AND:
1480 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1481 if ((*C >> Bit) & 1)
1482 NextReg = TestReg;
1483 break;
1484 case TargetOpcode::G_SHL:
1485 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1486 // the type of the register.
1487 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1488 NextReg = TestReg;
1489 Bit = Bit - *C;
1490 }
1491 break;
1492 case TargetOpcode::G_ASHR:
1493 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1494 // in x
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 if (Bit >= TestRegSize)
1498 Bit = TestRegSize - 1;
1499 break;
1500 case TargetOpcode::G_LSHR:
1501 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1502 if ((Bit + *C) < TestRegSize) {
1503 NextReg = TestReg;
1504 Bit = Bit + *C;
1505 }
1506 break;
1507 case TargetOpcode::G_XOR:
1508 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1509 // appropriate.
1510 //
1511 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1512 //
1513 // tbz x', b -> tbnz x, b
1514 //
1515 // Because x' only has the b-th bit set if x does not.
1516 if ((*C >> Bit) & 1)
1517 Invert = !Invert;
1518 NextReg = TestReg;
1519 break;
1520 }
1521
1522 // Check if we found anything worth folding.
1523 if (!NextReg.isValid())
1524 return Reg;
1525 Reg = NextReg;
1526 }
1527
1528 return Reg;
1529}
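A short walk-through of the folding loop above, with hypothetical vregs:

  //   %y = G_SHL %x, 2
  //   tbz %y, #3   -->  tbz %x, #1     (bit index shifted down by the shift amount)
  //   %z = G_XOR %x, 8
  //   tbz %z, #3   -->  tbnz %x, #3    (bit 3 is set in the mask, so Invert flips)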
1530
1531MachineInstr *AArch64InstructionSelector::emitTestBit(
1532 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1533 MachineIRBuilder &MIB) const {
1534 assert(TestReg.isValid());
1535 assert(ProduceNonFlagSettingCondBr &&
1536 "Cannot emit TB(N)Z with speculation tracking!");
1537 MachineRegisterInfo &MRI = *MIB.getMRI();
1538
1539 // Attempt to optimize the test bit by walking over instructions.
1540 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1541 LLT Ty = MRI.getType(TestReg);
1542 unsigned Size = Ty.getSizeInBits();
1543 assert(!Ty.isVector() && "Expected a scalar!");
1544 assert(Bit < 64 && "Bit is too large!");
1545
1546 // When the test register is a 64-bit register, we have to narrow to make
1547 // TBNZW work.
1548 bool UseWReg = Bit < 32;
1549 unsigned NecessarySize = UseWReg ? 32 : 64;
1550 if (Size != NecessarySize)
1551 TestReg = moveScalarRegClass(
1552 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1553 MIB);
1554
1555 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1556 {AArch64::TBZW, AArch64::TBNZW}};
1557 unsigned Opc = OpcTable[UseWReg][IsNegative];
1558 auto TestBitMI =
1559 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1560 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1561 return &*TestBitMI;
1562}
1563
1564bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1565 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1566 MachineIRBuilder &MIB) const {
1567 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1568 // Given something like this:
1569 //
1570 // %x = ...Something...
1571 // %one = G_CONSTANT i64 1
1572 // %zero = G_CONSTANT i64 0
1573 // %and = G_AND %x, %one
1574 // %cmp = G_ICMP intpred(ne), %and, %zero
1575 // %cmp_trunc = G_TRUNC %cmp
1576 // G_BRCOND %cmp_trunc, %bb.3
1577 //
1578 // We want to try and fold the AND into the G_BRCOND and produce either a
1579 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1580 //
1581 // In this case, we'd get
1582 //
1583 // TBNZ %x %bb.3
1584 //
1585
1586 // Check if the AND has a constant on its RHS which we can use as a mask.
1587 // If it's a power of 2, then it's the same as checking a specific bit.
1588 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1589 auto MaybeBit = getIConstantVRegValWithLookThrough(
1590 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1591 if (!MaybeBit)
1592 return false;
1593
1594 int32_t Bit = MaybeBit->Value.exactLogBase2();
1595 if (Bit < 0)
1596 return false;
1597
1598 Register TestReg = AndInst.getOperand(1).getReg();
1599
1600 // Emit a TB(N)Z.
1601 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1602 return true;
1603}
1604
1605MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1606 bool IsNegative,
1607 MachineBasicBlock *DestMBB,
1608 MachineIRBuilder &MIB) const {
1609 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1610 MachineRegisterInfo &MRI = *MIB.getMRI();
1611 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1612 AArch64::GPRRegBankID &&
1613 "Expected GPRs only?");
1614 auto Ty = MRI.getType(CompareReg);
1615 unsigned Width = Ty.getSizeInBits();
1616 assert(!Ty.isVector() && "Expected scalar only?");
1617 assert(Width <= 64 && "Expected width to be at most 64?");
1618 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1619 {AArch64::CBNZW, AArch64::CBNZX}};
1620 unsigned Opc = OpcTable[IsNegative][Width == 64];
1621 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1622 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1623 return &*BranchMI;
1624}
1625
1626bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1627 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1628 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1629 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1630 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1631 // totally clean. Some of them require two branches to implement.
1632 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1633 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1634 Pred);
1635 AArch64CC::CondCode CC1, CC2;
1636 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1637 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1638 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1639 if (CC2 != AArch64CC::AL)
1640 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1641 I.eraseFromParent();
1642 return true;
1643}
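Schematically, for a hypothetical destination block %bb:

  //   G_BRCOND (G_FCMP oeq %a, %b), %bb   -->  FCMP %a, %b; Bcc EQ, %bb
  // Predicates whose mapping sets CC2 to something other than AL get a second
  // Bcc to the same destination block.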
1644
1645bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1646 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1647 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1648 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1649 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1650 //
1651 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1652 // instructions will not be produced, as they are conditional branch
1653 // instructions that do not set flags.
1654 if (!ProduceNonFlagSettingCondBr)
1655 return false;
1656
1657 MachineRegisterInfo &MRI = *MIB.getMRI();
1658 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1659 auto Pred =
1660 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1661 Register LHS = ICmp.getOperand(2).getReg();
1662 Register RHS = ICmp.getOperand(3).getReg();
1663
1664 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1665 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1666 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1667
1668 // When we can emit a TB(N)Z, prefer that.
1669 //
1670 // Handle non-commutative condition codes first.
1671 // Note that we don't want to do this when we have a G_AND because it can
1672 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1673 if (VRegAndVal && !AndInst) {
1674 int64_t C = VRegAndVal->Value.getSExtValue();
1675
1676 // When we have a greater-than comparison, we can just test if the msb is
1677 // zero.
1678 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1679 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1680 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1681 I.eraseFromParent();
1682 return true;
1683 }
1684
1685 // When we have a less than comparison, we can just test if the msb is not
1686 // zero.
1687 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1688 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1689 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1690 I.eraseFromParent();
1691 return true;
1692 }
1693
1694 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1695 // we can test if the msb is zero.
1696 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1697 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1698 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1699 I.eraseFromParent();
1700 return true;
1701 }
1702 }
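// Illustrative examples for the three folds above (for a 64-bit %x):
// "G_ICMP intpred(sgt), %x, -1" branches iff %x >= 0, i.e. iff bit 63 is
// clear, so it becomes "TBZ %x, 63"; "intpred(slt), %x, 0" branches iff the
// sign bit is set and becomes "TBNZ %x, 63"; "intpred(sge), %x, 0" is again
// a sign-bit-clear test and becomes "TBZ %x, 63".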
1703
1704 // Attempt to handle commutative condition codes. Right now, that's only
1705 // eq/ne.
1706 if (ICmpInst::isEquality(Pred)) {
1707 if (!VRegAndVal) {
1708 std::swap(RHS, LHS);
1709 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1710 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1711 }
1712
1713 if (VRegAndVal && VRegAndVal->Value == 0) {
1714 // If there's a G_AND feeding into this branch, try to fold it away by
1715 // emitting a TB(N)Z instead.
1716 //
1717 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1718 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1719 // would be redundant.
1720 if (AndInst &&
1721 tryOptAndIntoCompareBranch(
1722 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1723 I.eraseFromParent();
1724 return true;
1725 }
1726
1727 // Otherwise, try to emit a CB(N)Z instead.
1728 auto LHSTy = MRI.getType(LHS);
1729 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1730 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1731 I.eraseFromParent();
1732 return true;
1733 }
1734 }
1735 }
1736
1737 return false;
1738}
1739
1740bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1741 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1742 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1743 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1744 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1745 return true;
1746
1747 // Couldn't optimize. Emit a compare + a Bcc.
1748 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1749 auto &PredOp = ICmp.getOperand(1);
1750 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1751 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1752 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1753 ICmp.getOperand(3).getReg(), MIB.getMRI());
1754 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1755 I.eraseFromParent();
1756 return true;
1757}
1758
1759bool AArch64InstructionSelector::selectCompareBranch(
1760 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1761 Register CondReg = I.getOperand(0).getReg();
1762 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1763 // Try to select the G_BRCOND using whatever is feeding the condition if
1764 // possible.
1765 unsigned CCMIOpc = CCMI->getOpcode();
1766 if (CCMIOpc == TargetOpcode::G_FCMP)
1767 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1768 if (CCMIOpc == TargetOpcode::G_ICMP)
1769 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1770
1771 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1772 // instructions will not be produced, as they are conditional branch
1773 // instructions that do not set flags.
1774 if (ProduceNonFlagSettingCondBr) {
1775 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1776 I.getOperand(1).getMBB(), MIB);
1777 I.eraseFromParent();
1778 return true;
1779 }
1780
1781 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1782 auto TstMI =
1783 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1784 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1785 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1786 .addImm(AArch64CC::NE)
1787 .addMBB(I.getOperand(1).getMBB());
1788 I.eraseFromParent();
1789 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1790}
1791
1792/// Returns the element immediate value of a vector shift operand if found.
1793/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1794static std::optional<int64_t> getVectorShiftImm(Register Reg,
1795 MachineRegisterInfo &MRI) {
1796 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1797 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1798 return getAArch64VectorSplatScalar(*OpMI, MRI);
1799}
1800
1801/// Matches and returns the shift immediate value for a SHL instruction given
1802/// a shift operand.
1803static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1804 MachineRegisterInfo &MRI) {
1805 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1806 if (!ShiftImm)
1807 return std::nullopt;
1808 // Check the immediate is in range for a SHL.
1809 int64_t Imm = *ShiftImm;
1810 if (Imm < 0)
1811 return std::nullopt;
1812 switch (SrcTy.getElementType().getSizeInBits()) {
1813 default:
1814 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1815 return std::nullopt;
1816 case 8:
1817 if (Imm > 7)
1818 return std::nullopt;
1819 break;
1820 case 16:
1821 if (Imm > 15)
1822 return std::nullopt;
1823 break;
1824 case 32:
1825 if (Imm > 31)
1826 return std::nullopt;
1827 break;
1828 case 64:
1829 if (Imm > 63)
1830 return std::nullopt;
1831 break;
1832 }
1833 return Imm;
1834}
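// A hedged example of the range check above: a <4 x s32> shift whose RHS is a
// G_BUILD_VECTOR splat of 5 yields 5, while a splat of 32 (or any negative
// value) is out of range for a 32-bit element SHL immediate and yields
// std::nullopt, so the caller falls back to the register form.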
1835
1836bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1837 MachineRegisterInfo &MRI) {
1838 assert(I.getOpcode() == TargetOpcode::G_SHL);
1839 Register DstReg = I.getOperand(0).getReg();
1840 const LLT Ty = MRI.getType(DstReg);
1841 Register Src1Reg = I.getOperand(1).getReg();
1842 Register Src2Reg = I.getOperand(2).getReg();
1843
1844 if (!Ty.isVector())
1845 return false;
1846
1847 // Check if we have a vector of constants on RHS that we can select as the
1848 // immediate form.
1849 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1850
1851 unsigned Opc = 0;
1852 if (Ty == LLT::fixed_vector(2, 64)) {
1853 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1854 } else if (Ty == LLT::fixed_vector(4, 32)) {
1855 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1856 } else if (Ty == LLT::fixed_vector(2, 32)) {
1857 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1858 } else if (Ty == LLT::fixed_vector(4, 16)) {
1859 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1860 } else if (Ty == LLT::fixed_vector(8, 16)) {
1861 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1862 } else if (Ty == LLT::fixed_vector(16, 8)) {
1863 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1864 } else if (Ty == LLT::fixed_vector(8, 8)) {
1865 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1866 } else {
1867 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1868 return false;
1869 }
1870
1871 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1872 if (ImmVal)
1873 Shl.addImm(*ImmVal);
1874 else
1875 Shl.addUse(Src2Reg);
1876 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1877 I.eraseFromParent();
1878 return true;
1879}
1880
1881bool AArch64InstructionSelector::selectVectorAshrLshr(
1882 MachineInstr &I, MachineRegisterInfo &MRI) {
1883 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1884 I.getOpcode() == TargetOpcode::G_LSHR);
1885 Register DstReg = I.getOperand(0).getReg();
1886 const LLT Ty = MRI.getType(DstReg);
1887 Register Src1Reg = I.getOperand(1).getReg();
1888 Register Src2Reg = I.getOperand(2).getReg();
1889
1890 if (!Ty.isVector())
1891 return false;
1892
1893 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1894
1895 // We expect the immediate case to be lowered in the PostLegalCombiner to
1896 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1897
1898 // There is no shift-right-by-register instruction, but the shift-left-by-
1899 // register instruction takes a signed shift amount, where negative values
1900 // specify a right shift.
1901
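// For example (a sketch, not taken from any particular test): a <4 x s32>
// G_ASHR whose shift amount is still a register at this point is emitted as
// NEGv4i32 of the shift amount followed by SSHLv4i32, while a G_LSHR of the
// same type uses USHLv4i32 instead.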
1902 unsigned Opc = 0;
1903 unsigned NegOpc = 0;
1904 const TargetRegisterClass *RC =
1905 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1906 if (Ty == LLT::fixed_vector(2, 64)) {
1907 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1908 NegOpc = AArch64::NEGv2i64;
1909 } else if (Ty == LLT::fixed_vector(4, 32)) {
1910 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1911 NegOpc = AArch64::NEGv4i32;
1912 } else if (Ty == LLT::fixed_vector(2, 32)) {
1913 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1914 NegOpc = AArch64::NEGv2i32;
1915 } else if (Ty == LLT::fixed_vector(4, 16)) {
1916 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1917 NegOpc = AArch64::NEGv4i16;
1918 } else if (Ty == LLT::fixed_vector(8, 16)) {
1919 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1920 NegOpc = AArch64::NEGv8i16;
1921 } else if (Ty == LLT::fixed_vector(16, 8)) {
1922 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1923 NegOpc = AArch64::NEGv16i8;
1924 } else if (Ty == LLT::fixed_vector(8, 8)) {
1925 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1926 NegOpc = AArch64::NEGv8i8;
1927 } else {
1928 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1929 return false;
1930 }
1931
1932 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1933 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1934 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1935 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1936 I.eraseFromParent();
1937 return true;
1938}
1939
1940bool AArch64InstructionSelector::selectVaStartAAPCS(
1941 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1942
1943 if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
1944 MF.getFunction().isVarArg()))
1945 return false;
1946
1947 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1948 // Standard, section 10.1.5.
1949
1950 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1951 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1952 const auto *PtrRegClass =
1953 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1954
1955 const MCInstrDesc &MCIDAddAddr =
1956 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1957 const MCInstrDesc &MCIDStoreAddr =
1958 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1959
1960 /*
1961 * typedef struct va_list {
1962 * void * stack; // next stack param
1963 * void * gr_top; // end of GP arg reg save area
1964 * void * vr_top; // end of FP/SIMD arg reg save area
1965 * int gr_offs; // offset from gr_top to next GP register arg
1966 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1967 * } va_list;
1968 */
1969 const auto VAList = I.getOperand(0).getReg();
1970
1971 // Our current offset in bytes from the va_list struct (VAList).
1972 unsigned OffsetBytes = 0;
1973
1974 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1975 // and increment OffsetBytes by PtrSize.
1976 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1977 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1978 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
1979 .addDef(Top)
1980 .addFrameIndex(FrameIndex)
1981 .addImm(Imm)
1982 .addImm(0);
1983 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1984
1985 const auto *MMO = *I.memoperands_begin();
1986 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
1987 .addUse(Top)
1988 .addUse(VAList)
1989 .addImm(OffsetBytes / PtrSize)
1990 .addMemOperand(MF.getMachineMemOperand(
1991 MMO->getPointerInfo().getWithOffset(OffsetBytes),
1992 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
1993 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1994
1995 OffsetBytes += PtrSize;
1996 };
1997
1998 // void* stack at offset 0
1999 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2000
2001 // void* gr_top at offset 8 (4 on ILP32)
2002 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2003 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2004
2005 // void* vr_top at offset 16 (8 on ILP32)
2006 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2007 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2008
2009 // Helper function to store a 4-byte integer constant to VAList at offset
2010 // OffsetBytes, and increment OffsetBytes by 4.
2011 const auto PushIntConstant = [&](const int32_t Value) {
2012 constexpr int IntSize = 4;
2013 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2014 auto MIB =
2015 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2016 .addDef(Temp)
2017 .addImm(Value);
2018 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2019
2020 const auto *MMO = *I.memoperands_begin();
2021 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2022 .addUse(Temp)
2023 .addUse(VAList)
2024 .addImm(OffsetBytes / IntSize)
2025 .addMemOperand(MF.getMachineMemOperand(
2026 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2027 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2028 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2029 OffsetBytes += IntSize;
2030 };
2031
2032 // int gr_offs at offset 24 (12 on ILP32)
2033 PushIntConstant(-static_cast<int32_t>(GPRSize));
2034
2035 // int vr_offs at offset 28 (16 on ILP32)
2036 PushIntConstant(-static_cast<int32_t>(FPRSize));
2037
2038 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2039
2040 I.eraseFromParent();
2041 return true;
2042}
2043
2044bool AArch64InstructionSelector::selectVaStartDarwin(
2045 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2046 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2047 Register ListReg = I.getOperand(0).getReg();
2048
2049 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2050
2051 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2052 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2053 MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
2054 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2055 ? FuncInfo->getVarArgsGPRIndex()
2056 : FuncInfo->getVarArgsStackIndex();
2057 }
2058
2059 auto MIB =
2060 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2061 .addDef(ArgsAddrReg)
2062 .addFrameIndex(FrameIdx)
2063 .addImm(0)
2064 .addImm(0);
2065
2066 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2067
2068 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2069 .addUse(ArgsAddrReg)
2070 .addUse(ListReg)
2071 .addImm(0)
2072 .addMemOperand(*I.memoperands_begin());
2073
2074 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2075 I.eraseFromParent();
2076 return true;
2077}
2078
2079void AArch64InstructionSelector::materializeLargeCMVal(
2080 MachineInstr &I, const Value *V, unsigned OpFlags) {
2081 MachineBasicBlock &MBB = *I.getParent();
2082 MachineFunction &MF = *MBB.getParent();
2083 MachineRegisterInfo &MRI = MF.getRegInfo();
2084
2085 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2086 MovZ->addOperand(MF, I.getOperand(1));
2087 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2088 AArch64II::MO_NC);
2089 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2090 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
2091
2092 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2093 Register ForceDstReg) {
2094 Register DstReg = ForceDstReg
2095 ? ForceDstReg
2096 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2097 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2098 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2099 MovI->addOperand(MF, MachineOperand::CreateGA(
2100 GV, MovZ->getOperand(1).getOffset(), Flags));
2101 } else {
2102 MovI->addOperand(
2103 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
2104 MovZ->getOperand(1).getOffset(), Flags));
2105 }
2106 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
2107 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2108 return DstReg;
2109 };
2110 Register DstReg = BuildMovK(MovZ.getReg(0),
2111 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2112 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2113 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2114}
2115
2116bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2117 MachineBasicBlock &MBB = *I.getParent();
2118 MachineFunction &MF = *MBB.getParent();
2119 MachineRegisterInfo &MRI = MF.getRegInfo();
2120
2121 switch (I.getOpcode()) {
2122 case TargetOpcode::G_STORE: {
2123 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2124 MachineOperand &SrcOp = I.getOperand(0);
2125 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2126 // Allow matching with imported patterns for stores of pointers. Unlike
2127 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2128 // and constrain.
2129 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2130 Register NewSrc = Copy.getReg(0);
2131 SrcOp.setReg(NewSrc);
2132 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2133 Changed = true;
2134 }
2135 return Changed;
2136 }
2137 case TargetOpcode::G_PTR_ADD: {
2138 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2139 // arithmetic semantics instead of falling back to regular arithmetic.
2140 const auto &TL = STI.getTargetLowering();
2141 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2142 return false;
2143 return convertPtrAddToAdd(I, MRI);
2144 }
2145 case TargetOpcode::G_LOAD: {
2146 // For scalar loads of pointers, we try to convert the dest type from p0
2147 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2148 // conversion, this should be ok because all users should have been
2149 // selected already, so the type doesn't matter for them.
2150 Register DstReg = I.getOperand(0).getReg();
2151 const LLT DstTy = MRI.getType(DstReg);
2152 if (!DstTy.isPointer())
2153 return false;
2154 MRI.setType(DstReg, LLT::scalar(64));
2155 return true;
2156 }
2157 case AArch64::G_DUP: {
2158 // Convert the type from p0 to s64 to help selection.
2159 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2160 if (!DstTy.isPointerVector())
2161 return false;
2162 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2163 MRI.setType(I.getOperand(0).getReg(),
2164 DstTy.changeElementType(LLT::scalar(64)));
2165 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2166 I.getOperand(1).setReg(NewSrc.getReg(0));
2167 return true;
2168 }
2169 case AArch64::G_INSERT_VECTOR_ELT: {
2170 // Convert the type from p0 to s64 to help selection.
2171 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2172 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2173 if (!SrcVecTy.isPointerVector())
2174 return false;
2175 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2176 MRI.setType(I.getOperand(1).getReg(),
2177 DstTy.changeElementType(LLT::scalar(64)));
2178 MRI.setType(I.getOperand(0).getReg(),
2179 DstTy.changeElementType(LLT::scalar(64)));
2180 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2181 I.getOperand(2).setReg(NewSrc.getReg(0));
2182 return true;
2183 }
2184 case TargetOpcode::G_UITOFP:
2185 case TargetOpcode::G_SITOFP: {
2186 // If both source and destination regbanks are FPR, then convert the opcode
2187 // to G_SITOF so that the importer can select it to an fpr variant.
2188 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2189 // copy.
2190 Register SrcReg = I.getOperand(1).getReg();
2191 LLT SrcTy = MRI.getType(SrcReg);
2192 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2193 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2194 return false;
2195
2196 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2197 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2198 I.setDesc(TII.get(AArch64::G_SITOF));
2199 else
2200 I.setDesc(TII.get(AArch64::G_UITOF));
2201 return true;
2202 }
2203 return false;
2204 }
2205 default:
2206 return false;
2207 }
2208}
2209
2210/// This lowering tries to look for G_PTR_ADD instructions and then converts
2211/// them to a standard G_ADD with a COPY on the source.
2212///
2213/// The motivation behind this is to expose the add semantics to the imported
2214/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2215/// because the selector works bottom up, uses before defs. By the time we
2216/// end up trying to select a G_PTR_ADD, we should have already attempted to
2217/// fold this into addressing modes and were therefore unsuccessful.
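/// A sketch of the rewrite (types and register names are illustrative only):
///
///   %dst:_(p0) = G_PTR_ADD %base(p0), %off(s64)
///
/// becomes
///
///   %cast:_(s64) = G_PTRTOINT %base(p0)
///   %dst:_(s64) = G_ADD %cast, %off
///
/// and, if %off was produced by a "0 - %x" idiom, the G_ADD is further turned
/// into %dst = G_SUB %cast, %x below.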
2218bool AArch64InstructionSelector::convertPtrAddToAdd(
2219 MachineInstr &I, MachineRegisterInfo &MRI) {
2220 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2221 Register DstReg = I.getOperand(0).getReg();
2222 Register AddOp1Reg = I.getOperand(1).getReg();
2223 const LLT PtrTy = MRI.getType(DstReg);
2224 if (PtrTy.getAddressSpace() != 0)
2225 return false;
2226
2227 const LLT CastPtrTy =
2228 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2229 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2230 // Set regbanks on the registers.
2231 if (PtrTy.isVector())
2232 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2233 else
2234 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2235
2236 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2237 // %dst(intty) = G_ADD %intbase, off
2238 I.setDesc(TII.get(TargetOpcode::G_ADD));
2239 MRI.setType(DstReg, CastPtrTy);
2240 I.getOperand(1).setReg(PtrToInt.getReg(0));
2241 if (!select(*PtrToInt)) {
2242 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2243 return false;
2244 }
2245
2246 // Also take the opportunity here to try to do some optimization.
2247 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2248 Register NegatedReg;
2249 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2250 return true;
2251 I.getOperand(2).setReg(NegatedReg);
2252 I.setDesc(TII.get(TargetOpcode::G_SUB));
2253 return true;
2254}
2255
2256bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2257 MachineRegisterInfo &MRI) {
2258 // We try to match the immediate variant of LSL, which is actually an alias
2259 // for a special case of UBFM. Otherwise, we fall back to the imported
2260 // selector which will match the register variant.
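 // As a hedged example of the alias being matched: for a 64-bit "G_SHL %x, 3"
 // the two UBFMXri immediates are (64 - 3) % 64 == 61 and 63 - 3 == 60, the
 // same encoding the assembler uses for "lsl x0, x1, #3".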
2261 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2262 const auto &MO = I.getOperand(2);
2263 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2264 if (!VRegAndVal)
2265 return false;
2266
2267 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2268 if (DstTy.isVector())
2269 return false;
2270 bool Is64Bit = DstTy.getSizeInBits() == 64;
2271 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2272 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2273
2274 if (!Imm1Fn || !Imm2Fn)
2275 return false;
2276
2277 auto NewI =
2278 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2279 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2280
2281 for (auto &RenderFn : *Imm1Fn)
2282 RenderFn(NewI);
2283 for (auto &RenderFn : *Imm2Fn)
2284 RenderFn(NewI);
2285
2286 I.eraseFromParent();
2287 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2288}
2289
2290bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2291 MachineInstr &I, MachineRegisterInfo &MRI) {
2292 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2293 // If we're storing a scalar, it doesn't matter what register bank that
2294 // scalar is on. All that matters is the size.
2295 //
2296 // So, if we see something like this (with a 32-bit scalar as an example):
2297 //
2298 // %x:gpr(s32) = ... something ...
2299 // %y:fpr(s32) = COPY %x:gpr(s32)
2300 // G_STORE %y:fpr(s32)
2301 //
2302 // We can fix this up into something like this:
2303 //
2304 // G_STORE %x:gpr(s32)
2305 //
2306 // And then continue the selection process normally.
2307 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2308 if (!DefDstReg.isValid())
2309 return false;
2310 LLT DefDstTy = MRI.getType(DefDstReg);
2311 Register StoreSrcReg = I.getOperand(0).getReg();
2312 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2313
2314 // If we get something strange like a physical register, then we shouldn't
2315 // go any further.
2316 if (!DefDstTy.isValid())
2317 return false;
2318
2319 // Are the source and dst types the same size?
2320 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2321 return false;
2322
2323 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2324 RBI.getRegBank(DefDstReg, MRI, TRI))
2325 return false;
2326
2327 // We have a cross-bank copy, which is entering a store. Let's fold it.
2328 I.getOperand(0).setReg(DefDstReg);
2329 return true;
2330}
2331
2332bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2333 assert(I.getParent() && "Instruction should be in a basic block!");
2334 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2335
2336 MachineBasicBlock &MBB = *I.getParent();
2337 MachineFunction &MF = *MBB.getParent();
2338 MachineRegisterInfo &MRI = MF.getRegInfo();
2339
2340 switch (I.getOpcode()) {
2341 case AArch64::G_DUP: {
2342 // Before selecting a DUP instruction, check if it is better selected as a
2343 // MOV or load from a constant pool.
2344 Register Src = I.getOperand(1).getReg();
2345 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
2346 if (!ValAndVReg)
2347 return false;
2348 LLVMContext &Ctx = MF.getFunction().getContext();
2349 Register Dst = I.getOperand(0).getReg();
2350 Constant *CV = ConstantDataVector::getSplat(
2351 MRI.getType(Dst).getNumElements(),
2352 ConstantInt::get(
2353 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2354 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2355 if (!emitConstantVector(Dst, CV, MIB, MRI))
2356 return false;
2357 I.eraseFromParent();
2358 return true;
2359 }
2360 case TargetOpcode::G_SEXT:
2361 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2362 // over a normal extend.
2363 if (selectUSMovFromExtend(I, MRI))
2364 return true;
2365 return false;
2366 case TargetOpcode::G_BR:
2367 return false;
2368 case TargetOpcode::G_SHL:
2369 return earlySelectSHL(I, MRI);
2370 case TargetOpcode::G_CONSTANT: {
2371 bool IsZero = false;
2372 if (I.getOperand(1).isCImm())
2373 IsZero = I.getOperand(1).getCImm()->isZero();
2374 else if (I.getOperand(1).isImm())
2375 IsZero = I.getOperand(1).getImm() == 0;
2376
2377 if (!IsZero)
2378 return false;
2379
2380 Register DefReg = I.getOperand(0).getReg();
2381 LLT Ty = MRI.getType(DefReg);
2382 if (Ty.getSizeInBits() == 64) {
2383 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2384 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2385 } else if (Ty.getSizeInBits() == 32) {
2386 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2387 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2388 } else
2389 return false;
2390
2391 I.setDesc(TII.get(TargetOpcode::COPY));
2392 return true;
2393 }
2394
2395 case TargetOpcode::G_ADD: {
2396 // Check if this is being fed by a G_ICMP on either side.
2397 //
2398 // (cmp pred, x, y) + z
2399 //
2400 // In the above case, when the cmp is true, we increment z by 1. So, we can
2401 // fold the add into the cset for the cmp by using cinc.
2402 //
2403 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
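 // Roughly, with invented registers: "%d = G_ADD %z, (G_ICMP eq %x, %y)"
 // becomes an integer compare of %x and %y followed by "CSINC %d, %z, %z, ne".
 // CSINC returns its first source when the condition holds and its second
 // source plus one otherwise, so passing the *inverse* predicate (ne) yields
 // %z + 1 exactly when the original compare (eq) is true.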
2404 Register AddDst = I.getOperand(0).getReg();
2405 Register AddLHS = I.getOperand(1).getReg();
2406 Register AddRHS = I.getOperand(2).getReg();
2407 // Only handle scalars.
2408 LLT Ty = MRI.getType(AddLHS);
2409 if (Ty.isVector())
2410 return false;
2411 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2412 // bits.
2413 unsigned Size = Ty.getSizeInBits();
2414 if (Size != 32 && Size != 64)
2415 return false;
2416 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2417 if (!MRI.hasOneNonDBGUse(Reg))
2418 return nullptr;
2419 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2420 // compare.
2421 if (Size == 32)
2422 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2423 // We model scalar compares using 32-bit destinations right now.
2424 // If it's a 64-bit compare, it'll have 64-bit sources.
2425 Register ZExt;
2426 if (!mi_match(Reg, MRI,
2427 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2428 return nullptr;
2429 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2430 if (!Cmp ||
2431 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2432 return nullptr;
2433 return Cmp;
2434 };
2435 // Try to match
2436 // z + (cmp pred, x, y)
2437 MachineInstr *Cmp = MatchCmp(AddRHS);
2438 if (!Cmp) {
2439 // (cmp pred, x, y) + z
2440 std::swap(AddLHS, AddRHS);
2441 Cmp = MatchCmp(AddRHS);
2442 if (!Cmp)
2443 return false;
2444 }
2445 auto &PredOp = Cmp->getOperand(1);
2447 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2448 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2449 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2450 AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2451 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
2452 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2453 I.eraseFromParent();
2454 return true;
2455 }
2456 case TargetOpcode::G_OR: {
2457 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2458 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2459 // shifting and masking that we can replace with a BFI (encoded as a BFM).
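 // A worked example for the matcher below (values invented): with Size == 32,
 // "%dst = G_OR (G_SHL %hi, 8), (G_AND %lo, 0xff)" satisfies
 // (1 << 8) - 1 == 0xff, so Immr == 24, Imms == 23 and we emit
 // "BFMWri %dst, %lo, %hi, 24, 23", which inserts the low 24 bits of %hi above
 // the low 8 bits kept from %lo.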
2460 Register Dst = I.getOperand(0).getReg();
2461 LLT Ty = MRI.getType(Dst);
2462
2463 if (!Ty.isScalar())
2464 return false;
2465
2466 unsigned Size = Ty.getSizeInBits();
2467 if (Size != 32 && Size != 64)
2468 return false;
2469
2470 Register ShiftSrc;
2471 int64_t ShiftImm;
2472 Register MaskSrc;
2473 int64_t MaskImm;
2474 if (!mi_match(
2475 Dst, MRI,
2476 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2477 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2478 return false;
2479
2480 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2481 return false;
2482
2483 int64_t Immr = Size - ShiftImm;
2484 int64_t Imms = Size - ShiftImm - 1;
2485 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2486 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2487 I.eraseFromParent();
2488 return true;
2489 }
2490 case TargetOpcode::G_FENCE: {
2491 if (I.getOperand(1).getImm() == 0)
2492 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2493 else
2494 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2495 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2496 I.eraseFromParent();
2497 return true;
2498 }
2499 default:
2500 return false;
2501 }
2502}
2503
2504bool AArch64InstructionSelector::select(MachineInstr &I) {
2505 assert(I.getParent() && "Instruction should be in a basic block!");
2506 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2507
2508 MachineBasicBlock &MBB = *I.getParent();
2509 MachineFunction &MF = *MBB.getParent();
2510 MachineRegisterInfo &MRI = MF.getRegInfo();
2511
2512 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2513 if (Subtarget->requiresStrictAlign()) {
2514 // We don't support this feature yet.
2515 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2516 return false;
2517 }
2518
2519 MIB.setInstrAndDebugLoc(I);
2520
2521 unsigned Opcode = I.getOpcode();
2522 // G_PHI requires same handling as PHI
2523 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2524 // Certain non-generic instructions also need some special handling.
2525
2526 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2527 return selectImpl(I, *CoverageInfo);
2528
2529 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2530 const Register DefReg = I.getOperand(0).getReg();
2531 const LLT DefTy = MRI.getType(DefReg);
2532
2533 const RegClassOrRegBank &RegClassOrBank =
2534 MRI.getRegClassOrRegBank(DefReg);
2535
2536 const TargetRegisterClass *DefRC =
2537 dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
2538 if (!DefRC) {
2539 if (!DefTy.isValid()) {
2540 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2541 return false;
2542 }
2543 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2544 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2545 if (!DefRC) {
2546 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2547 return false;
2548 }
2549 }
2550
2551 I.setDesc(TII.get(TargetOpcode::PHI));
2552
2553 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2554 }
2555
2556 if (I.isCopy())
2557 return selectCopy(I, TII, MRI, TRI, RBI);
2558
2559 if (I.isDebugInstr())
2560 return selectDebugInstr(I, MRI, RBI);
2561
2562 return true;
2563 }
2564
2565
2566 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2567 LLVM_DEBUG(
2568 dbgs() << "Generic instruction has unexpected implicit operands\n");
2569 return false;
2570 }
2571
2572 // Try to do some lowering before we start instruction selecting. These
2573 // lowerings are purely transformations on the input G_MIR and so selection
2574 // must continue after any modification of the instruction.
2575 if (preISelLower(I)) {
2576 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2577 }
2578
2579 // There may be patterns where the importer can't deal with them optimally,
2580 // but does select it to a suboptimal sequence so our custom C++ selection
2581 // code later never has a chance to work on it. Therefore, we have an early
2582 // selection attempt here to give priority to certain selection routines
2583 // over the imported ones.
2584 if (earlySelect(I))
2585 return true;
2586
2587 if (selectImpl(I, *CoverageInfo))
2588 return true;
2589
2590 LLT Ty =
2591 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2592
2593 switch (Opcode) {
2594 case TargetOpcode::G_SBFX:
2595 case TargetOpcode::G_UBFX: {
2596 static const unsigned OpcTable[2][2] = {
2597 {AArch64::UBFMWri, AArch64::UBFMXri},
2598 {AArch64::SBFMWri, AArch64::SBFMXri}};
2599 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2600 unsigned Size = Ty.getSizeInBits();
2601 unsigned Opc = OpcTable[IsSigned][Size == 64];
2602 auto Cst1 =
2603 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2604 assert(Cst1 && "Should have gotten a constant for src 1?");
2605 auto Cst2 =
2606 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2607 assert(Cst2 && "Should have gotten a constant for src 2?");
2608 auto LSB = Cst1->Value.getZExtValue();
2609 auto Width = Cst2->Value.getZExtValue();
2610 auto BitfieldInst =
2611 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2612 .addImm(LSB)
2613 .addImm(LSB + Width - 1);
2614 I.eraseFromParent();
2615 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2616 }
2617 case TargetOpcode::G_BRCOND:
2618 return selectCompareBranch(I, MF, MRI);
2619
2620 case TargetOpcode::G_BRINDIRECT: {
2621 const Function &Fn = MF.getFunction();
2622 if (std::optional<uint16_t> BADisc =
2623 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2624 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2625 MI.addImm(AArch64PACKey::IA);
2626 MI.addImm(*BADisc);
2627 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2628 I.eraseFromParent();
2629 return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
2630 }
2631 I.setDesc(TII.get(AArch64::BR));
2632 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2633 }
2634
2635 case TargetOpcode::G_BRJT:
2636 return selectBrJT(I, MRI);
2637
2638 case AArch64::G_ADD_LOW: {
2639 // This op may have been separated from its ADRP companion by the localizer
2640 // or some other code motion pass. Given that many CPUs will try to
2641 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2642 // which will later be expanded into an ADRP+ADD pair after scheduling.
2643 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2644 if (BaseMI->getOpcode() != AArch64::ADRP) {
2645 I.setDesc(TII.get(AArch64::ADDXri));
2646 I.addOperand(MachineOperand::CreateImm(0));
2647 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2648 }
2649 assert(TM.getCodeModel() == CodeModel::Small &&
2650 "Expected small code model");
2651 auto Op1 = BaseMI->getOperand(1);
2652 auto Op2 = I.getOperand(2);
2653 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2654 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2655 Op1.getTargetFlags())
2656 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2657 Op2.getTargetFlags());
2658 I.eraseFromParent();
2659 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2660 }
2661
2662 case TargetOpcode::G_FCONSTANT:
2663 case TargetOpcode::G_CONSTANT: {
2664 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2665
2666 const LLT s8 = LLT::scalar(8);
2667 const LLT s16 = LLT::scalar(16);
2668 const LLT s32 = LLT::scalar(32);
2669 const LLT s64 = LLT::scalar(64);
2670 const LLT s128 = LLT::scalar(128);
2671 const LLT p0 = LLT::pointer(0, 64);
2672
2673 const Register DefReg = I.getOperand(0).getReg();
2674 const LLT DefTy = MRI.getType(DefReg);
2675 const unsigned DefSize = DefTy.getSizeInBits();
2676 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2677
2678 // FIXME: Redundant check, but even less readable when factored out.
2679 if (isFP) {
2680 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2681 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2682 << " constant, expected: " << s16 << " or " << s32
2683 << " or " << s64 << " or " << s128 << '\n');
2684 return false;
2685 }
2686
2687 if (RB.getID() != AArch64::FPRRegBankID) {
2688 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2689 << " constant on bank: " << RB
2690 << ", expected: FPR\n");
2691 return false;
2692 }
2693
2694 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2695 // can be sure tablegen works correctly and isn't rescued by this code.
2696 // 0.0 is not covered by tablegen for FP128. So we will handle this
2697 // scenario in the code here.
2698 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2699 return false;
2700 } else {
2701 // s32 and s64 are covered by tablegen.
2702 if (Ty != p0 && Ty != s8 && Ty != s16) {
2703 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2704 << " constant, expected: " << s32 << ", " << s64
2705 << ", or " << p0 << '\n');
2706 return false;
2707 }
2708
2709 if (RB.getID() != AArch64::GPRRegBankID) {
2710 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2711 << " constant on bank: " << RB
2712 << ", expected: GPR\n");
2713 return false;
2714 }
2715 }
2716
2717 if (isFP) {
2718 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2719 // For 16, 64, and 128b values, emit a constant pool load.
2720 switch (DefSize) {
2721 default:
2722 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2723 case 32:
2724 case 64: {
2725 bool OptForSize = shouldOptForSize(&MF);
2726 const auto &TLI = MF.getSubtarget().getTargetLowering();
2727 // If TLI says that this fpimm is illegal, then we'll expand to a
2728 // constant pool load.
2729 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2730 EVT::getFloatingPointVT(DefSize), OptForSize))
2731 break;
2732 [[fallthrough]];
2733 }
2734 case 16:
2735 case 128: {
2736 auto *FPImm = I.getOperand(1).getFPImm();
2737 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2738 if (!LoadMI) {
2739 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2740 return false;
2741 }
2742 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2743 I.eraseFromParent();
2744 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2745 }
2746 }
2747
2748 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2749 // Either emit a FMOV, or emit a copy to emit a normal mov.
2750 const Register DefGPRReg = MRI.createVirtualRegister(
2751 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2752 MachineOperand &RegOp = I.getOperand(0);
2753 RegOp.setReg(DefGPRReg);
2754 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2755 MIB.buildCopy({DefReg}, {DefGPRReg});
2756
2757 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2758 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2759 return false;
2760 }
2761
2762 MachineOperand &ImmOp = I.getOperand(1);
2763 // FIXME: Is going through int64_t always correct?
2764 ImmOp.ChangeToImmediate(
2765 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2766 } else if (I.getOperand(1).isCImm()) {
2767 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2768 I.getOperand(1).ChangeToImmediate(Val);
2769 } else if (I.getOperand(1).isImm()) {
2770 uint64_t Val = I.getOperand(1).getImm();
2771 I.getOperand(1).ChangeToImmediate(Val);
2772 }
2773
2774 const unsigned MovOpc =
2775 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2776 I.setDesc(TII.get(MovOpc));
2777 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2778 return true;
2779 }
2780 case TargetOpcode::G_EXTRACT: {
2781 Register DstReg = I.getOperand(0).getReg();
2782 Register SrcReg = I.getOperand(1).getReg();
2783 LLT SrcTy = MRI.getType(SrcReg);
2784 LLT DstTy = MRI.getType(DstReg);
2785 (void)DstTy;
2786 unsigned SrcSize = SrcTy.getSizeInBits();
2787
2788 if (SrcTy.getSizeInBits() > 64) {
2789 // This should be an extract of an s128, which is like a vector extract.
2790 if (SrcTy.getSizeInBits() != 128)
2791 return false;
2792 // Only support extracting 64 bits from an s128 at the moment.
2793 if (DstTy.getSizeInBits() != 64)
2794 return false;
2795
2796 unsigned Offset = I.getOperand(2).getImm();
2797 if (Offset % 64 != 0)
2798 return false;
2799
2800 // Check we have the right regbank always.
2801 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2802 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2803 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2804
2805 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2806 auto NewI =
2807 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2808 .addUse(SrcReg, 0,
2809 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2810 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2811 AArch64::GPR64RegClass, NewI->getOperand(0));
2812 I.eraseFromParent();
2813 return true;
2814 }
2815
2816 // Emit the same code as a vector extract.
2817 // Offset must be a multiple of 64.
2818 unsigned LaneIdx = Offset / 64;
2819 MachineInstr *Extract = emitExtractVectorElt(
2820 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2821 if (!Extract)
2822 return false;
2823 I.eraseFromParent();
2824 return true;
2825 }
2826
2827 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2828 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2829 Ty.getSizeInBits() - 1);
2830
2831 if (SrcSize < 64) {
2832 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2833 "unexpected G_EXTRACT types");
2835 }
2836
2837 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2838 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2839 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2840 .addReg(DstReg, 0, AArch64::sub_32);
2841 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2842 AArch64::GPR32RegClass, MRI);
2843 I.getOperand(0).setReg(DstReg);
2844
2845 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2846 }
2847
2848 case TargetOpcode::G_INSERT: {
2849 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2850 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2851 unsigned DstSize = DstTy.getSizeInBits();
2852 // Larger inserts are vectors, same-size ones should be something else by
2853 // now (split up or turned into COPYs).
2854 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2855 return false;
2856
2857 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2858 unsigned LSB = I.getOperand(3).getImm();
2859 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2860 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2861 MachineInstrBuilder(MF, I).addImm(Width - 1);
2862
2863 if (DstSize < 64) {
2864 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2865 "unexpected G_INSERT types");
2867 }
2868
2869 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2870 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2871 TII.get(AArch64::SUBREG_TO_REG))
2872 .addDef(SrcReg)
2873 .addImm(0)
2874 .addUse(I.getOperand(2).getReg())
2875 .addImm(AArch64::sub_32);
2876 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2877 AArch64::GPR32RegClass, MRI);
2878 I.getOperand(2).setReg(SrcReg);
2879
2880 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2881 }
2882 case TargetOpcode::G_FRAME_INDEX: {
2883 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2884 if (Ty != LLT::pointer(0, 64)) {
2885 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2886 << ", expected: " << LLT::pointer(0, 64) << '\n');
2887 return false;
2888 }
2889 I.setDesc(TII.get(AArch64::ADDXri));
2890
2891 // MOs for a #0 shifted immediate.
2892 I.addOperand(MachineOperand::CreateImm(0));
2893 I.addOperand(MachineOperand::CreateImm(0));
2894
2895 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2896 }
2897
2898 case TargetOpcode::G_GLOBAL_VALUE: {
2899 const GlobalValue *GV = nullptr;
2900 unsigned OpFlags;
2901 if (I.getOperand(1).isSymbol()) {
2902 OpFlags = I.getOperand(1).getTargetFlags();
2903 // Currently only used by "RtLibUseGOT".
2904 assert(OpFlags == AArch64II::MO_GOT);
2905 } else {
2906 GV = I.getOperand(1).getGlobal();
2907 if (GV->isThreadLocal()) {
2908 // We don't support instructions with emulated TLS variables yet
2909 if (TM.useEmulatedTLS())
2910 return false;
2911 return selectTLSGlobalValue(I, MRI);
2912 }
2913 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2914 }
2915
2916 if (OpFlags & AArch64II::MO_GOT) {
2917 I.setDesc(TII.get(MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
2918 ? AArch64::LOADgotAUTH
2919 : AArch64::LOADgot));
2920 I.getOperand(1).setTargetFlags(OpFlags);
2921 } else if (TM.getCodeModel() == CodeModel::Large &&
2922 !TM.isPositionIndependent()) {
2923 // Materialize the global using movz/movk instructions.
2924 materializeLargeCMVal(I, GV, OpFlags);
2925 I.eraseFromParent();
2926 return true;
2927 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2928 I.setDesc(TII.get(AArch64::ADR));
2929 I.getOperand(1).setTargetFlags(OpFlags);
2930 } else {
2931 I.setDesc(TII.get(AArch64::MOVaddr));
2932 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2933 MachineInstrBuilder MIB(MF, I);
2934 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2935 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2936 }
2937 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2938 }
2939
2940 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2941 return selectPtrAuthGlobalValue(I, MRI);
2942
2943 case TargetOpcode::G_ZEXTLOAD:
2944 case TargetOpcode::G_LOAD:
2945 case TargetOpcode::G_STORE: {
2946 GLoadStore &LdSt = cast<GLoadStore>(I);
2947 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2948 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2949
2950 // Can only handle AddressSpace 0, 64-bit pointers.
2951 if (PtrTy != LLT::pointer(0, 64)) {
2952 return false;
2953 }
2954
2955 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2956 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2957 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2958
2959 // Need special instructions for atomics that affect ordering.
2960 if (Order != AtomicOrdering::NotAtomic &&
2961 Order != AtomicOrdering::Unordered &&
2962 Order != AtomicOrdering::Monotonic) {
2963 assert(!isa<GZExtLoad>(LdSt));
2964 assert(MemSizeInBytes <= 8 &&
2965 "128-bit atomics should already be custom-legalized");
2966
2967 if (isa<GLoad>(LdSt)) {
2968 static constexpr unsigned LDAPROpcodes[] = {
2969 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2970 static constexpr unsigned LDAROpcodes[] = {
2971 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2972 ArrayRef<unsigned> Opcodes =
2973 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2974 ? LDAPROpcodes
2975 : LDAROpcodes;
2976 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2977 } else {
2978 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2979 AArch64::STLRW, AArch64::STLRX};
2980 Register ValReg = LdSt.getReg(0);
2981 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2982 // Emit a subreg copy of 32 bits.
2983 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2984 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2985 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2986 I.getOperand(0).setReg(NewVal);
2987 }
2988 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2989 }
2990 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2991 return true;
2992 }
2993
2994#ifndef NDEBUG
2995 const Register PtrReg = LdSt.getPointerReg();
2996 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2997 // Check that the pointer register is valid.
2998 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2999 "Load/Store pointer operand isn't a GPR");
3000 assert(MRI.getType(PtrReg).isPointer() &&
3001 "Load/Store pointer operand isn't a pointer");
3002#endif
3003
3004 const Register ValReg = LdSt.getReg(0);
3005 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
3006 LLT ValTy = MRI.getType(ValReg);
3007
3008 // The code below doesn't support truncating stores, so we need to split it
3009 // again.
3010 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3011 unsigned SubReg;
3012 LLT MemTy = LdSt.getMMO().getMemoryType();
3013 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3014 if (!getSubRegForClass(RC, TRI, SubReg))
3015 return false;
3016
3017 // Generate a subreg copy.
3018 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
3019 .addReg(ValReg, 0, SubReg)
3020 .getReg(0);
3021 RBI.constrainGenericRegister(Copy, *RC, MRI);
3022 LdSt.getOperand(0).setReg(Copy);
3023 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3024 // If this is an any-extending load from the FPR bank, split it into a regular
3025 // load + extend.
3026 if (RB.getID() == AArch64::FPRRegBankID) {
3027 unsigned SubReg;
3028 LLT MemTy = LdSt.getMMO().getMemoryType();
3029 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3030 if (!getSubRegForClass(RC, TRI, SubReg))
3031 return false;
3032 Register OldDst = LdSt.getReg(0);
3033 Register NewDst =
3034 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
3035 LdSt.getOperand(0).setReg(NewDst);
3036 MRI.setRegBank(NewDst, RB);
3037 // Generate a SUBREG_TO_REG to extend it.
3038 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3039 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3040 .addImm(0)
3041 .addUse(NewDst)
3042 .addImm(SubReg);
3043 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3044 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3045 MIB.setInstr(LdSt);
3046 ValTy = MemTy; // This is no longer an extending load.
3047 }
3048 }
3049
3050 // Helper lambda for partially selecting I. Either returns the original
3051 // instruction with an updated opcode, or a new instruction.
3052 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3053 bool IsStore = isa<GStore>(I);
3054 const unsigned NewOpc =
3055 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3056 if (NewOpc == I.getOpcode())
3057 return nullptr;
3058 // Check if we can fold anything into the addressing mode.
3059 auto AddrModeFns =
3060 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3061 if (!AddrModeFns) {
3062 // Can't fold anything. Use the original instruction.
3063 I.setDesc(TII.get(NewOpc));
3064 I.addOperand(MachineOperand::CreateImm(0));
3065 return &I;
3066 }
3067
3068 // Folded something. Create a new instruction and return it.
3069 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3070 Register CurValReg = I.getOperand(0).getReg();
3071 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3072 NewInst.cloneMemRefs(I);
3073 for (auto &Fn : *AddrModeFns)
3074 Fn(NewInst);
3075 I.eraseFromParent();
3076 return &*NewInst;
3077 };
3078
3079 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3080 if (!LoadStore)
3081 return false;
3082
3083 // If we're storing a 0, use WZR/XZR.
3084 if (Opcode == TargetOpcode::G_STORE) {
3085 auto CVal = getIConstantVRegValWithLookThrough(
3086 LoadStore->getOperand(0).getReg(), MRI);
3087 if (CVal && CVal->Value == 0) {
3088 switch (LoadStore->getOpcode()) {
3089 case AArch64::STRWui:
3090 case AArch64::STRHHui:
3091 case AArch64::STRBBui:
3092 LoadStore->getOperand(0).setReg(AArch64::WZR);
3093 break;
3094 case AArch64::STRXui:
3095 LoadStore->getOperand(0).setReg(AArch64::XZR);
3096 break;
3097 }
3098 }
3099 }
3100
3101 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3102 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3103 // The any/zextload from a smaller type to i32 should be handled by the
3104 // importer.
3105 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3106 return false;
3107 // If we have an extending load then change the load's type to be a
3108 // narrower reg and zero_extend with SUBREG_TO_REG.
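 // Sketch with invented registers: a "G_ZEXTLOAD %p :: (load (s32))" producing
 // a 64-bit value is selected as "LDRWui %ld32, %p, 0" followed by
 // "SUBREG_TO_REG %dst64, 0, %ld32, sub_32", relying on 32-bit loads
 // implicitly zeroing the upper 32 bits of the X register.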
3109 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3110 Register DstReg = LoadStore->getOperand(0).getReg();
3111 LoadStore->getOperand(0).setReg(LdReg);
3112
3113 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3114 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3115 .addImm(0)
3116 .addUse(LdReg)
3117 .addImm(AArch64::sub_32);
3118 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3119 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3120 MRI);
3121 }
3122 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3123 }
3124
3125 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3126 case TargetOpcode::G_INDEXED_SEXTLOAD:
3127 return selectIndexedExtLoad(I, MRI);
3128 case TargetOpcode::G_INDEXED_LOAD:
3129 return selectIndexedLoad(I, MRI);
3130 case TargetOpcode::G_INDEXED_STORE:
3131 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3132
3133 case TargetOpcode::G_LSHR:
3134 case TargetOpcode::G_ASHR:
3135 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3136 return selectVectorAshrLshr(I, MRI);
3137 [[fallthrough]];
3138 case TargetOpcode::G_SHL:
3139 if (Opcode == TargetOpcode::G_SHL &&
3140 MRI.getType(I.getOperand(0).getReg()).isVector())
3141 return selectVectorSHL(I, MRI);
3142
3143 // These shifts were legalized to have 64 bit shift amounts because we
3144 // want to take advantage of the selection patterns that assume the
3145 // immediates are s64s, however, selectBinaryOp will assume both operands
3146 // will have the same bit size.
3147 {
3148 Register SrcReg = I.getOperand(1).getReg();
3149 Register ShiftReg = I.getOperand(2).getReg();
3150 const LLT ShiftTy = MRI.getType(ShiftReg);
3151 const LLT SrcTy = MRI.getType(SrcReg);
3152 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3153 ShiftTy.getSizeInBits() == 64) {
3154 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3155 // Insert a subregister copy to implement a 64->32 trunc
3156 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3157 .addReg(ShiftReg, 0, AArch64::sub_32);
3158 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3159 I.getOperand(2).setReg(Trunc.getReg(0));
3160 }
3161 }
3162 [[fallthrough]];
3163 case TargetOpcode::G_OR: {
3164 // Reject the various things we don't support yet.
3165 if (unsupportedBinOp(I, RBI, MRI, TRI))
3166 return false;
3167
3168 const unsigned OpSize = Ty.getSizeInBits();
3169
3170 const Register DefReg = I.getOperand(0).getReg();
3171 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3172
3173 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3174 if (NewOpc == I.getOpcode())
3175 return false;
3176
3177 I.setDesc(TII.get(NewOpc));
3178 // FIXME: Should the type be always reset in setDesc?
3179
3180 // Now that we selected an opcode, we need to constrain the register
3181 // operands to use appropriate classes.
3182 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3183 }
3184
3185 case TargetOpcode::G_PTR_ADD: {
3186 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3187 I.eraseFromParent();
3188 return true;
3189 }
3190
3191 case TargetOpcode::G_SADDE:
3192 case TargetOpcode::G_UADDE:
3193 case TargetOpcode::G_SSUBE:
3194 case TargetOpcode::G_USUBE:
3195 case TargetOpcode::G_SADDO:
3196 case TargetOpcode::G_UADDO:
3197 case TargetOpcode::G_SSUBO:
3198 case TargetOpcode::G_USUBO:
3199 return selectOverflowOp(I, MRI);
3200
3201 case TargetOpcode::G_PTRMASK: {
3202 Register MaskReg = I.getOperand(2).getReg();
3203 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3204 // TODO: Implement arbitrary cases
3205 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3206 return false;
3207
3208 uint64_t Mask = *MaskVal;
3209 I.setDesc(TII.get(AArch64::ANDXri));
3210 I.getOperand(2).ChangeToImmediate(
3211 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3212
3213 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3214 }
3215 case TargetOpcode::G_PTRTOINT:
3216 case TargetOpcode::G_TRUNC: {
3217 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3218 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3219
3220 const Register DstReg = I.getOperand(0).getReg();
3221 const Register SrcReg = I.getOperand(1).getReg();
3222
3223 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3224 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3225
3226 if (DstRB.getID() != SrcRB.getID()) {
3227 LLVM_DEBUG(
3228 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3229 return false;
3230 }
3231
3232 if (DstRB.getID() == AArch64::GPRRegBankID) {
3233 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3234 if (!DstRC)
3235 return false;
3236
3237 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3238 if (!SrcRC)
3239 return false;
3240
3241 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3242 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3243 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3244 return false;
3245 }
3246
3247 if (DstRC == SrcRC) {
3248 // Nothing to be done
3249 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3250 SrcTy == LLT::scalar(64)) {
3251 llvm_unreachable("TableGen can import this case");
3252 return false;
3253 } else if (DstRC == &AArch64::GPR32RegClass &&
3254 SrcRC == &AArch64::GPR64RegClass) {
3255 I.getOperand(1).setSubReg(AArch64::sub_32);
3256 } else {
3257 LLVM_DEBUG(
3258 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3259 return false;
3260 }
3261
3262 I.setDesc(TII.get(TargetOpcode::COPY));
3263 return true;
3264 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3265 if (DstTy == LLT::fixed_vector(4, 16) &&
3266 SrcTy == LLT::fixed_vector(4, 32)) {
3267 I.setDesc(TII.get(AArch64::XTNv4i16));
3268 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3269 return true;
3270 }
3271
3272 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3273 MachineInstr *Extract = emitExtractVectorElt(
3274 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3275 if (!Extract)
3276 return false;
3277 I.eraseFromParent();
3278 return true;
3279 }
3280
3281 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3282 if (Opcode == TargetOpcode::G_PTRTOINT) {
3283 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3284 I.setDesc(TII.get(TargetOpcode::COPY));
3285 return selectCopy(I, TII, MRI, TRI, RBI);
3286 }
3287 }
3288
3289 return false;
3290 }
3291
3292 case TargetOpcode::G_ANYEXT: {
3293 if (selectUSMovFromExtend(I, MRI))
3294 return true;
3295
3296 const Register DstReg = I.getOperand(0).getReg();
3297 const Register SrcReg = I.getOperand(1).getReg();
3298
3299 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3300 if (RBDst.getID() != AArch64::GPRRegBankID) {
3301 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3302 << ", expected: GPR\n");
3303 return false;
3304 }
3305
3306 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3307 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3308 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3309 << ", expected: GPR\n");
3310 return false;
3311 }
3312
3313 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3314
3315 if (DstSize == 0) {
3316 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3317 return false;
3318 }
3319
3320 if (DstSize != 64 && DstSize > 32) {
3321 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3322 << ", expected: 32 or 64\n");
3323 return false;
3324 }
3325 // At this point G_ANYEXT is just like a plain COPY, but we need
3326 // to explicitly form the 64-bit value when extending to 64 bits.
3327 if (DstSize > 32) {
3328 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3329 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3330 .addDef(ExtSrc)
3331 .addImm(0)
3332 .addUse(SrcReg)
3333 .addImm(AArch64::sub_32);
3334 I.getOperand(1).setReg(ExtSrc);
3335 }
3336 return selectCopy(I, TII, MRI, TRI, RBI);
3337 }
3338
3339 case TargetOpcode::G_ZEXT:
3340 case TargetOpcode::G_SEXT_INREG:
3341 case TargetOpcode::G_SEXT: {
3342 if (selectUSMovFromExtend(I, MRI))
3343 return true;
3344
3345 unsigned Opcode = I.getOpcode();
3346 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3347 const Register DefReg = I.getOperand(0).getReg();
3348 Register SrcReg = I.getOperand(1).getReg();
3349 const LLT DstTy = MRI.getType(DefReg);
3350 const LLT SrcTy = MRI.getType(SrcReg);
3351 unsigned DstSize = DstTy.getSizeInBits();
3352 unsigned SrcSize = SrcTy.getSizeInBits();
3353
3354 // SEXT_INREG has the same src reg size as the dst; the size of the value to
3355 // be extended is encoded in the imm.
3356 if (Opcode == TargetOpcode::G_SEXT_INREG)
3357 SrcSize = I.getOperand(2).getImm();
3358
3359 if (DstTy.isVector())
3360 return false; // Should be handled by imported patterns.
3361
3362 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3363 AArch64::GPRRegBankID &&
3364 "Unexpected ext regbank");
3365
3366 MachineInstr *ExtI;
3367
3368 // First, check whether we're extending the result of a load whose dest type
3369 // is smaller than 32 bits; in that case this zext is redundant. GPR32 is the
3370 // smallest GPR register on AArch64, and all smaller loads automatically
3371 // zero-extend the upper bits. E.g.
3372 // %v(s8) = G_LOAD %p, :: (load 1)
3373 // %v2(s32) = G_ZEXT %v(s8)
3374 if (!IsSigned) {
3375 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3376 bool IsGPR =
3377 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3378 if (LoadMI && IsGPR) {
3379 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3380 unsigned BytesLoaded = MemOp->getSize().getValue();
3381 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3382 return selectCopy(I, TII, MRI, TRI, RBI);
3383 }
3384
3385 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3386 // + SUBREG_TO_REG.
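 // A rough sketch of the selected sequence (hypothetical vregs):
 //   %tmp:gpr32 = ORRWrs $wzr, %src, 0        ; 32-bit mov; a W-reg write zeroes the top half
 //   %dst:gpr64 = SUBREG_TO_REG 0, %tmp, %subreg.sub_32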
3387 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3388 Register SubregToRegSrc =
3389 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3390 const Register ZReg = AArch64::WZR;
3391 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3392 .addImm(0);
3393
3394 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3395 .addImm(0)
3396 .addUse(SubregToRegSrc)
3397 .addImm(AArch64::sub_32);
3398
3399 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3400 MRI)) {
3401 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3402 return false;
3403 }
3404
3405 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3406 MRI)) {
3407 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3408 return false;
3409 }
3410
3411 I.eraseFromParent();
3412 return true;
3413 }
3414 }
3415
3416 if (DstSize == 64) {
3417 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3418 // FIXME: Can we avoid manually doing this?
3419 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3420 MRI)) {
3421 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3422 << " operand\n");
3423 return false;
3424 }
3425 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3426 {&AArch64::GPR64RegClass}, {})
3427 .addImm(0)
3428 .addUse(SrcReg)
3429 .addImm(AArch64::sub_32)
3430 .getReg(0);
3431 }
3432
3433 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3434 {DefReg}, {SrcReg})
3435 .addImm(0)
3436 .addImm(SrcSize - 1);
3437 } else if (DstSize <= 32) {
3438 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3439 {DefReg}, {SrcReg})
3440 .addImm(0)
3441 .addImm(SrcSize - 1);
3442 } else {
3443 return false;
3444 }
3445
3446 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3447 I.eraseFromParent();
3448 return true;
3449 }
3450
3451 case TargetOpcode::G_FREEZE:
3452 return selectCopy(I, TII, MRI, TRI, RBI);
3453
3454 case TargetOpcode::G_INTTOPTR:
3455 // The importer is currently unable to import pointer types since they
3456 // didn't exist in SelectionDAG.
3457 return selectCopy(I, TII, MRI, TRI, RBI);
3458
3459 case TargetOpcode::G_BITCAST:
3460 // Imported SelectionDAG rules can handle every bitcast except those that
3461 // bitcast from a type to the same type. Ideally, these shouldn't occur
3462 // but we might not run an optimizer that deletes them. The other exception
3463 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3464 // of them.
3465 return selectCopy(I, TII, MRI, TRI, RBI);
3466
3467 case TargetOpcode::G_SELECT: {
3468 auto &Sel = cast<GSelect>(I);
3469 const Register CondReg = Sel.getCondReg();
3470 const Register TReg = Sel.getTrueReg();
3471 const Register FReg = Sel.getFalseReg();
3472
3473 if (tryOptSelect(Sel))
3474 return true;
3475
3476 // Make sure to use an unused vreg instead of wzr, so that the peephole
3477 // optimizations will be able to optimize these.
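 // Roughly (a sketch only, hypothetical vregs):
 //   %dead:gpr32 = ANDSWri %cond, <logical imm 1>  ; test bit 0 of the condition, sets NZCV
 // followed by emitSelect below, which picks a CSEL/CSINC/FCSEL-style variant
 // predicated on NE.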
3478 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3479 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3480 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3481 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3482 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3483 return false;
3484 Sel.eraseFromParent();
3485 return true;
3486 }
3487 case TargetOpcode::G_ICMP: {
3488 if (Ty.isVector())
3489 return false;
3490
3491 if (Ty != LLT::scalar(32)) {
3492 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3493 << ", expected: " << LLT::scalar(32) << '\n');
3494 return false;
3495 }
3496
3497 auto &PredOp = I.getOperand(1);
3498 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3499 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3500 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3501 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3502 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3503 /*Src2=*/AArch64::WZR, InvCC, MIB);
3504 I.eraseFromParent();
3505 return true;
3506 }
3507
3508 case TargetOpcode::G_FCMP: {
3509 CmpInst::Predicate Pred =
3510 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3511 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3512 Pred) ||
3513 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3514 return false;
3515 I.eraseFromParent();
3516 return true;
3517 }
3518 case TargetOpcode::G_VASTART:
3519 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3520 : selectVaStartAAPCS(I, MF, MRI);
3521 case TargetOpcode::G_INTRINSIC:
3522 return selectIntrinsic(I, MRI);
3523 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3524 return selectIntrinsicWithSideEffects(I, MRI);
3525 case TargetOpcode::G_IMPLICIT_DEF: {
3526 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3527 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3528 const Register DstReg = I.getOperand(0).getReg();
3529 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3530 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3531 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3532 return true;
3533 }
3534 case TargetOpcode::G_BLOCK_ADDR: {
3535 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3536 if (std::optional<uint16_t> BADisc =
3537 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3538 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3539 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3540 MIB.buildInstr(AArch64::MOVaddrPAC)
3541 .addBlockAddress(I.getOperand(1).getBlockAddress())
3542 .addImm(AArch64PACKey::IA)
3543 .addReg(/*AddrDisc=*/AArch64::XZR)
3544 .addImm(*BADisc)
3545 .constrainAllUses(TII, TRI, RBI);
3546 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3547 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3548 AArch64::GPR64RegClass, MRI);
3549 I.eraseFromParent();
3550 return true;
3551 }
3552 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3553 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3554 I.eraseFromParent();
3555 return true;
3556 } else {
3557 I.setDesc(TII.get(AArch64::MOVaddrBA));
3558 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3559 I.getOperand(0).getReg())
3560 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3561 /* Offset */ 0, AArch64II::MO_PAGE)
3562 .addBlockAddress(
3563 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3564 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3565 I.eraseFromParent();
3566 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3567 }
3568 }
3569 case AArch64::G_DUP: {
3570 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3571 // imported patterns, so do it manually here. Avoiding the generation of s16
3572 // gprs is difficult because at regbank selection (RBS) we may end up
3573 // pessimizing the fpr case if we decide to add an anyextend to fix this.
3574 // Manual selection is the most robust solution for now.
3575 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3576 AArch64::GPRRegBankID)
3577 return false; // We expect the fpr regbank case to be imported.
3578 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3579 if (VecTy == LLT::fixed_vector(8, 8))
3580 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3581 else if (VecTy == LLT::fixed_vector(16, 8))
3582 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3583 else if (VecTy == LLT::fixed_vector(4, 16))
3584 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3585 else if (VecTy == LLT::fixed_vector(8, 16))
3586 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3587 else
3588 return false;
3589 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3590 }
3591 case TargetOpcode::G_BUILD_VECTOR:
3592 return selectBuildVector(I, MRI);
3593 case TargetOpcode::G_MERGE_VALUES:
3594 return selectMergeValues(I, MRI);
3595 case TargetOpcode::G_UNMERGE_VALUES:
3596 return selectUnmergeValues(I, MRI);
3597 case TargetOpcode::G_SHUFFLE_VECTOR:
3598 return selectShuffleVector(I, MRI);
3599 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3600 return selectExtractElt(I, MRI);
3601 case TargetOpcode::G_CONCAT_VECTORS:
3602 return selectConcatVectors(I, MRI);
3603 case TargetOpcode::G_JUMP_TABLE:
3604 return selectJumpTable(I, MRI);
3605 case TargetOpcode::G_MEMCPY:
3606 case TargetOpcode::G_MEMCPY_INLINE:
3607 case TargetOpcode::G_MEMMOVE:
3608 case TargetOpcode::G_MEMSET:
3609 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3610 return selectMOPS(I, MRI);
3611 }
3612
3613 return false;
3614}
3615
3616bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3617 MachineIRBuilderState OldMIBState = MIB.getState();
3618 bool Success = select(I);
3619 MIB.setState(OldMIBState);
3620 return Success;
3621}
3622
3623 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3624 MachineRegisterInfo &MRI) {
3625 unsigned Mopcode;
3626 switch (GI.getOpcode()) {
3627 case TargetOpcode::G_MEMCPY:
3628 case TargetOpcode::G_MEMCPY_INLINE:
3629 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3630 break;
3631 case TargetOpcode::G_MEMMOVE:
3632 Mopcode = AArch64::MOPSMemoryMovePseudo;
3633 break;
3634 case TargetOpcode::G_MEMSET:
3635 // For tagged memset see llvm.aarch64.mops.memset.tag
3636 Mopcode = AArch64::MOPSMemorySetPseudo;
3637 break;
3638 }
3639
3640 auto &DstPtr = GI.getOperand(0);
3641 auto &SrcOrVal = GI.getOperand(1);
3642 auto &Size = GI.getOperand(2);
3643
3644 // Create copies of the registers that can be clobbered.
3645 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3646 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3647 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3648
3649 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3650 const auto &SrcValRegClass =
3651 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3652
3653 // Constrain to specific registers
3654 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3655 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3656 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3657
3658 MIB.buildCopy(DstPtrCopy, DstPtr);
3659 MIB.buildCopy(SrcValCopy, SrcOrVal);
3660 MIB.buildCopy(SizeCopy, Size);
3661
3662 // The new instruction uses the copied registers because it must update them.
3663 // The defs are not used, since they don't exist in G_MEM*, but they are
3664 // still tied.
3665 // Note: the order of operands differs from G_MEMSET, G_MEMCPY, G_MEMMOVE.
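 // For reference (sketch only; the def names are hypothetical):
 //   G_MEMSET:            %d, %sz = MOPSMemorySetPseudo %DstPtrCopy, %SizeCopy, %SrcValCopy
 //   G_MEMCPY/G_MEMMOVE:  %d, %s, %sz = MOPSMemoryCopyPseudo %DstPtrCopy, %SrcValCopy, %SizeCopy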
3666 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3667 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3668 if (IsSet) {
3669 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3670 {DstPtrCopy, SizeCopy, SrcValCopy});
3671 } else {
3672 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3673 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3674 {DstPtrCopy, SrcValCopy, SizeCopy});
3675 }
3676
3677 GI.eraseFromParent();
3678 return true;
3679}
3680
3681 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3682 MachineRegisterInfo &MRI) {
3683 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3684 Register JTAddr = I.getOperand(0).getReg();
3685 unsigned JTI = I.getOperand(1).getIndex();
3686 Register Index = I.getOperand(2).getReg();
3687
3688 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3689
3690 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3691 // sequence later, to guarantee the integrity of the intermediate values.
3692 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3693 CodeModel::Model CM = TM.getCodeModel();
3694 if (STI.isTargetMachO()) {
3695 if (CM != CodeModel::Small && CM != CodeModel::Large)
3696 report_fatal_error("Unsupported code-model for hardened jump-table");
3697 } else {
3698 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3699 assert(STI.isTargetELF() &&
3700 "jump table hardening only supported on MachO/ELF");
3701 if (CM != CodeModel::Small)
3702 report_fatal_error("Unsupported code-model for hardened jump-table");
3703 }
3704
3705 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3706 MIB.buildInstr(AArch64::BR_JumpTable)
3707 .addJumpTableIndex(I.getOperand(1).getIndex());
3708 I.eraseFromParent();
3709 return true;
3710 }
3711
3712 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3713 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3714
3715 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3716 {TargetReg, ScratchReg}, {JTAddr, Index})
3717 .addJumpTableIndex(JTI);
3718 // Save the jump table info.
3719 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3720 {static_cast<int64_t>(JTI)});
3721 // Build the indirect branch.
3722 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3723 I.eraseFromParent();
3724 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3725}
3726
3727 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3728 MachineRegisterInfo &MRI) {
3729 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3730 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3731
3732 Register DstReg = I.getOperand(0).getReg();
3733 unsigned JTI = I.getOperand(1).getIndex();
3734 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
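 // The later expansion looks roughly like (illustrative only; the label name
 // depends on the actual jump table):
 //   adrp xN, .LJTI<n>
 //   add  xN, xN, :lo12:.LJTI<n>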
3735 auto MovMI =
3736 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3737 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3738 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3739 I.eraseFromParent();
3740 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3741}
3742
3743 bool AArch64InstructionSelector::selectTLSGlobalValue(
3744 MachineInstr &I, MachineRegisterInfo &MRI) {
3745 if (!STI.isTargetMachO())
3746 return false;
3747 MachineFunction &MF = *I.getParent()->getParent();
3748 MF.getFrameInfo().setAdjustsStack(true);
3749
3750 const auto &GlobalOp = I.getOperand(1);
3751 assert(GlobalOp.getOffset() == 0 &&
3752 "Shouldn't have an offset on TLS globals!");
3753 const GlobalValue &GV = *GlobalOp.getGlobal();
3754
3755 auto LoadGOT =
3756 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3757 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3758
3759 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3760 {LoadGOT.getReg(0)})
3761 .addImm(0);
3762
3763 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3764 // TLS calls preserve all registers except those that absolutely must be
3765 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3766 // silly).
3767 unsigned Opcode = getBLRCallOpcode(MF);
3768
3769 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3770 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3771 assert(Opcode == AArch64::BLR);
3772 Opcode = AArch64::BLRAAZ;
3773 }
3774
3775 MIB.buildInstr(Opcode, {}, {Load})
3776 .addUse(AArch64::X0, RegState::Implicit)
3777 .addDef(AArch64::X0, RegState::Implicit)
3778 .addRegMask(TRI.getTLSCallPreservedMask());
3779
3780 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3781 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3782 MRI);
3783 I.eraseFromParent();
3784 return true;
3785}
3786
3787MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3788 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3789 MachineIRBuilder &MIRBuilder) const {
3790 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3791
3792 auto BuildFn = [&](unsigned SubregIndex) {
3793 auto Ins =
3794 MIRBuilder
3795 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3796 .addImm(SubregIndex);
3797 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3798 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3799 return &*Ins;
3800 };
3801
3802 switch (EltSize) {
3803 case 8:
3804 return BuildFn(AArch64::bsub);
3805 case 16:
3806 return BuildFn(AArch64::hsub);
3807 case 32:
3808 return BuildFn(AArch64::ssub);
3809 case 64:
3810 return BuildFn(AArch64::dsub);
3811 default:
3812 return nullptr;
3813 }
3814}
3815
3816 MachineInstr *
3817AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3818 MachineIRBuilder &MIB,
3819 MachineRegisterInfo &MRI) const {
3820 LLT DstTy = MRI.getType(DstReg);
3821 const TargetRegisterClass *RC =
3822 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3823 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3824 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3825 return nullptr;
3826 }
3827 unsigned SubReg = 0;
3828 if (!getSubRegForClass(RC, TRI, SubReg))
3829 return nullptr;
3830 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3831 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3832 << DstTy.getSizeInBits() << "\n");
3833 return nullptr;
3834 }
3835 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3836 .addReg(SrcReg, 0, SubReg);
3837 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3838 return Copy;
3839}
3840
3841 bool AArch64InstructionSelector::selectMergeValues(
3842 MachineInstr &I, MachineRegisterInfo &MRI) {
3843 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3844 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3845 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3846 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3847 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3848
3849 if (I.getNumOperands() != 3)
3850 return false;
3851
3852 // Merging 2 s64s into an s128.
3853 if (DstTy == LLT::scalar(128)) {
3854 if (SrcTy.getSizeInBits() != 64)
3855 return false;
3856 Register DstReg = I.getOperand(0).getReg();
3857 Register Src1Reg = I.getOperand(1).getReg();
3858 Register Src2Reg = I.getOperand(2).getReg();
3859 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3860 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3861 /* LaneIdx */ 0, RB, MIB);
3862 if (!InsMI)
3863 return false;
3864 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3865 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3866 if (!Ins2MI)
3867 return false;
3868 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3869 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3870 I.eraseFromParent();
3871 return true;
3872 }
3873
3874 if (RB.getID() != AArch64::GPRRegBankID)
3875 return false;
3876
3877 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3878 return false;
3879
3880 auto *DstRC = &AArch64::GPR64RegClass;
3881 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3882 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3883 TII.get(TargetOpcode::SUBREG_TO_REG))
3884 .addDef(SubToRegDef)
3885 .addImm(0)
3886 .addUse(I.getOperand(1).getReg())
3887 .addImm(AArch64::sub_32);
3888 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3889 // Need to anyext the second scalar before we can use bfm
3890 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3891 TII.get(TargetOpcode::SUBREG_TO_REG))
3892 .addDef(SubToRegDef2)
3893 .addImm(0)
3894 .addUse(I.getOperand(2).getReg())
3895 .addImm(AArch64::sub_32);
3896 MachineInstr &BFM =
3897 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3898 .addDef(I.getOperand(0).getReg())
3899 .addUse(SubToRegDef)
3900 .addUse(SubToRegDef2)
3901 .addImm(32)
3902 .addImm(31);
3903 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3904 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3905 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3906 I.eraseFromParent();
3907 return true;
3908}
3909
3910static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3911 const unsigned EltSize) {
3912 // Choose a lane copy opcode and subregister based off of the size of the
3913 // vector's elements.
3914 switch (EltSize) {
3915 case 8:
3916 CopyOpc = AArch64::DUPi8;
3917 ExtractSubReg = AArch64::bsub;
3918 break;
3919 case 16:
3920 CopyOpc = AArch64::DUPi16;
3921 ExtractSubReg = AArch64::hsub;
3922 break;
3923 case 32:
3924 CopyOpc = AArch64::DUPi32;
3925 ExtractSubReg = AArch64::ssub;
3926 break;
3927 case 64:
3928 CopyOpc = AArch64::DUPi64;
3929 ExtractSubReg = AArch64::dsub;
3930 break;
3931 default:
3932 // Unknown size, bail out.
3933 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3934 return false;
3935 }
3936 return true;
3937}
3938
3939MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3940 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3941 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3942 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3943 unsigned CopyOpc = 0;
3944 unsigned ExtractSubReg = 0;
3945 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3946 LLVM_DEBUG(
3947 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3948 return nullptr;
3949 }
3950
3951 const TargetRegisterClass *DstRC =
3952 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
3953 if (!DstRC) {
3954 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3955 return nullptr;
3956 }
3957
3958 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3959 const LLT &VecTy = MRI.getType(VecReg);
3960 const TargetRegisterClass *VecRC =
3961 getRegClassForTypeOnBank(VecTy, VecRB, true);
3962 if (!VecRC) {
3963 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3964 return nullptr;
3965 }
3966
3967 // The register that we're going to copy into.
3968 Register InsertReg = VecReg;
3969 if (!DstReg)
3970 DstReg = MRI.createVirtualRegister(DstRC);
3971 // If the lane index is 0, we just use a subregister COPY.
3972 if (LaneIdx == 0) {
3973 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3974 .addReg(VecReg, 0, ExtractSubReg);
3975 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3976 return &*Copy;
3977 }
3978
3979 // Lane copies require 128-bit wide registers. If we're dealing with an
3980 // unpacked vector, then we need to move up to that width. Insert an implicit
3981 // def and a subregister insert to get us there.
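 // Sketch (hypothetical vregs) for a 64-bit source vector:
 //   %undef:fpr128 = IMPLICIT_DEF
 //   %wide:fpr128 = INSERT_SUBREG %undef, %vec:fpr64, %subreg.dsub
 // The lane copy (e.g. DUPi32) then reads its lane from %wide.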
3982 if (VecTy.getSizeInBits() != 128) {
3983 MachineInstr *ScalarToVector = emitScalarToVector(
3984 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3985 if (!ScalarToVector)
3986 return nullptr;
3987 InsertReg = ScalarToVector->getOperand(0).getReg();
3988 }
3989
3990 MachineInstr *LaneCopyMI =
3991 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3992 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3993
3994 // Make sure that we actually constrain the initial copy.
3995 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3996 return LaneCopyMI;
3997}
3998
3999 bool AArch64InstructionSelector::selectExtractElt(
4000 MachineInstr &I, MachineRegisterInfo &MRI) {
4001 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4002 "unexpected opcode!");
4003 Register DstReg = I.getOperand(0).getReg();
4004 const LLT NarrowTy = MRI.getType(DstReg);
4005 const Register SrcReg = I.getOperand(1).getReg();
4006 const LLT WideTy = MRI.getType(SrcReg);
4007 (void)WideTy;
4008 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4009 "source register size too small!");
4010 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4011
4012 // Need the lane index to determine the correct copy opcode.
4013 MachineOperand &LaneIdxOp = I.getOperand(2);
4014 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4015
4016 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4017 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4018 return false;
4019 }
4020
4021 // Find the index to extract from.
4022 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4023 if (!VRegAndVal)
4024 return false;
4025 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4026
4027
4028 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4029 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4030 LaneIdx, MIB);
4031 if (!Extract)
4032 return false;
4033
4034 I.eraseFromParent();
4035 return true;
4036}
4037
4038 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4039 MachineInstr &I, MachineRegisterInfo &MRI) {
4040 unsigned NumElts = I.getNumOperands() - 1;
4041 Register SrcReg = I.getOperand(NumElts).getReg();
4042 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4043 const LLT SrcTy = MRI.getType(SrcReg);
4044
4045 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4046 if (SrcTy.getSizeInBits() > 128) {
4047 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4048 return false;
4049 }
4050
4051 // We implement a split vector operation by treating the sub-vectors as
4052 // scalars and extracting them.
4053 const RegisterBank &DstRB =
4054 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4055 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4056 Register Dst = I.getOperand(OpIdx).getReg();
4057 MachineInstr *Extract =
4058 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4059 if (!Extract)
4060 return false;
4061 }
4062 I.eraseFromParent();
4063 return true;
4064}
4065
4066 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4067 MachineRegisterInfo &MRI) {
4068 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4069 "unexpected opcode");
4070
4071 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4072 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4073 AArch64::FPRRegBankID ||
4074 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4075 AArch64::FPRRegBankID) {
4076 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4077 "currently unsupported.\n");
4078 return false;
4079 }
4080
4081 // The last operand is the vector source register, and every other operand is
4082 // a register to unpack into.
4083 unsigned NumElts = I.getNumOperands() - 1;
4084 Register SrcReg = I.getOperand(NumElts).getReg();
4085 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4086 const LLT WideTy = MRI.getType(SrcReg);
4087 (void)WideTy;
4088 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4089 "can only unmerge from vector or s128 types!");
4090 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4091 "source register size too small!");
4092
4093 if (!NarrowTy.isScalar())
4094 return selectSplitVectorUnmerge(I, MRI);
4095
4096 // Choose a lane copy opcode and subregister based off of the size of the
4097 // vector's elements.
4098 unsigned CopyOpc = 0;
4099 unsigned ExtractSubReg = 0;
4100 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4101 return false;
4102
4103 // Set up for the lane copies.
4104 MachineBasicBlock &MBB = *I.getParent();
4105
4106 // Stores the registers we'll be copying from.
4107 SmallVector<Register, 4> InsertRegs;
4108
4109 // We'll use the first register twice, so we only need NumElts-1 registers.
4110 unsigned NumInsertRegs = NumElts - 1;
4111
4112 // If our elements fit into exactly 128 bits, then we can copy from the source
4113 // directly. Otherwise, we need to do a bit of setup with some subregister
4114 // inserts.
4115 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4116 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4117 } else {
4118 // No. We have to perform subregister inserts. For each insert, create an
4119 // implicit def and a subregister insert, and save the register we create.
4120 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4121 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4122 *RBI.getRegBank(SrcReg, MRI, TRI));
4123 unsigned SubReg = 0;
4124 bool Found = getSubRegForClass(RC, TRI, SubReg);
4125 (void)Found;
4126 assert(Found && "expected to find last operand's subreg idx");
4127 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4128 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4129 MachineInstr &ImpDefMI =
4130 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4131 ImpDefReg);
4132
4133 // Now, create the subregister insert from SrcReg.
4134 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4135 MachineInstr &InsMI =
4136 *BuildMI(MBB, I, I.getDebugLoc(),
4137 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4138 .addUse(ImpDefReg)
4139 .addUse(SrcReg)
4140 .addImm(SubReg);
4141
4142 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4143 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4144
4145 // Save the register so that we can copy from it after.
4146 InsertRegs.push_back(InsertReg);
4147 }
4148 }
4149
4150 // Now that we've created any necessary subregister inserts, we can
4151 // create the copies.
4152 //
4153 // Perform the first copy separately as a subregister copy.
4154 Register CopyTo = I.getOperand(0).getReg();
4155 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4156 .addReg(InsertRegs[0], 0, ExtractSubReg);
4157 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4158
4159 // Now, perform the remaining copies as vector lane copies.
4160 unsigned LaneIdx = 1;
4161 for (Register InsReg : InsertRegs) {
4162 Register CopyTo = I.getOperand(LaneIdx).getReg();
4163 MachineInstr &CopyInst =
4164 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4165 .addUse(InsReg)
4166 .addImm(LaneIdx);
4167 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4168 ++LaneIdx;
4169 }
4170
4171 // Separately constrain the first copy's destination. Because of the
4172 // limitation in constrainOperandRegClass, we can't guarantee that this will
4173 // actually be constrained. So, do it ourselves using the second operand.
4174 const TargetRegisterClass *RC =
4175 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4176 if (!RC) {
4177 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4178 return false;
4179 }
4180
4181 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4182 I.eraseFromParent();
4183 return true;
4184}
4185
4186 bool AArch64InstructionSelector::selectConcatVectors(
4187 MachineInstr &I, MachineRegisterInfo &MRI) {
4188 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4189 "Unexpected opcode");
4190 Register Dst = I.getOperand(0).getReg();
4191 Register Op1 = I.getOperand(1).getReg();
4192 Register Op2 = I.getOperand(2).getReg();
4193 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4194 if (!ConcatMI)
4195 return false;
4196 I.eraseFromParent();
4197 return true;
4198}
4199
4200unsigned
4201AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4202 MachineFunction &MF) const {
4203 Type *CPTy = CPVal->getType();
4204 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4205
4206 MachineConstantPool *MCP = MF.getConstantPool();
4207 return MCP->getConstantPoolIndex(CPVal, Alignment);
4208}
4209
4210MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4211 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4212 const TargetRegisterClass *RC;
4213 unsigned Opc;
4214 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4215 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4216 switch (Size) {
4217 case 16:
4218 RC = &AArch64::FPR128RegClass;
4219 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4220 break;
4221 case 8:
4222 RC = &AArch64::FPR64RegClass;
4223 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4224 break;
4225 case 4:
4226 RC = &AArch64::FPR32RegClass;
4227 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4228 break;
4229 case 2:
4230 RC = &AArch64::FPR16RegClass;
4231 Opc = AArch64::LDRHui;
4232 break;
4233 default:
4234 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4235 << *CPVal->getType());
4236 return nullptr;
4237 }
4238
4239 MachineInstr *LoadMI = nullptr;
4240 auto &MF = MIRBuilder.getMF();
4241 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4242 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4243 // Use load(literal) for tiny code model.
4244 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4245 } else {
4246 auto Adrp =
4247 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4248 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4249
4250 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4251 .addConstantPoolIndex(
4252 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4253
4254 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4255
4256 }
4257 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4258 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4259 MachineMemOperand::MOLoad,
4260 Size, Align(Size)));
4261 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4262 return LoadMI;
4263}
4264
4265/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4266/// size and RB.
4267static std::pair<unsigned, unsigned>
4268getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4269 unsigned Opc, SubregIdx;
4270 if (RB.getID() == AArch64::GPRRegBankID) {
4271 if (EltSize == 8) {
4272 Opc = AArch64::INSvi8gpr;
4273 SubregIdx = AArch64::bsub;
4274 } else if (EltSize == 16) {
4275 Opc = AArch64::INSvi16gpr;
4276 SubregIdx = AArch64::ssub;
4277 } else if (EltSize == 32) {
4278 Opc = AArch64::INSvi32gpr;
4279 SubregIdx = AArch64::ssub;
4280 } else if (EltSize == 64) {
4281 Opc = AArch64::INSvi64gpr;
4282 SubregIdx = AArch64::dsub;
4283 } else {
4284 llvm_unreachable("invalid elt size!");
4285 }
4286 } else {
4287 if (EltSize == 8) {
4288 Opc = AArch64::INSvi8lane;
4289 SubregIdx = AArch64::bsub;
4290 } else if (EltSize == 16) {
4291 Opc = AArch64::INSvi16lane;
4292 SubregIdx = AArch64::hsub;
4293 } else if (EltSize == 32) {
4294 Opc = AArch64::INSvi32lane;
4295 SubregIdx = AArch64::ssub;
4296 } else if (EltSize == 64) {
4297 Opc = AArch64::INSvi64lane;
4298 SubregIdx = AArch64::dsub;
4299 } else {
4300 llvm_unreachable("invalid elt size!");
4301 }
4302 }
4303 return std::make_pair(Opc, SubregIdx);
4304}
4305
4306MachineInstr *AArch64InstructionSelector::emitInstr(
4307 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4308 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4309 const ComplexRendererFns &RenderFns) const {
4310 assert(Opcode && "Expected an opcode?");
4311 assert(!isPreISelGenericOpcode(Opcode) &&
4312 "Function should only be used to produce selected instructions!");
4313 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4314 if (RenderFns)
4315 for (auto &Fn : *RenderFns)
4316 Fn(MI);
4317 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4318 return &*MI;
4319}
4320
4321MachineInstr *AArch64InstructionSelector::emitAddSub(
4322 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4323 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4324 MachineIRBuilder &MIRBuilder) const {
4325 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4326 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4327 auto Ty = MRI.getType(LHS.getReg());
4328 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4329 unsigned Size = Ty.getSizeInBits();
4330 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4331 bool Is32Bit = Size == 32;
4332
4333 // INSTRri form with positive arithmetic immediate.
4334 if (auto Fns = selectArithImmed(RHS))
4335 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4336 MIRBuilder, Fns);
4337
4338 // INSTRri form with negative arithmetic immediate.
4339 if (auto Fns = selectNegArithImmed(RHS))
4340 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4341 MIRBuilder, Fns);
4342
4343 // INSTRrx form.
4344 if (auto Fns = selectArithExtendedRegister(RHS))
4345 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4346 MIRBuilder, Fns);
4347
4348 // INSTRrs form.
4349 if (auto Fns = selectShiftedRegister(RHS))
4350 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4351 MIRBuilder, Fns);
4352 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4353 MIRBuilder);
4354}
4355
4356 MachineInstr *
4357AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4358 MachineOperand &RHS,
4359 MachineIRBuilder &MIRBuilder) const {
4360 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4361 {{AArch64::ADDXri, AArch64::ADDWri},
4362 {AArch64::ADDXrs, AArch64::ADDWrs},
4363 {AArch64::ADDXrr, AArch64::ADDWrr},
4364 {AArch64::SUBXri, AArch64::SUBWri},
4365 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4366 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4367}
4368
4369 MachineInstr *
4370AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4371 MachineOperand &RHS,
4372 MachineIRBuilder &MIRBuilder) const {
4373 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4374 {{AArch64::ADDSXri, AArch64::ADDSWri},
4375 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4376 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4377 {AArch64::SUBSXri, AArch64::SUBSWri},
4378 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4379 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4380}
4381
4382 MachineInstr *
4383AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4384 MachineOperand &RHS,
4385 MachineIRBuilder &MIRBuilder) const {
4386 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4387 {{AArch64::SUBSXri, AArch64::SUBSWri},
4388 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4389 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4390 {AArch64::ADDSXri, AArch64::ADDSWri},
4391 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4392 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4393}
4394
4395 MachineInstr *
4396AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4397 MachineOperand &RHS,
4398 MachineIRBuilder &MIRBuilder) const {
4399 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4400 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4401 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4402 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4403 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4404}
4405
4406 MachineInstr *
4407AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4408 MachineOperand &RHS,
4409 MachineIRBuilder &MIRBuilder) const {
4410 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4411 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4412 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4413 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4414 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4415}
4416
4417 MachineInstr *
4418AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4419 MachineIRBuilder &MIRBuilder) const {
4420 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4421 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4422 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4423 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4424}
4425
4426 MachineInstr *
4427AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4428 MachineIRBuilder &MIRBuilder) const {
4429 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4430 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4431 LLT Ty = MRI.getType(LHS.getReg());
4432 unsigned RegSize = Ty.getSizeInBits();
4433 bool Is32Bit = (RegSize == 32);
4434 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4435 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4436 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4437 // ANDS needs a logical immediate for its immediate form. Check if we can
4438 // fold one in.
4439 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4440 int64_t Imm = ValAndVReg->Value.getSExtValue();
4441
4442 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4443 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4444 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4445 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4446 return &*TstMI;
4447 }
4448 }
4449
4450 if (auto Fns = selectLogicalShiftedRegister(RHS))
4451 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4452 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4453}
4454
4455 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4456 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4457 MachineIRBuilder &MIRBuilder) const {
4458 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4459 assert(Predicate.isPredicate() && "Expected predicate?");
4460 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4461 LLT CmpTy = MRI.getType(LHS.getReg());
4462 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4463 unsigned Size = CmpTy.getSizeInBits();
4464 (void)Size;
4465 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4466 // Fold the compare into a cmn or tst if possible.
4467 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4468 return FoldCmp;
4469 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4470 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4471}
4472
4473MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4474 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4475 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4476#ifndef NDEBUG
4477 LLT Ty = MRI.getType(Dst);
4478 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4479 "Expected a 32-bit scalar register?");
4480#endif
4481 const Register ZReg = AArch64::WZR;
4482 AArch64CC::CondCode CC1, CC2;
4483 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4484 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4485 if (CC2 == AArch64CC::AL)
4486 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4487 MIRBuilder);
4488 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4489 Register Def1Reg = MRI.createVirtualRegister(RC);
4490 Register Def2Reg = MRI.createVirtualRegister(RC);
4491 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4492 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4493 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4494 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4495 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4496 return &*OrMI;
4497}
4498
4499MachineInstr *AArch64InstructionSelector::emitFPCompare(
4500 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4501 std::optional<CmpInst::Predicate> Pred) const {
4502 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4503 LLT Ty = MRI.getType(LHS);
4504 if (Ty.isVector())
4505 return nullptr;
4506 unsigned OpSize = Ty.getSizeInBits();
4507 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4508
4509 // If this is a compare against +0.0, then we don't have
4510 // to explicitly materialize a constant.
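 // i.e. (sketch) prefer "FCMPDri %lhs" (implicit #0.0 operand) over
 // materializing a zero into a register and emitting "FCMPDrr %lhs, %zero".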
4511 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4512 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4513
4514 auto IsEqualityPred = [](CmpInst::Predicate P) {
4515 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4516 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4517 };
4518 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4519 // Try commutating the operands.
4520 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4521 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4522 ShouldUseImm = true;
4523 std::swap(LHS, RHS);
4524 }
4525 }
4526 unsigned CmpOpcTbl[2][3] = {
4527 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4528 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4529 unsigned CmpOpc =
4530 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4531
4532 // Partially build the compare. Decide if we need to add a use for the
4533 // third operand based off whether or not we're comparing against 0.0.
4534 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4536 if (!ShouldUseImm)
4537 CmpMI.addUse(RHS);
4538 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4539 return &*CmpMI;
4540}
4541
4542MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4543 std::optional<Register> Dst, Register Op1, Register Op2,
4544 MachineIRBuilder &MIRBuilder) const {
4545 // We implement a vector concat by:
4546 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4547 // 2. Insert the upper vector into the destination's upper element
4548 // TODO: some of this code is common with G_BUILD_VECTOR handling.
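 // Sketch for two 64-bit vectors (hypothetical vregs):
 //   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1:fpr64, dsub
 //   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2:fpr64, dsub
 //   %dst:fpr128 = INSvi64lane %w1, 1, %w2, 0   ; move %op2 into the upper half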
4549 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4550
4551 const LLT Op1Ty = MRI.getType(Op1);
4552 const LLT Op2Ty = MRI.getType(Op2);
4553
4554 if (Op1Ty != Op2Ty) {
4555 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4556 return nullptr;
4557 }
4558 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4559
4560 if (Op1Ty.getSizeInBits() >= 128) {
4561 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4562 return nullptr;
4563 }
4564
4565 // At the moment we just support 64 bit vector concats.
4566 if (Op1Ty.getSizeInBits() != 64) {
4567 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4568 return nullptr;
4569 }
4570
4571 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4572 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4573 const TargetRegisterClass *DstRC =
4574 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4575
4576 MachineInstr *WidenedOp1 =
4577 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4578 MachineInstr *WidenedOp2 =
4579 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4580 if (!WidenedOp1 || !WidenedOp2) {
4581 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4582 return nullptr;
4583 }
4584
4585 // Now do the insert of the upper element.
4586 unsigned InsertOpc, InsSubRegIdx;
4587 std::tie(InsertOpc, InsSubRegIdx) =
4588 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4589
4590 if (!Dst)
4591 Dst = MRI.createVirtualRegister(DstRC);
4592 auto InsElt =
4593 MIRBuilder
4594 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4595 .addImm(1) /* Lane index */
4596 .addUse(WidenedOp2->getOperand(0).getReg())
4597 .addImm(0);
4598 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4599 return &*InsElt;
4600}
4601
4602 MachineInstr *
4603AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4604 Register Src2, AArch64CC::CondCode Pred,
4605 MachineIRBuilder &MIRBuilder) const {
4606 auto &MRI = *MIRBuilder.getMRI();
4607 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4608 // If we used a register class, then this won't necessarily have an LLT.
4609 // Compute the size based off whether or not we have a class or bank.
4610 unsigned Size;
4611 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4612 Size = TRI.getRegSizeInBits(*RC);
4613 else
4614 Size = MRI.getType(Dst).getSizeInBits();
4615 // Some opcodes use s1.
4616 assert(Size <= 64 && "Expected 64 bits or less only!");
4617 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4618 unsigned Opc = OpcTable[Size == 64];
4619 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4620 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4621 return &*CSINC;
4622}
4623
4624MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4625 Register CarryReg) {
4626 MachineRegisterInfo *MRI = MIB.getMRI();
4627 unsigned Opcode = I.getOpcode();
4628
4629 // If the instruction is a SUB, we need to negate the carry,
4630 // because borrowing is indicated by carry-flag == 0.
4631 bool NeedsNegatedCarry =
4632 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4633
4634 // If the previous instruction will already produce the correct carry, do not
4635 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4636 // generated during legalization of wide add/sub. This optimization depends on
4637 // these sequences not being interrupted by other instructions.
4638 // We have to select the previous instruction before the carry-using
4639 // instruction is deleted by the calling function, otherwise the previous
4640 // instruction might become dead and would get deleted.
4641 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4642 if (SrcMI == I.getPrevNode()) {
4643 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4644 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4645 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4646 CarrySrcMI->isUnsigned() &&
4647 CarrySrcMI->getCarryOutReg() == CarryReg &&
4648 selectAndRestoreState(*SrcMI))
4649 return nullptr;
4650 }
4651 }
4652
4653 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4654
4655 if (NeedsNegatedCarry) {
4656 // (0 - Carry) sets !C in NZCV when Carry == 1
4657 Register ZReg = AArch64::WZR;
4658 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4659 }
4660
4661 // (Carry - 1) sets !C in NZCV when Carry == 0
4662 auto Fns = select12BitValueWithLeftShift(1);
4663 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4664}
4665
4666 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4667 MachineRegisterInfo &MRI) {
4668 auto &CarryMI = cast<GAddSubCarryOut>(I);
4669
4670 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4671 // Set NZCV carry according to carry-in VReg
4672 emitCarryIn(I, CarryInMI->getCarryInReg());
4673 }
4674
4675 // Emit the operation and get the correct condition code.
4676 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4677 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4678
4679 Register CarryOutReg = CarryMI.getCarryOutReg();
4680
4681 // Don't convert carry-out to VReg if it is never used
4682 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4683 // Now, put the overflow result in the register given by the first operand
4684 // to the overflow op. CSINC increments the result when the predicate is
4685 // false, so to get the increment when it's true, we need to use the
4686 // inverse. In this case, we want to increment when carry is set.
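 // e.g. (sketch) for G_UADDO, whose overflow condition is AArch64CC::HS, we
 // emit
 //   %carry:gpr32 = CSINCWr $wzr, $wzr, LO   ; 1 when HS holds, 0 otherwise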
4687 Register ZReg = AArch64::WZR;
4688 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4689 getInvertedCondCode(OpAndCC.second), MIB);
4690 }
4691
4692 I.eraseFromParent();
4693 return true;
4694}
4695
4696std::pair<MachineInstr *, AArch64CC::CondCode>
4697AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4698 MachineOperand &LHS,
4699 MachineOperand &RHS,
4700 MachineIRBuilder &MIRBuilder) const {
4701 switch (Opcode) {
4702 default:
4703 llvm_unreachable("Unexpected opcode!");
4704 case TargetOpcode::G_SADDO:
4705 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4706 case TargetOpcode::G_UADDO:
4707 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4708 case TargetOpcode::G_SSUBO:
4709 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4710 case TargetOpcode::G_USUBO:
4711 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4712 case TargetOpcode::G_SADDE:
4713 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4714 case TargetOpcode::G_UADDE:
4715 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4716 case TargetOpcode::G_SSUBE:
4717 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4718 case TargetOpcode::G_USUBE:
4719 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4720 }
4721}
4722
4723/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4724/// expressed as a conjunction.
4725/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4726/// changing the conditions on the CMP tests.
4727/// (this means we can call emitConjunctionRec() with
4728/// Negate==true on this sub-tree)
4729/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4730/// cannot do the negation naturally. We are required to
4731/// emit the subtree first in this case.
4732/// \param WillNegate Is true if we are called when the result of this
4733/// subexpression must be negated. This happens when the
4734/// outer expression is an OR. We can use this fact to know
4735/// that we have a double negation (or (or ...) ...) that
4736/// can be implemented for free.
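/// For example (illustrative only), a chain such as
///   %x = G_ICMP slt, %a, %b
///   %y = G_ICMP eq, %c, %d
///   %z = G_AND %x, %y
/// satisfies this check and can later be emitted as a compare followed by a
/// conditional compare (CCMP), with the final result read out of NZCV.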
4737static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4738 bool WillNegate, MachineRegisterInfo &MRI,
4739 unsigned Depth = 0) {
4740 if (!MRI.hasOneNonDBGUse(Val))
4741 return false;
4742 MachineInstr *ValDef = MRI.getVRegDef(Val);
4743 unsigned Opcode = ValDef->getOpcode();
4744 if (isa<GAnyCmp>(ValDef)) {
4745 CanNegate = true;
4746 MustBeFirst = false;
4747 return true;
4748 }
4749 // Protect against exponential runtime and stack overflow.
4750 if (Depth > 6)
4751 return false;
4752 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4753 bool IsOR = Opcode == TargetOpcode::G_OR;
4754 Register O0 = ValDef->getOperand(1).getReg();
4755 Register O1 = ValDef->getOperand(2).getReg();
4756 bool CanNegateL;
4757 bool MustBeFirstL;
4758 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4759 return false;
4760 bool CanNegateR;
4761 bool MustBeFirstR;
4762 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4763 return false;
4764
4765 if (MustBeFirstL && MustBeFirstR)
4766 return false;
4767
4768 if (IsOR) {
4769 // For an OR expression we need to be able to naturally negate at least
4770 // one side or we cannot do the transformation at all.
4771 if (!CanNegateL && !CanNegateR)
4772 return false;
4773 // If the result of the OR will be negated and we can naturally negate
4774 // the leaves, then this sub-tree as a whole negates naturally.
4775 CanNegate = WillNegate && CanNegateL && CanNegateR;
4776 // If we cannot naturally negate the whole sub-tree, then this must be
4777 // emitted first.
4778 MustBeFirst = !CanNegate;
4779 } else {
4780 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4781 // We cannot naturally negate an AND operation.
4782 CanNegate = false;
4783 MustBeFirst = MustBeFirstL || MustBeFirstR;
4784 }
4785 return true;
4786 }
4787 return false;
4788}
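// For example (illustrative):
//   %v = G_AND (G_ICMP eq, %a, 0), (G_ICMP slt, %b, %c)
// is a valid conjunction tree (two compare leaves), so emitConjunction()
// below can lower it to a CMP + CCMP chain. An OR of compares qualifies too,
// provided at least one side can be negated just by inverting its condition
// code.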
4789
4790MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4791 Register LHS, Register RHS, CmpInst::Predicate CC,
4792 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4793 MachineIRBuilder &MIB) const {
4794 auto &MRI = *MIB.getMRI();
4795 LLT OpTy = MRI.getType(LHS);
4796 unsigned CCmpOpc;
4797 std::optional<ValueAndVReg> C;
4798 if (CmpInst::isIntPredicate(CC)) {
4799 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4800 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4801 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4802 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4803 else if (C->Value.ule(31))
4804 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4805 else
4806 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4807 } else {
4808 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4809 OpTy.getSizeInBits() == 64);
4810 switch (OpTy.getSizeInBits()) {
4811 case 16:
4812 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4813 CCmpOpc = AArch64::FCCMPHrr;
4814 break;
4815 case 32:
4816 CCmpOpc = AArch64::FCCMPSrr;
4817 break;
4818 case 64:
4819 CCmpOpc = AArch64::FCCMPDrr;
4820 break;
4821 default:
4822 return nullptr;
4823 }
4824 }
4825 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4826 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4827 auto CCmp =
4828 MIB.buildInstr(CCmpOpc, {}, {LHS});
4829 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4830 CCmp.addImm(C->Value.getZExtValue());
4831 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4832 CCmp.addImm(C->Value.abs().getZExtValue());
4833 else
4834 CCmp.addReg(RHS);
4835 CCmp.addImm(NZCV).addImm(Predicate);
4836 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4837 return &*CCmp;
4838}
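// Illustrative example of the CCMP form built above: continuing a chain whose
// previous test succeeds on EQ, comparing %b against 1 for another EQ leaf
// emits roughly
//   CCMPWi %b, 1, 0, eq      ; ccmp wB, #1, #0, eq
// i.e. if the prior condition (eq) held, really compare %b with 1; otherwise
// load NZCV with the immediate #0, which makes the final EQ test fail.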
4839
4840MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4841 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4842 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4843 // We're at a tree leaf, produce a conditional comparison operation.
4844 auto &MRI = *MIB.getMRI();
4845 MachineInstr *ValDef = MRI.getVRegDef(Val);
4846 unsigned Opcode = ValDef->getOpcode();
4847 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4848 Register LHS = Cmp->getLHSReg();
4849 Register RHS = Cmp->getRHSReg();
4850 CmpInst::Predicate CC = Cmp->getCond();
4851 if (Negate)
4852 CC = CmpInst::getInversePredicate(CC);
4853 if (isa<GICmp>(Cmp)) {
4854 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4855 } else {
4856 // Handle special FP cases.
4857 AArch64CC::CondCode ExtraCC;
4858 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4859 // Some floating point conditions can't be tested with a single condition
4860 // code. Construct an additional comparison in this case.
4861 if (ExtraCC != AArch64CC::AL) {
4862 MachineInstr *ExtraCmp;
4863 if (!CCOp)
4864 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4865 else
4866 ExtraCmp =
4867 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4868 CCOp = ExtraCmp->getOperand(0).getReg();
4869 Predicate = ExtraCC;
4870 }
4871 }
4872
4873 // Produce a normal comparison if we are first in the chain
4874 if (!CCOp) {
4875 auto Dst = MRI.cloneVirtualRegister(LHS);
4876 if (isa<GICmp>(Cmp))
4877 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4878 return emitFPCompare(Cmp->getOperand(2).getReg(),
4879 Cmp->getOperand(3).getReg(), MIB);
4880 }
4881 // Otherwise produce a ccmp.
4882 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4883 }
4884 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4885
4886 bool IsOR = Opcode == TargetOpcode::G_OR;
4887
4888 Register LHS = ValDef->getOperand(1).getReg();
4889 bool CanNegateL;
4890 bool MustBeFirstL;
4891 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4892 assert(ValidL && "Valid conjunction/disjunction tree");
4893 (void)ValidL;
4894
4895 Register RHS = ValDef->getOperand(2).getReg();
4896 bool CanNegateR;
4897 bool MustBeFirstR;
4898 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4899 assert(ValidR && "Valid conjunction/disjunction tree");
4900 (void)ValidR;
4901
4902 // Swap sub-tree that must come first to the right side.
4903 if (MustBeFirstL) {
4904 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4905 std::swap(LHS, RHS);
4906 std::swap(CanNegateL, CanNegateR);
4907 std::swap(MustBeFirstL, MustBeFirstR);
4908 }
4909
4910 bool NegateR;
4911 bool NegateAfterR;
4912 bool NegateL;
4913 bool NegateAfterAll;
4914 if (Opcode == TargetOpcode::G_OR) {
4915 // Swap the sub-tree that we can negate naturally to the left.
4916 if (!CanNegateL) {
4917 assert(CanNegateR && "at least one side must be negatable");
4918 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4919 assert(!Negate);
4920 std::swap(LHS, RHS);
4921 NegateR = false;
4922 NegateAfterR = true;
4923 } else {
4924 // Negate the left sub-tree if possible, otherwise negate the result.
4925 NegateR = CanNegateR;
4926 NegateAfterR = !CanNegateR;
4927 }
4928 NegateL = true;
4929 NegateAfterAll = !Negate;
4930 } else {
4931 assert(Opcode == TargetOpcode::G_AND &&
4932 "Valid conjunction/disjunction tree");
4933 assert(!Negate && "Valid conjunction/disjunction tree");
4934
4935 NegateL = false;
4936 NegateR = false;
4937 NegateAfterR = false;
4938 NegateAfterAll = false;
4939 }
4940
4941 // Emit sub-trees.
4942 AArch64CC::CondCode RHSCC;
4943 MachineInstr *CmpR =
4944 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4945 if (NegateAfterR)
4946 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4947 MachineInstr *CmpL = emitConjunctionRec(
4948 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4949 if (NegateAfterAll)
4950 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4951 return CmpL;
4952}
4953
4954MachineInstr *AArch64InstructionSelector::emitConjunction(
4955 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4956 bool DummyCanNegate;
4957 bool DummyMustBeFirst;
4958 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4959 *MIB.getMRI()))
4960 return nullptr;
4961 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4962}
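// Illustrative end-to-end sketch: with tryOptSelectConjunction() below,
//   %c:gpr(s1) = G_OR (G_ICMP eq, %a, 0), (G_ICMP eq, %b, 0)
//   %r:gpr(s32) = G_SELECT %c, %x, %y
// becomes, modulo the exact condition/NZCV choices the recursion makes,
//   cmp  wA, #0
//   ccmp wB, #0, #4, ne      ; if a != 0, test b; else force Z=1
//   csel wR, wX, wY, eq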
4963
4964bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4965 MachineInstr &CondMI) {
4966 AArch64CC::CondCode AArch64CC;
4967 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4968 if (!ConjMI)
4969 return false;
4970
4971 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4972 SelI.eraseFromParent();
4973 return true;
4974}
4975
4976bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4977 MachineRegisterInfo &MRI = *MIB.getMRI();
4978 // We want to recognize this pattern:
4979 //
4980 // $z = G_FCMP pred, $x, $y
4981 // ...
4982 // $w = G_SELECT $z, $a, $b
4983 //
4984 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4985 // some copies/truncs in between.)
4986 //
4987 // If we see this, then we can emit something like this:
4988 //
4989 // fcmp $x, $y
4990 // fcsel $w, $a, $b, pred
4991 //
4992 // Rather than emitting both of the rather long sequences in the standard
4993 // G_FCMP/G_SELECT select methods.
4994
4995 // First, check if the condition is defined by a compare.
4996 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4997
4998 // We can only fold if all of the defs have one use.
4999 Register CondDefReg = CondDef->getOperand(0).getReg();
5000 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5001 // Unless it's another select.
5002 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5003 if (CondDef == &UI)
5004 continue;
5005 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5006 return false;
5007 }
5008 }
5009
5010 // Is the condition defined by a compare?
5011 unsigned CondOpc = CondDef->getOpcode();
5012 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5013 if (tryOptSelectConjunction(I, *CondDef))
5014 return true;
5015 return false;
5016 }
5017
5018 AArch64CC::CondCode CondCode;
5019 if (CondOpc == TargetOpcode::G_ICMP) {
5020 auto &PredOp = CondDef->getOperand(1);
5021 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5022 MIB);
5023 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5024 CondCode =
5025 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5026 } else {
5027 // Get the condition code for the select.
5028 auto Pred =
5029 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5030 AArch64CC::CondCode CondCode2;
5031 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5032
5033 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5034 // instructions to emit the comparison.
5035 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5036 // unnecessary.
5037 if (CondCode2 != AArch64CC::AL)
5038 return false;
5039
5040 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5041 CondDef->getOperand(3).getReg(), MIB)) {
5042 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5043 return false;
5044 }
5045 }
5046
5047 // Emit the select.
5048 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5049 I.getOperand(3).getReg(), CondCode, MIB);
5050 I.eraseFromParent();
5051 return true;
5052}
5053
5054MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5055 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5056 MachineIRBuilder &MIRBuilder) const {
5057 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5058 "Unexpected MachineOperand");
5059 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5060 // We want to find this sort of thing:
5061 // x = G_SUB 0, y
5062 // G_ICMP z, x
5063 //
5064 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5065 // e.g:
5066 //
5067 // cmn z, y
5068
5069 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5070 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5071 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5072 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5073
5074 // Given this:
5075 //
5076 // x = G_SUB 0, y
5077 // G_ICMP z, x
5078 //
5079 // Produce this:
5080 //
5081 // cmn z, y
5082 if (isCMN(RHSDef, P, MRI))
5083 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5084
5085 // Same idea here, but with the LHS of the compare instead:
5086 //
5087 // Given this:
5088 //
5089 // x = G_SUB 0, y
5090 // G_ICMP x, z
5091 //
5092 // Produce this:
5093 //
5094 // cmn y, z
5095 //
5096 // But be careful! We need to swap the predicate!
5097 if (isCMN(LHSDef, P, MRI)) {
5098 if (!CmpInst::isEquality(P)) {
5099 P = CmpInst::getSwappedPredicate(P);
5100 Predicate.setPredicate(P);
5101 }
5102 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5103 }
5104
5105 // Given this:
5106 //
5107 // z = G_AND x, y
5108 // G_ICMP z, 0
5109 //
5110 // Produce this if the compare is signed or an equality compare:
5111 //
5112 // tst x, y
5113 if (!CmpInst::isUnsigned(P) && LHSDef &&
5114 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5115 // Make sure that the RHS is 0.
5116 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5117 if (!ValAndVReg || ValAndVReg->Value != 0)
5118 return nullptr;
5119
5120 return emitTST(LHSDef->getOperand(1),
5121 LHSDef->getOperand(2), MIRBuilder);
5122 }
5123
5124 return nullptr;
5125}
5126
5127bool AArch64InstructionSelector::selectShuffleVector(
5128 MachineInstr &I, MachineRegisterInfo &MRI) {
5129 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5130 Register Src1Reg = I.getOperand(1).getReg();
5131 const LLT Src1Ty = MRI.getType(Src1Reg);
5132 Register Src2Reg = I.getOperand(2).getReg();
5133 const LLT Src2Ty = MRI.getType(Src2Reg);
5134 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5135
5136 MachineBasicBlock &MBB = *I.getParent();
5137 MachineFunction &MF = *MBB.getParent();
5138 LLVMContext &Ctx = MF.getFunction().getContext();
5139
5140 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5141 // it's originated from a <1 x T> type. Those should have been lowered into
5142 // G_BUILD_VECTOR earlier.
5143 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5144 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5145 return false;
5146 }
5147
5148 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5149
5150 SmallVector<Constant *, 64> CstIdxs;
5151 for (int Val : Mask) {
5152 // For now, any undef indexes we'll just assume to be 0. This should be
5153 // optimized in future, e.g. to select DUP etc.
5154 Val = Val < 0 ? 0 : Val;
5155 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5156 unsigned Offset = Byte + Val * BytesPerElt;
5157 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5158 }
5159 }
5160
5161 // Use a constant pool to load the index vector for TBL.
5162 Constant *CPVal = ConstantVector::get(CstIdxs);
5163 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5164 if (!IndexLoad) {
5165 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5166 return false;
5167 }
5168
5169 if (DstTy.getSizeInBits() != 128) {
5170 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5171 // This case can be done with TBL1.
5172 MachineInstr *Concat =
5173 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5174 if (!Concat) {
5175 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5176 return false;
5177 }
5178
5179 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5180 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5181 IndexLoad->getOperand(0).getReg(), MIB);
5182
5183 auto TBL1 = MIB.buildInstr(
5184 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5185 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5186 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5187
5188 auto Copy =
5189 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5190 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5191 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5192 I.eraseFromParent();
5193 return true;
5194 }
5195
5196 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5197 // Q registers for regalloc.
5198 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5199 auto RegSeq = createQTuple(Regs, MIB);
5200 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5201 {RegSeq, IndexLoad->getOperand(0)});
5202 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5203 I.eraseFromParent();
5204 return true;
5205}
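// Illustrative sketch of the TBL path above: a general <8 x s8> shuffle of
// %a and %b builds an 8-byte index vector in the constant pool, loads it
// (widening it to an FPR128), concatenates the sources into one FPR128, and
// emits roughly
//   tbl v0.16b, { v1.16b }, v2.16b
// with the low 64 bits copied out via the dsub subregister. 128-bit results
// use TBLv16i8Two on a QQ register tuple instead.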
5206
5207MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5208 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5209 unsigned LaneIdx, const RegisterBank &RB,
5210 MachineIRBuilder &MIRBuilder) const {
5211 MachineInstr *InsElt = nullptr;
5212 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5213 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5214
5215 // Create a register to define with the insert if one wasn't passed in.
5216 if (!DstReg)
5217 DstReg = MRI.createVirtualRegister(DstRC);
5218
5219 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5220 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5221
5222 if (RB.getID() == AArch64::FPRRegBankID) {
5223 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5224 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5225 .addImm(LaneIdx)
5226 .addUse(InsSub->getOperand(0).getReg())
5227 .addImm(0);
5228 } else {
5229 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5230 .addImm(LaneIdx)
5231 .addUse(EltReg);
5232 }
5233
5234 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5235 return InsElt;
5236}
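// For example (illustrative): inserting a gpr(s32) element into lane 1
// selects INSvi32gpr (ins v0.s[1], wN), while an FPR element is first
// widened with emitScalarToVector and then inserted element-to-element with
// INSvi32lane (ins v0.s[1], v1.s[0]), matching the extra lane-0 immediate
// added above.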
5237
5238bool AArch64InstructionSelector::selectUSMovFromExtend(
5239 MachineInstr &MI, MachineRegisterInfo &MRI) {
5240 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5241 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5242 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5243 return false;
5244 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5245 const Register DefReg = MI.getOperand(0).getReg();
5246 const LLT DstTy = MRI.getType(DefReg);
5247 unsigned DstSize = DstTy.getSizeInBits();
5248
5249 if (DstSize != 32 && DstSize != 64)
5250 return false;
5251
5252 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5253 MI.getOperand(1).getReg(), MRI);
5254 int64_t Lane;
5255 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5256 return false;
5257 Register Src0 = Extract->getOperand(1).getReg();
5258
5259 const LLT VecTy = MRI.getType(Src0);
5260 if (VecTy.isScalableVector())
5261 return false;
5262
5263 if (VecTy.getSizeInBits() != 128) {
5264 const MachineInstr *ScalarToVector = emitScalarToVector(
5265 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5266 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5267 Src0 = ScalarToVector->getOperand(0).getReg();
5268 }
5269
5270 unsigned Opcode;
5271 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5272 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5273 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5274 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5275 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5276 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5277 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5278 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5279 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5280 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5281 else
5282 llvm_unreachable("Unexpected type combo for S/UMov!");
5283
5284 // We may need to generate one of these, depending on the type and sign of the
5285 // input:
5286 // DstReg = SMOV Src0, Lane;
5287 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5288 MachineInstr *ExtI = nullptr;
5289 if (DstSize == 64 && !IsSigned) {
5290 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5291 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5292 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5293 .addImm(0)
5294 .addUse(NewReg)
5295 .addImm(AArch64::sub_32);
5296 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5297 } else
5298 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5299
5300 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5301 MI.eraseFromParent();
5302 return true;
5303}
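// Illustrative examples of the cases above:
//   s32 = G_SEXT (G_EXTRACT_VECTOR_ELT <8 x s16> %v, 2) -> SMOVvi16to32
//         (smov wD, v.h[2])
//   s64 = G_ZEXT (G_EXTRACT_VECTOR_ELT <4 x s32> %v, 1) -> UMOVvi32 into a
//         W register followed by SUBREG_TO_REG through sub_32, since the
//         32-bit umov already zeroes the upper half.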
5304
5305MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5306 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5307 unsigned int Op;
5308 if (DstSize == 128) {
5309 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5310 return nullptr;
5311 Op = AArch64::MOVIv16b_ns;
5312 } else {
5313 Op = AArch64::MOVIv8b_ns;
5314 }
5315
5316 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5317
5318 if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
5319 Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
5320 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5321 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5322 return &*Mov;
5323 }
5324 return nullptr;
5325}
5326
5327MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5328 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5329 bool Inv) {
5330
5331 unsigned int Op;
5332 if (DstSize == 128) {
5333 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5334 return nullptr;
5335 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5336 } else {
5337 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5338 }
5339
5340 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5341 uint64_t Shift;
5342
5343 if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
5344 Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
5345 Shift = 0;
5346 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5347 Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
5348 Shift = 8;
5349 } else
5350 return nullptr;
5351
5352 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5353 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5354 return &*Mov;
5355}
5356
5357MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5358 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5359 bool Inv) {
5360
5361 unsigned int Op;
5362 if (DstSize == 128) {
5363 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5364 return nullptr;
5365 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5366 } else {
5367 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5368 }
5369
5370 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5371 uint64_t Shift;
5372
5373 if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
5374 Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
5375 Shift = 0;
5376 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5377 Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
5378 Shift = 8;
5379 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5380 Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
5381 Shift = 16;
5382 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5383 Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
5384 Shift = 24;
5385 } else
5386 return nullptr;
5387
5388 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5389 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5390 return &*Mov;
5391}
5392
5393MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5394 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5395
5396 unsigned int Op;
5397 if (DstSize == 128) {
5398 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5399 return nullptr;
5400 Op = AArch64::MOVIv2d_ns;
5401 } else {
5402 Op = AArch64::MOVID;
5403 }
5404
5405 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5406 if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
5407 Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
5408 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5409 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5410 return &*Mov;
5411 }
5412 return nullptr;
5413}
5414
5415MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5416 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5417 bool Inv) {
5418
5419 unsigned int Op;
5420 if (DstSize == 128) {
5421 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5422 return nullptr;
5423 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5424 } else {
5425 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5426 }
5427
5428 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5429 uint64_t Shift;
5430
5431 if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
5432 Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
5433 Shift = 264;
5434 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5435 Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
5436 Shift = 272;
5437 } else
5438 return nullptr;
5439
5440 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5441 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5442 return &*Mov;
5443}
5444
5445MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5446 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5447
5448 unsigned int Op;
5449 bool IsWide = false;
5450 if (DstSize == 128) {
5451 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5452 return nullptr;
5453 Op = AArch64::FMOVv4f32_ns;
5454 IsWide = true;
5455 } else {
5456 Op = AArch64::FMOVv2f32_ns;
5457 }
5458
5459 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5460
5461 if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
5462 Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
5463 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5464 Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
5465 Op = AArch64::FMOVv2f64_ns;
5466 } else
5467 return nullptr;
5468
5469 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5470 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5471 return &*Mov;
5472}
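// Illustrative examples for the tryAdvSIMDModImm* helpers above (assuming the
// splat fits the corresponding encoding):
//   16-bit splat of 0x0040 in a 128-bit vector -> MOVIv8i16 (movi v0.8h, #64)
//   byte splat of 0x2a                         -> MOVIv16b_ns (movi v0.16b, #42)
//   float splat of 1.0 in <4 x s32>            -> FMOVv4f32_ns (fmov v0.4s, #1.0)
// Splats whose bitwise NOT is encodable are handled by the Inv/MVNI forms.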
5473
5474bool AArch64InstructionSelector::selectIndexedExtLoad(
5475 MachineInstr &MI, MachineRegisterInfo &MRI) {
5476 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5477 Register Dst = ExtLd.getDstReg();
5478 Register WriteBack = ExtLd.getWritebackReg();
5479 Register Base = ExtLd.getBaseReg();
5480 Register Offset = ExtLd.getOffsetReg();
5481 LLT Ty = MRI.getType(Dst);
5482 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5483 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5484 bool IsPre = ExtLd.isPre();
5485 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5486 unsigned InsertIntoSubReg = 0;
5487 bool IsDst64 = Ty.getSizeInBits() == 64;
5488
5489 // ZExt/SExt results should be on GPR, but we can also handle extending and
5490 // zero-extending loads into FPR, as long as they are scalar.
5491 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5492 if ((IsSExt && IsFPR) || Ty.isVector())
5493 return false;
5494
5495 unsigned Opc = 0;
5496 LLT NewLdDstTy;
5497 LLT s32 = LLT::scalar(32);
5498 LLT s64 = LLT::scalar(64);
5499
5500 if (MemSizeBits == 8) {
5501 if (IsSExt) {
5502 if (IsDst64)
5503 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5504 else
5505 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5506 NewLdDstTy = IsDst64 ? s64 : s32;
5507 } else if (IsFPR) {
5508 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5509 InsertIntoSubReg = AArch64::bsub;
5510 NewLdDstTy = LLT::scalar(MemSizeBits);
5511 } else {
5512 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5513 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5514 NewLdDstTy = s32;
5515 }
5516 } else if (MemSizeBits == 16) {
5517 if (IsSExt) {
5518 if (IsDst64)
5519 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5520 else
5521 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5522 NewLdDstTy = IsDst64 ? s64 : s32;
5523 } else if (IsFPR) {
5524 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5525 InsertIntoSubReg = AArch64::hsub;
5526 NewLdDstTy = LLT::scalar(MemSizeBits);
5527 } else {
5528 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5529 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5530 NewLdDstTy = s32;
5531 }
5532 } else if (MemSizeBits == 32) {
5533 if (IsSExt) {
5534 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5535 NewLdDstTy = s64;
5536 } else if (IsFPR) {
5537 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5538 InsertIntoSubReg = AArch64::ssub;
5539 NewLdDstTy = LLT::scalar(MemSizeBits);
5540 } else {
5541 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5542 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5543 NewLdDstTy = s32;
5544 }
5545 } else {
5546 llvm_unreachable("Unexpected size for indexed load");
5547 }
5548
5549 auto Cst = getIConstantVRegVal(Offset, MRI);
5550 if (!Cst)
5551 return false; // Shouldn't happen, but just in case.
5552
5553 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5554 .addImm(Cst->getSExtValue());
5555 LdMI.cloneMemRefs(ExtLd);
5556 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5557 // Make sure to select the load with the MemTy as the dest type, and then
5558 // insert into a larger reg if needed.
5559 if (InsertIntoSubReg) {
5560 // Generate a SUBREG_TO_REG.
5561 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5562 .addImm(0)
5563 .addUse(LdMI.getReg(1))
5564 .addImm(InsertIntoSubReg);
5565 RBI.constrainGenericRegister(
5566 SubToReg.getReg(0),
5567 *getRegClassForTypeOnBank(MRI.getType(Dst),
5568 *RBI.getRegBank(Dst, MRI, TRI)),
5569 MRI);
5570 } else {
5571 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5572 selectCopy(*Copy, TII, MRI, TRI, RBI);
5573 }
5574 MI.eraseFromParent();
5575
5576 return true;
5577}
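// For example (illustrative): a pre-indexed sign-extending load of s8 into a
// gpr(s32) with constant offset 1 selects LDRSBWpre, roughly
//   ldrsb w1, [x0, #1]!
// defining both the loaded value and the written-back base. The non-sext GPR
// cases that need an s64 result are widened with SUBREG_TO_REG on sub_32, as
// above.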
5578
5579bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5580 MachineRegisterInfo &MRI) {
5581 auto &Ld = cast<GIndexedLoad>(MI);
5582 Register Dst = Ld.getDstReg();
5583 Register WriteBack = Ld.getWritebackReg();
5584 Register Base = Ld.getBaseReg();
5585 Register Offset = Ld.getOffsetReg();
5586 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5587 "Unexpected type for indexed load");
5588 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5589
5590 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5591 return selectIndexedExtLoad(MI, MRI);
5592
5593 unsigned Opc = 0;
5594 if (Ld.isPre()) {
5595 static constexpr unsigned GPROpcodes[] = {
5596 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5597 AArch64::LDRXpre};
5598 static constexpr unsigned FPROpcodes[] = {
5599 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5600 AArch64::LDRQpre};
5601 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5602 Opc = FPROpcodes[Log2_32(MemSize)];
5603 else
5604 Opc = GPROpcodes[Log2_32(MemSize)];
5605 } else {
5606 static constexpr unsigned GPROpcodes[] = {
5607 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5608 AArch64::LDRXpost};
5609 static constexpr unsigned FPROpcodes[] = {
5610 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5611 AArch64::LDRDpost, AArch64::LDRQpost};
5612 if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5613 Opc = FPROpcodes[Log2_32(MemSize)];
5614 else
5615 Opc = GPROpcodes[Log2_32(MemSize)];
5616 }
5617 auto Cst = getIConstantVRegVal(Offset, MRI);
5618 if (!Cst)
5619 return false; // Shouldn't happen, but just in case.
5620 auto LdMI =
5621 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5622 LdMI.cloneMemRefs(Ld);
5623 constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
5624 MI.eraseFromParent();
5625 return true;
5626}
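// For example (illustrative): a post-indexed 64-bit GPR load with offset 8
// selects LDRXpost, roughly  ldr x1, [x0], #8 , where the writeback vreg is
// the first def of the selected instruction.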
5627
5628bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5629 MachineRegisterInfo &MRI) {
5630 Register Dst = I.getWritebackReg();
5631 Register Val = I.getValueReg();
5632 Register Base = I.getBaseReg();
5633 Register Offset = I.getOffsetReg();
5634 LLT ValTy = MRI.getType(Val);
5635 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5636
5637 unsigned Opc = 0;
5638 if (I.isPre()) {
5639 static constexpr unsigned GPROpcodes[] = {
5640 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5641 AArch64::STRXpre};
5642 static constexpr unsigned FPROpcodes[] = {
5643 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5644 AArch64::STRQpre};
5645
5646 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5647 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5648 else
5649 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5650 } else {
5651 static constexpr unsigned GPROpcodes[] = {
5652 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5653 AArch64::STRXpost};
5654 static constexpr unsigned FPROpcodes[] = {
5655 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5656 AArch64::STRDpost, AArch64::STRQpost};
5657
5658 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5659 Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5660 else
5661 Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];
5662 }
5663
5664 auto Cst = getIConstantVRegVal(Offset, MRI);
5665 if (!Cst)
5666 return false; // Shouldn't happen, but just in case.
5667 auto Str =
5668 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5669 Str.cloneMemRefs(I);
5670 constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);
5671 I.eraseFromParent();
5672 return true;
5673}
5674
5675MachineInstr *
5676AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5677 MachineIRBuilder &MIRBuilder,
5678 MachineRegisterInfo &MRI) {
5679 LLT DstTy = MRI.getType(Dst);
5680 unsigned DstSize = DstTy.getSizeInBits();
5681 if (CV->isNullValue()) {
5682 if (DstSize == 128) {
5683 auto Mov =
5684 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5685 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5686 return &*Mov;
5687 }
5688
5689 if (DstSize == 64) {
5690 auto Mov =
5691 MIRBuilder
5692 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5693 .addImm(0);
5694 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5695 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5696 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5697 return &*Copy;
5698 }
5699 }
5700
5701 if (Constant *SplatValue = CV->getSplatValue()) {
5702 APInt SplatValueAsInt =
5703 isa<ConstantFP>(SplatValue)
5704 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5705 : SplatValue->getUniqueInteger();
5706 APInt DefBits = APInt::getSplat(
5707 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
5708 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5709 MachineInstr *NewOp;
5710 bool Inv = false;
5711 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5712 (NewOp =
5713 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5714 (NewOp =
5715 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5716 (NewOp =
5717 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5718 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5719 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5720 return NewOp;
5721
5722 DefBits = ~DefBits;
5723 Inv = true;
5724 if ((NewOp =
5725 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5726 (NewOp =
5727 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5728 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5729 return NewOp;
5730 return nullptr;
5731 };
5732
5733 if (auto *NewOp = TryMOVIWithBits(DefBits))
5734 return NewOp;
5735
5736 // See if a fneg of the constant can be materialized with a MOVI, etc
5737 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5738 unsigned NegOpc) -> MachineInstr * {
5739 // FNegate each sub-element of the constant
5740 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5741 APInt NegBits(DstSize, 0);
5742 unsigned NumElts = DstSize / NumBits;
5743 for (unsigned i = 0; i < NumElts; i++)
5744 NegBits |= Neg << (NumBits * i);
5745 NegBits = DefBits ^ NegBits;
5746
5747 // Try to create the new constants with MOVI, and if so generate a fneg
5748 // for it.
5749 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5750 Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5751 NewOp->getOperand(0).setReg(NewDst);
5752 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5753 }
5754 return nullptr;
5755 };
5756 MachineInstr *R;
5757 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5758 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5759 (STI.hasFullFP16() &&
5760 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5761 return R;
5762 }
5763
5764 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5765 if (!CPLoad) {
5766 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5767 return nullptr;
5768 }
5769
5770 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5771 RBI.constrainGenericRegister(
5772 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5773 return &*Copy;
5774}
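// Illustrative examples of the paths above: an all-zero <2 x s64> becomes
// MOVIv2d_ns #0 (movi v0.2d, #0), with a dsub copy for 64-bit destinations;
// an encodable splat goes through the MOVI/MVNI helpers; a splat that only
// becomes encodable after flipping the per-lane sign bit is built as a MOVI
// followed by the matching FNEG; anything else falls back to a constant-pool
// load plus a copy.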
5775
5776bool AArch64InstructionSelector::tryOptConstantBuildVec(
5777 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5778 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5779 unsigned DstSize = DstTy.getSizeInBits();
5780 assert(DstSize <= 128 && "Unexpected build_vec type!");
5781 if (DstSize < 32)
5782 return false;
5783 // Check if we're building a constant vector, in which case we want to
5784 // generate a constant pool load instead of a vector insert sequence.
5785 SmallVector<Constant *, 16> Csts;
5786 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5787 // Try to find G_CONSTANT or G_FCONSTANT
5788 auto *OpMI =
5789 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5790 if (OpMI)
5791 Csts.emplace_back(
5792 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5793 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5794 I.getOperand(Idx).getReg(), MRI)))
5795 Csts.emplace_back(
5796 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5797 else
5798 return false;
5799 }
5800 Constant *CV = ConstantVector::get(Csts);
5801 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5802 return false;
5803 I.eraseFromParent();
5804 return true;
5805}
5806
5807bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5808 MachineInstr &I, MachineRegisterInfo &MRI) {
5809 // Given:
5810 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5811 //
5812 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5813 Register Dst = I.getOperand(0).getReg();
5814 Register EltReg = I.getOperand(1).getReg();
5815 LLT EltTy = MRI.getType(EltReg);
5816 // If the index isn't on the same bank as its elements, then this can't be a
5817 // SUBREG_TO_REG.
5818 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5819 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5820 if (EltRB != DstRB)
5821 return false;
5822 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5823 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5824 }))
5825 return false;
5826 unsigned SubReg;
5827 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5828 if (!EltRC)
5829 return false;
5830 const TargetRegisterClass *DstRC =
5831 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5832 if (!DstRC)
5833 return false;
5834 if (!getSubRegForClass(EltRC, TRI, SubReg))
5835 return false;
5836 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5837 .addImm(0)
5838 .addUse(EltReg)
5839 .addImm(SubReg);
5840 I.eraseFromParent();
5841 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5842 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5843}
5844
5845bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5846 MachineRegisterInfo &MRI) {
5847 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5848 // Until we port more of the optimized selections, for now just use a vector
5849 // insert sequence.
5850 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5851 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5852 unsigned EltSize = EltTy.getSizeInBits();
5853
5854 if (tryOptConstantBuildVec(I, DstTy, MRI))
5855 return true;
5856 if (tryOptBuildVecToSubregToReg(I, MRI))
5857 return true;
5858
5859 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5860 return false; // Don't support all element types yet.
5861 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5862
5863 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5864 MachineInstr *ScalarToVec =
5865 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5866 I.getOperand(1).getReg(), MIB);
5867 if (!ScalarToVec)
5868 return false;
5869
5870 Register DstVec = ScalarToVec->getOperand(0).getReg();
5871 unsigned DstSize = DstTy.getSizeInBits();
5872
5873 // Keep track of the last MI we inserted. Later on, we might be able to save
5874 // a copy using it.
5875 MachineInstr *PrevMI = ScalarToVec;
5876 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5877 // Note that if we don't do a subregister copy, we can end up making an
5878 // extra register.
5879 Register OpReg = I.getOperand(i).getReg();
5880 // Do not emit inserts for undefs
5881 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5882 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5883 DstVec = PrevMI->getOperand(0).getReg();
5884 }
5885 }
5886
5887 // If DstTy's size in bits is less than 128, then emit a subregister copy
5888 // from DstVec to the last register we've defined.
5889 if (DstSize < 128) {
5890 // Force this to be FPR using the destination vector.
5891 const TargetRegisterClass *RC =
5892 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5893 if (!RC)
5894 return false;
5895 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5896 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5897 return false;
5898 }
5899
5900 unsigned SubReg = 0;
5901 if (!getSubRegForClass(RC, TRI, SubReg))
5902 return false;
5903 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5904 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5905 << ")\n");
5906 return false;
5907 }
5908
5909 Register Reg = MRI.createVirtualRegister(RC);
5910 Register DstReg = I.getOperand(0).getReg();
5911
5912 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5913 MachineOperand &RegOp = I.getOperand(1);
5914 RegOp.setReg(Reg);
5915 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5916 } else {
5917 // We either have a vector with all elements (except the first one) undef or
5918 // at least one non-undef non-first element. In the first case, we need to
5919 // constrain the output register ourselves as we may have generated an
5920 // INSERT_SUBREG operation which is a generic operation for which the
5921 // output regclass cannot be automatically chosen.
5922 //
5923 // In the second case, there is no need to do this as it may generate an
5924 // instruction like INSvi32gpr where the regclass can be automatically
5925 // chosen.
5926 //
5927 // Also, we save a copy by re-using the destination register on the final
5928 // insert.
5929 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5930 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5931
5932 Register DstReg = PrevMI->getOperand(0).getReg();
5933 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5934 const TargetRegisterClass *RC =
5935 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5936 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5937 }
5938 }
5939
5940 I.eraseFromParent();
5941 return true;
5942}
5943
5944bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5945 unsigned NumVecs,
5946 MachineInstr &I) {
5947 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5948 assert(Opc && "Expected an opcode?");
5949 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5950 auto &MRI = *MIB.getMRI();
5951 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5952 unsigned Size = Ty.getSizeInBits();
5953 assert((Size == 64 || Size == 128) &&
5954 "Destination must be 64 bits or 128 bits?");
5955 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5956 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5957 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5958 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5959 Load.cloneMemRefs(I);
5960 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5961 Register SelectedLoadDst = Load->getOperand(0).getReg();
5962 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5963 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5964 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5965 // Emit the subreg copies and immediately select them.
5966 // FIXME: We should refactor our copy code into an emitCopy helper and
5967 // clean up uses of this pattern elsewhere in the selector.
5968 selectCopy(*Vec, TII, MRI, TRI, RBI);
5969 }
5970 return true;
5971}
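// For example (illustrative): @llvm.aarch64.neon.ld1x2 of two <4 x s32>
// values selects LD1Twov4s, whose single def is a consecutive register
// tuple; the loop above then copies qsub0 and qsub1 out into the two
// destination vregs and selects those copies immediately.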
5972
5973bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5974 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5975 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5976 assert(Opc && "Expected an opcode?");
5977 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5978 auto &MRI = *MIB.getMRI();
5979 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5980 bool Narrow = Ty.getSizeInBits() == 64;
5981
5982 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5983 SmallVector<Register, 4> Regs(NumVecs);
5984 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5985 [](auto MO) { return MO.getReg(); });
5986
5987 if (Narrow) {
5988 transform(Regs, Regs.begin(), [this](Register Reg) {
5989 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5990 ->getOperand(0)
5991 .getReg();
5992 });
5993 Ty = Ty.multiplyElements(2);
5994 }
5995
5996 Register Tuple = createQTuple(Regs, MIB);
5997 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5998 if (!LaneNo)
5999 return false;
6000
6001 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6002 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6003 .addReg(Tuple)
6004 .addImm(LaneNo->getZExtValue())
6005 .addReg(Ptr);
6006 Load.cloneMemRefs(I);
6007 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
6008 Register SelectedLoadDst = Load->getOperand(0).getReg();
6009 unsigned SubReg = AArch64::qsub0;
6010 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6011 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6012 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6013 : DstOp(I.getOperand(Idx).getReg())},
6014 {})
6015 .addReg(SelectedLoadDst, 0, SubReg + Idx);
6016 Register WideReg = Vec.getReg(0);
6017 // Emit the subreg copies and immediately select them.
6018 selectCopy(*Vec, TII, MRI, TRI, RBI);
6019 if (Narrow &&
6020 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6021 return false;
6022 }
6023 return true;
6024}
6025
6026void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6027 unsigned NumVecs,
6028 unsigned Opc) {
6029 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6030 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6031 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6032
6033 SmallVector<Register, 2> Regs(NumVecs);
6034 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6035 Regs.begin(), [](auto MO) { return MO.getReg(); });
6036
6037 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6038 : createDTuple(Regs, MIB);
6039 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6040 Store.cloneMemRefs(I);
6041 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6042}
6043
6044bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6045 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6046 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6047 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6048 bool Narrow = Ty.getSizeInBits() == 64;
6049
6050 SmallVector<Register, 2> Regs(NumVecs);
6051 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6052 Regs.begin(), [](auto MO) { return MO.getReg(); });
6053
6054 if (Narrow)
6055 transform(Regs, Regs.begin(), [this](Register Reg) {
6056 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6057 ->getOperand(0)
6058 .getReg();
6059 });
6060
6061 Register Tuple = createQTuple(Regs, MIB);
6062
6063 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6064 if (!LaneNo)
6065 return false;
6066 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6067 auto Store = MIB.buildInstr(Opc, {}, {})
6068 .addReg(Tuple)
6069 .addImm(LaneNo->getZExtValue())
6070 .addReg(Ptr);
6071 Store.cloneMemRefs(I);
6072 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
6073 return true;
6074}
6075
6076bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6077 MachineInstr &I, MachineRegisterInfo &MRI) {
6078 // Find the intrinsic ID.
6079 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6080
6081 const LLT S8 = LLT::scalar(8);
6082 const LLT S16 = LLT::scalar(16);
6083 const LLT S32 = LLT::scalar(32);
6084 const LLT S64 = LLT::scalar(64);
6085 const LLT P0 = LLT::pointer(0, 64);
6086 // Select the instruction.
6087 switch (IntrinID) {
6088 default:
6089 return false;
6090 case Intrinsic::aarch64_ldxp:
6091 case Intrinsic::aarch64_ldaxp: {
6092 auto NewI = MIB.buildInstr(
6093 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6094 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6095 {I.getOperand(3)});
6096 NewI.cloneMemRefs(I);
6097 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
6098 break;
6099 }
6100 case Intrinsic::aarch64_neon_ld1x2: {
6101 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6102 unsigned Opc = 0;
6103 if (Ty == LLT::fixed_vector(8, S8))
6104 Opc = AArch64::LD1Twov8b;
6105 else if (Ty == LLT::fixed_vector(16, S8))
6106 Opc = AArch64::LD1Twov16b;
6107 else if (Ty == LLT::fixed_vector(4, S16))
6108 Opc = AArch64::LD1Twov4h;
6109 else if (Ty == LLT::fixed_vector(8, S16))
6110 Opc = AArch64::LD1Twov8h;
6111 else if (Ty == LLT::fixed_vector(2, S32))
6112 Opc = AArch64::LD1Twov2s;
6113 else if (Ty == LLT::fixed_vector(4, S32))
6114 Opc = AArch64::LD1Twov4s;
6115 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6116 Opc = AArch64::LD1Twov2d;
6117 else if (Ty == S64 || Ty == P0)
6118 Opc = AArch64::LD1Twov1d;
6119 else
6120 llvm_unreachable("Unexpected type for ld1x2!");
6121 selectVectorLoadIntrinsic(Opc, 2, I);
6122 break;
6123 }
6124 case Intrinsic::aarch64_neon_ld1x3: {
6125 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6126 unsigned Opc = 0;
6127 if (Ty == LLT::fixed_vector(8, S8))
6128 Opc = AArch64::LD1Threev8b;
6129 else if (Ty == LLT::fixed_vector(16, S8))
6130 Opc = AArch64::LD1Threev16b;
6131 else if (Ty == LLT::fixed_vector(4, S16))
6132 Opc = AArch64::LD1Threev4h;
6133 else if (Ty == LLT::fixed_vector(8, S16))
6134 Opc = AArch64::LD1Threev8h;
6135 else if (Ty == LLT::fixed_vector(2, S32))
6136 Opc = AArch64::LD1Threev2s;
6137 else if (Ty == LLT::fixed_vector(4, S32))
6138 Opc = AArch64::LD1Threev4s;
6139 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6140 Opc = AArch64::LD1Threev2d;
6141 else if (Ty == S64 || Ty == P0)
6142 Opc = AArch64::LD1Threev1d;
6143 else
6144 llvm_unreachable("Unexpected type for ld1x3!");
6145 selectVectorLoadIntrinsic(Opc, 3, I);
6146 break;
6147 }
6148 case Intrinsic::aarch64_neon_ld1x4: {
6149 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6150 unsigned Opc = 0;
6151 if (Ty == LLT::fixed_vector(8, S8))
6152 Opc = AArch64::LD1Fourv8b;
6153 else if (Ty == LLT::fixed_vector(16, S8))
6154 Opc = AArch64::LD1Fourv16b;
6155 else if (Ty == LLT::fixed_vector(4, S16))
6156 Opc = AArch64::LD1Fourv4h;
6157 else if (Ty == LLT::fixed_vector(8, S16))
6158 Opc = AArch64::LD1Fourv8h;
6159 else if (Ty == LLT::fixed_vector(2, S32))
6160 Opc = AArch64::LD1Fourv2s;
6161 else if (Ty == LLT::fixed_vector(4, S32))
6162 Opc = AArch64::LD1Fourv4s;
6163 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6164 Opc = AArch64::LD1Fourv2d;
6165 else if (Ty == S64 || Ty == P0)
6166 Opc = AArch64::LD1Fourv1d;
6167 else
6168 llvm_unreachable("Unexpected type for ld1x4!");
6169 selectVectorLoadIntrinsic(Opc, 4, I);
6170 break;
6171 }
6172 case Intrinsic::aarch64_neon_ld2: {
6173 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6174 unsigned Opc = 0;
6175 if (Ty == LLT::fixed_vector(8, S8))
6176 Opc = AArch64::LD2Twov8b;
6177 else if (Ty == LLT::fixed_vector(16, S8))
6178 Opc = AArch64::LD2Twov16b;
6179 else if (Ty == LLT::fixed_vector(4, S16))
6180 Opc = AArch64::LD2Twov4h;
6181 else if (Ty == LLT::fixed_vector(8, S16))
6182 Opc = AArch64::LD2Twov8h;
6183 else if (Ty == LLT::fixed_vector(2, S32))
6184 Opc = AArch64::LD2Twov2s;
6185 else if (Ty == LLT::fixed_vector(4, S32))
6186 Opc = AArch64::LD2Twov4s;
6187 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6188 Opc = AArch64::LD2Twov2d;
6189 else if (Ty == S64 || Ty == P0)
6190 Opc = AArch64::LD1Twov1d;
6191 else
6192 llvm_unreachable("Unexpected type for ld2!");
6193 selectVectorLoadIntrinsic(Opc, 2, I);
6194 break;
6195 }
6196 case Intrinsic::aarch64_neon_ld2lane: {
6197 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6198 unsigned Opc;
6199 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6200 Opc = AArch64::LD2i8;
6201 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6202 Opc = AArch64::LD2i16;
6203 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6204 Opc = AArch64::LD2i32;
6205 else if (Ty == LLT::fixed_vector(2, S64) ||
6206 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6207 Opc = AArch64::LD2i64;
6208 else
6209 llvm_unreachable("Unexpected type for ld2lane!");
6210 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6211 return false;
6212 break;
6213 }
6214 case Intrinsic::aarch64_neon_ld2r: {
6215 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6216 unsigned Opc = 0;
6217 if (Ty == LLT::fixed_vector(8, S8))
6218 Opc = AArch64::LD2Rv8b;
6219 else if (Ty == LLT::fixed_vector(16, S8))
6220 Opc = AArch64::LD2Rv16b;
6221 else if (Ty == LLT::fixed_vector(4, S16))
6222 Opc = AArch64::LD2Rv4h;
6223 else if (Ty == LLT::fixed_vector(8, S16))
6224 Opc = AArch64::LD2Rv8h;
6225 else if (Ty == LLT::fixed_vector(2, S32))
6226 Opc = AArch64::LD2Rv2s;
6227 else if (Ty == LLT::fixed_vector(4, S32))
6228 Opc = AArch64::LD2Rv4s;
6229 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6230 Opc = AArch64::LD2Rv2d;
6231 else if (Ty == S64 || Ty == P0)
6232 Opc = AArch64::LD2Rv1d;
6233 else
6234 llvm_unreachable("Unexpected type for ld2r!");
6235 selectVectorLoadIntrinsic(Opc, 2, I);
6236 break;
6237 }
6238 case Intrinsic::aarch64_neon_ld3: {
6239 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6240 unsigned Opc = 0;
6241 if (Ty == LLT::fixed_vector(8, S8))
6242 Opc = AArch64::LD3Threev8b;
6243 else if (Ty == LLT::fixed_vector(16, S8))
6244 Opc = AArch64::LD3Threev16b;
6245 else if (Ty == LLT::fixed_vector(4, S16))
6246 Opc = AArch64::LD3Threev4h;
6247 else if (Ty == LLT::fixed_vector(8, S16))
6248 Opc = AArch64::LD3Threev8h;
6249 else if (Ty == LLT::fixed_vector(2, S32))
6250 Opc = AArch64::LD3Threev2s;
6251 else if (Ty == LLT::fixed_vector(4, S32))
6252 Opc = AArch64::LD3Threev4s;
6253 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6254 Opc = AArch64::LD3Threev2d;
6255 else if (Ty == S64 || Ty == P0)
6256 Opc = AArch64::LD1Threev1d;
6257 else
6258 llvm_unreachable("Unexpected type for ld3!");
6259 selectVectorLoadIntrinsic(Opc, 3, I);
6260 break;
6261 }
6262 case Intrinsic::aarch64_neon_ld3lane: {
6263 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6264 unsigned Opc;
6265 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6266 Opc = AArch64::LD3i8;
6267 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6268 Opc = AArch64::LD3i16;
6269 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6270 Opc = AArch64::LD3i32;
6271 else if (Ty == LLT::fixed_vector(2, S64) ||
6272 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6273 Opc = AArch64::LD3i64;
6274 else
6275 llvm_unreachable("Unexpected type for ld3lane!");
6276 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6277 return false;
6278 break;
6279 }
6280 case Intrinsic::aarch64_neon_ld3r: {
6281 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6282 unsigned Opc = 0;
6283 if (Ty == LLT::fixed_vector(8, S8))
6284 Opc = AArch64::LD3Rv8b;
6285 else if (Ty == LLT::fixed_vector(16, S8))
6286 Opc = AArch64::LD3Rv16b;
6287 else if (Ty == LLT::fixed_vector(4, S16))
6288 Opc = AArch64::LD3Rv4h;
6289 else if (Ty == LLT::fixed_vector(8, S16))
6290 Opc = AArch64::LD3Rv8h;
6291 else if (Ty == LLT::fixed_vector(2, S32))
6292 Opc = AArch64::LD3Rv2s;
6293 else if (Ty == LLT::fixed_vector(4, S32))
6294 Opc = AArch64::LD3Rv4s;
6295 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6296 Opc = AArch64::LD3Rv2d;
6297 else if (Ty == S64 || Ty == P0)
6298 Opc = AArch64::LD3Rv1d;
6299 else
6300 llvm_unreachable("Unexpected type for ld3r!");
6301 selectVectorLoadIntrinsic(Opc, 3, I);
6302 break;
6303 }
6304 case Intrinsic::aarch64_neon_ld4: {
6305 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6306 unsigned Opc = 0;
6307 if (Ty == LLT::fixed_vector(8, S8))
6308 Opc = AArch64::LD4Fourv8b;
6309 else if (Ty == LLT::fixed_vector(16, S8))
6310 Opc = AArch64::LD4Fourv16b;
6311 else if (Ty == LLT::fixed_vector(4, S16))
6312 Opc = AArch64::LD4Fourv4h;
6313 else if (Ty == LLT::fixed_vector(8, S16))
6314 Opc = AArch64::LD4Fourv8h;
6315 else if (Ty == LLT::fixed_vector(2, S32))
6316 Opc = AArch64::LD4Fourv2s;
6317 else if (Ty == LLT::fixed_vector(4, S32))
6318 Opc = AArch64::LD4Fourv4s;
6319 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6320 Opc = AArch64::LD4Fourv2d;
6321 else if (Ty == S64 || Ty == P0)
6322 Opc = AArch64::LD1Fourv1d;
6323 else
6324 llvm_unreachable("Unexpected type for ld4!");
6325 selectVectorLoadIntrinsic(Opc, 4, I);
6326 break;
6327 }
6328 case Intrinsic::aarch64_neon_ld4lane: {
6329 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6330 unsigned Opc;
6331 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6332 Opc = AArch64::LD4i8;
6333 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6334 Opc = AArch64::LD4i16;
6335 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6336 Opc = AArch64::LD4i32;
6337 else if (Ty == LLT::fixed_vector(2, S64) ||
6338 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6339 Opc = AArch64::LD4i64;
6340 else
6341 llvm_unreachable("Unexpected type for ld4lane!");
6342 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6343 return false;
6344 break;
6345 }
6346 case Intrinsic::aarch64_neon_ld4r: {
6347 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6348 unsigned Opc = 0;
6349 if (Ty == LLT::fixed_vector(8, S8))
6350 Opc = AArch64::LD4Rv8b;
6351 else if (Ty == LLT::fixed_vector(16, S8))
6352 Opc = AArch64::LD4Rv16b;
6353 else if (Ty == LLT::fixed_vector(4, S16))
6354 Opc = AArch64::LD4Rv4h;
6355 else if (Ty == LLT::fixed_vector(8, S16))
6356 Opc = AArch64::LD4Rv8h;
6357 else if (Ty == LLT::fixed_vector(2, S32))
6358 Opc = AArch64::LD4Rv2s;
6359 else if (Ty == LLT::fixed_vector(4, S32))
6360 Opc = AArch64::LD4Rv4s;
6361 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6362 Opc = AArch64::LD4Rv2d;
6363 else if (Ty == S64 || Ty == P0)
6364 Opc = AArch64::LD4Rv1d;
6365 else
6366 llvm_unreachable("Unexpected type for ld4r!");
6367 selectVectorLoadIntrinsic(Opc, 4, I);
6368 break;
6369 }
6370 case Intrinsic::aarch64_neon_st1x2: {
6371 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6372 unsigned Opc;
6373 if (Ty == LLT::fixed_vector(8, S8))
6374 Opc = AArch64::ST1Twov8b;
6375 else if (Ty == LLT::fixed_vector(16, S8))
6376 Opc = AArch64::ST1Twov16b;
6377 else if (Ty == LLT::fixed_vector(4, S16))
6378 Opc = AArch64::ST1Twov4h;
6379 else if (Ty == LLT::fixed_vector(8, S16))
6380 Opc = AArch64::ST1Twov8h;
6381 else if (Ty == LLT::fixed_vector(2, S32))
6382 Opc = AArch64::ST1Twov2s;
6383 else if (Ty == LLT::fixed_vector(4, S32))
6384 Opc = AArch64::ST1Twov4s;
6385 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6386 Opc = AArch64::ST1Twov2d;
6387 else if (Ty == S64 || Ty == P0)
6388 Opc = AArch64::ST1Twov1d;
6389 else
6390 llvm_unreachable("Unexpected type for st1x2!");
6391 selectVectorStoreIntrinsic(I, 2, Opc);
6392 break;
6393 }
6394 case Intrinsic::aarch64_neon_st1x3: {
6395 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6396 unsigned Opc;
6397 if (Ty == LLT::fixed_vector(8, S8))
6398 Opc = AArch64::ST1Threev8b;
6399 else if (Ty == LLT::fixed_vector(16, S8))
6400 Opc = AArch64::ST1Threev16b;
6401 else if (Ty == LLT::fixed_vector(4, S16))
6402 Opc = AArch64::ST1Threev4h;
6403 else if (Ty == LLT::fixed_vector(8, S16))
6404 Opc = AArch64::ST1Threev8h;
6405 else if (Ty == LLT::fixed_vector(2, S32))
6406 Opc = AArch64::ST1Threev2s;
6407 else if (Ty == LLT::fixed_vector(4, S32))
6408 Opc = AArch64::ST1Threev4s;
6409 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6410 Opc = AArch64::ST1Threev2d;
6411 else if (Ty == S64 || Ty == P0)
6412 Opc = AArch64::ST1Threev1d;
6413 else
6414 llvm_unreachable("Unexpected type for st1x3!");
6415 selectVectorStoreIntrinsic(I, 3, Opc);
6416 break;
6417 }
6418 case Intrinsic::aarch64_neon_st1x4: {
6419 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6420 unsigned Opc;
6421 if (Ty == LLT::fixed_vector(8, S8))
6422 Opc = AArch64::ST1Fourv8b;
6423 else if (Ty == LLT::fixed_vector(16, S8))
6424 Opc = AArch64::ST1Fourv16b;
6425 else if (Ty == LLT::fixed_vector(4, S16))
6426 Opc = AArch64::ST1Fourv4h;
6427 else if (Ty == LLT::fixed_vector(8, S16))
6428 Opc = AArch64::ST1Fourv8h;
6429 else if (Ty == LLT::fixed_vector(2, S32))
6430 Opc = AArch64::ST1Fourv2s;
6431 else if (Ty == LLT::fixed_vector(4, S32))
6432 Opc = AArch64::ST1Fourv4s;
6433 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6434 Opc = AArch64::ST1Fourv2d;
6435 else if (Ty == S64 || Ty == P0)
6436 Opc = AArch64::ST1Fourv1d;
6437 else
6438 llvm_unreachable("Unexpected type for st1x4!");
6439 selectVectorStoreIntrinsic(I, 4, Opc);
6440 break;
6441 }
6442 case Intrinsic::aarch64_neon_st2: {
6443 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6444 unsigned Opc;
6445 if (Ty == LLT::fixed_vector(8, S8))
6446 Opc = AArch64::ST2Twov8b;
6447 else if (Ty == LLT::fixed_vector(16, S8))
6448 Opc = AArch64::ST2Twov16b;
6449 else if (Ty == LLT::fixed_vector(4, S16))
6450 Opc = AArch64::ST2Twov4h;
6451 else if (Ty == LLT::fixed_vector(8, S16))
6452 Opc = AArch64::ST2Twov8h;
6453 else if (Ty == LLT::fixed_vector(2, S32))
6454 Opc = AArch64::ST2Twov2s;
6455 else if (Ty == LLT::fixed_vector(4, S32))
6456 Opc = AArch64::ST2Twov4s;
6457 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6458 Opc = AArch64::ST2Twov2d;
6459 else if (Ty == S64 || Ty == P0)
6460 Opc = AArch64::ST1Twov1d;
6461 else
6462 llvm_unreachable("Unexpected type for st2!");
6463 selectVectorStoreIntrinsic(I, 2, Opc);
6464 break;
6465 }
6466 case Intrinsic::aarch64_neon_st3: {
6467 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6468 unsigned Opc;
6469 if (Ty == LLT::fixed_vector(8, S8))
6470 Opc = AArch64::ST3Threev8b;
6471 else if (Ty == LLT::fixed_vector(16, S8))
6472 Opc = AArch64::ST3Threev16b;
6473 else if (Ty == LLT::fixed_vector(4, S16))
6474 Opc = AArch64::ST3Threev4h;
6475 else if (Ty == LLT::fixed_vector(8, S16))
6476 Opc = AArch64::ST3Threev8h;
6477 else if (Ty == LLT::fixed_vector(2, S32))
6478 Opc = AArch64::ST3Threev2s;
6479 else if (Ty == LLT::fixed_vector(4, S32))
6480 Opc = AArch64::ST3Threev4s;
6481 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6482 Opc = AArch64::ST3Threev2d;
6483 else if (Ty == S64 || Ty == P0)
6484 Opc = AArch64::ST1Threev1d;
6485 else
6486 llvm_unreachable("Unexpected type for st3!");
6487 selectVectorStoreIntrinsic(I, 3, Opc);
6488 break;
6489 }
6490 case Intrinsic::aarch64_neon_st4: {
6491 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6492 unsigned Opc;
6493 if (Ty == LLT::fixed_vector(8, S8))
6494 Opc = AArch64::ST4Fourv8b;
6495 else if (Ty == LLT::fixed_vector(16, S8))
6496 Opc = AArch64::ST4Fourv16b;
6497 else if (Ty == LLT::fixed_vector(4, S16))
6498 Opc = AArch64::ST4Fourv4h;
6499 else if (Ty == LLT::fixed_vector(8, S16))
6500 Opc = AArch64::ST4Fourv8h;
6501 else if (Ty == LLT::fixed_vector(2, S32))
6502 Opc = AArch64::ST4Fourv2s;
6503 else if (Ty == LLT::fixed_vector(4, S32))
6504 Opc = AArch64::ST4Fourv4s;
6505 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6506 Opc = AArch64::ST4Fourv2d;
6507 else if (Ty == S64 || Ty == P0)
6508 Opc = AArch64::ST1Fourv1d;
6509 else
6510 llvm_unreachable("Unexpected type for st4!");
6511 selectVectorStoreIntrinsic(I, 4, Opc);
6512 break;
6513 }
6514 case Intrinsic::aarch64_neon_st2lane: {
6515 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6516 unsigned Opc;
6517 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6518 Opc = AArch64::ST2i8;
6519 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6520 Opc = AArch64::ST2i16;
6521 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6522 Opc = AArch64::ST2i32;
6523 else if (Ty == LLT::fixed_vector(2, S64) ||
6524 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6525 Opc = AArch64::ST2i64;
6526 else
6527 llvm_unreachable("Unexpected type for st2lane!");
6528 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6529 return false;
6530 break;
6531 }
6532 case Intrinsic::aarch64_neon_st3lane: {
6533 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6534 unsigned Opc;
6535 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6536 Opc = AArch64::ST3i8;
6537 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6538 Opc = AArch64::ST3i16;
6539 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6540 Opc = AArch64::ST3i32;
6541 else if (Ty == LLT::fixed_vector(2, S64) ||
6542 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6543 Opc = AArch64::ST3i64;
6544 else
6545 llvm_unreachable("Unexpected type for st3lane!");
6546 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6547 return false;
6548 break;
6549 }
6550 case Intrinsic::aarch64_neon_st4lane: {
6551 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6552 unsigned Opc;
6553 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6554 Opc = AArch64::ST4i8;
6555 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6556 Opc = AArch64::ST4i16;
6557 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6558 Opc = AArch64::ST4i32;
6559 else if (Ty == LLT::fixed_vector(2, S64) ||
6560 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6561 Opc = AArch64::ST4i64;
6562 else
6563 llvm_unreachable("Unexpected type for st4lane!");
6564 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6565 return false;
6566 break;
6567 }
6568 case Intrinsic::aarch64_mops_memset_tag: {
6569 // Transform
6570    //   %dst:gpr(p0) =
6571    //       G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6572    //       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6573    //   where %dst is updated, into
6574    //   (%Rd:GPR64common, %Rn:GPR64) =
6575    //       MOPSMemorySetTaggingPseudo
6576    //       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6577    //   where Rd and Rn are tied.
6578    // It is expected that %val has been extended to s64 in legalization.
6579    // Note that the order of the size and value operands is swapped.
6580
6581 Register DstDef = I.getOperand(0).getReg();
6582 // I.getOperand(1) is the intrinsic function
6583 Register DstUse = I.getOperand(2).getReg();
6584 Register ValUse = I.getOperand(3).getReg();
6585 Register SizeUse = I.getOperand(4).getReg();
6586
6587 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6588 // Therefore an additional virtual register is required for the updated size
6589 // operand. This value is not accessible via the semantics of the intrinsic.
6590 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
6591
6592 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6593 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6594    Memset.cloneMemRefs(I);
6595    constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
6596 break;
6597 }
6598 }
6599
6600 I.eraseFromParent();
6601 return true;
6602}
6603
6604bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6605                                                 MachineRegisterInfo &MRI) {
6606 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6607
6608 switch (IntrinID) {
6609 default:
6610 break;
6611 case Intrinsic::aarch64_crypto_sha1h: {
6612 Register DstReg = I.getOperand(0).getReg();
6613 Register SrcReg = I.getOperand(2).getReg();
6614
6615 // FIXME: Should this be an assert?
6616 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
6617 MRI.getType(SrcReg).getSizeInBits() != 32)
6618 return false;
6619
6620 // The operation has to happen on FPRs. Set up some new FPR registers for
6621 // the source and destination if they are on GPRs.
6622 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6623 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6624 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
6625
6626 // Make sure the copy ends up getting constrained properly.
6627 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
6628 AArch64::GPR32RegClass, MRI);
6629 }
6630
6631 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6632 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6633
6634 // Actually insert the instruction.
6635 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6636 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
6637
6638 // Did we create a new register for the destination?
6639 if (DstReg != I.getOperand(0).getReg()) {
6640 // Yep. Copy the result of the instruction back into the original
6641 // destination.
6642 MIB.buildCopy({I.getOperand(0)}, {DstReg});
6643 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
6644 AArch64::GPR32RegClass, MRI);
6645 }
6646
6647 I.eraseFromParent();
6648 return true;
6649 }
6650 case Intrinsic::ptrauth_resign: {
6651 Register DstReg = I.getOperand(0).getReg();
6652 Register ValReg = I.getOperand(2).getReg();
6653 uint64_t AUTKey = I.getOperand(3).getImm();
6654 Register AUTDisc = I.getOperand(4).getReg();
6655 uint64_t PACKey = I.getOperand(5).getImm();
6656 Register PACDisc = I.getOperand(6).getReg();
6657
6658 Register AUTAddrDisc = AUTDisc;
6659 uint16_t AUTConstDiscC = 0;
6660    std::tie(AUTConstDiscC, AUTAddrDisc) =
6661        extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6662
6663 Register PACAddrDisc = PACDisc;
6664 uint16_t PACConstDiscC = 0;
6665    std::tie(PACConstDiscC, PACAddrDisc) =
6666        extractPtrauthBlendDiscriminators(PACDisc, MRI);
6667
6668 MIB.buildCopy({AArch64::X16}, {ValReg});
6669 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6670 MIB.buildInstr(AArch64::AUTPAC)
6671 .addImm(AUTKey)
6672 .addImm(AUTConstDiscC)
6673 .addUse(AUTAddrDisc)
6674 .addImm(PACKey)
6675 .addImm(PACConstDiscC)
6676 .addUse(PACAddrDisc)
6677 .constrainAllUses(TII, TRI, RBI);
6678 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6679
6680 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6681 I.eraseFromParent();
6682 return true;
6683 }
6684 case Intrinsic::ptrauth_auth: {
6685 Register DstReg = I.getOperand(0).getReg();
6686 Register ValReg = I.getOperand(2).getReg();
6687 uint64_t AUTKey = I.getOperand(3).getImm();
6688 Register AUTDisc = I.getOperand(4).getReg();
6689
6690 Register AUTAddrDisc = AUTDisc;
6691 uint16_t AUTConstDiscC = 0;
6692    std::tie(AUTConstDiscC, AUTAddrDisc) =
6693        extractPtrauthBlendDiscriminators(AUTDisc, MRI);
6694
6695 if (STI.isX16X17Safer()) {
6696 MIB.buildCopy({AArch64::X16}, {ValReg});
6697 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6698 MIB.buildInstr(AArch64::AUTx16x17)
6699 .addImm(AUTKey)
6700 .addImm(AUTConstDiscC)
6701 .addUse(AUTAddrDisc)
6702 .constrainAllUses(TII, TRI, RBI);
6703 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6704 } else {
6705 Register ScratchReg =
6706 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
6707 MIB.buildInstr(AArch64::AUTxMxN)
6708 .addDef(DstReg)
6709 .addDef(ScratchReg)
6710 .addUse(ValReg)
6711 .addImm(AUTKey)
6712 .addImm(AUTConstDiscC)
6713 .addUse(AUTAddrDisc)
6714 .constrainAllUses(TII, TRI, RBI);
6715 }
6716
6717 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6718 I.eraseFromParent();
6719 return true;
6720 }
6721 case Intrinsic::frameaddress:
6722 case Intrinsic::returnaddress: {
6723 MachineFunction &MF = *I.getParent()->getParent();
6724 MachineFrameInfo &MFI = MF.getFrameInfo();
6725
6726 unsigned Depth = I.getOperand(2).getImm();
6727 Register DstReg = I.getOperand(0).getReg();
6728 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6729
6730 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6731 if (!MFReturnAddr) {
6732 // Insert the copy from LR/X30 into the entry block, before it can be
6733 // clobbered by anything.
6734 MFI.setReturnAddressIsTaken(true);
6735 MFReturnAddr = getFunctionLiveInPhysReg(
6736 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
6737 }
6738
6739 if (STI.hasPAuth()) {
6740 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6741 } else {
6742 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
6743 MIB.buildInstr(AArch64::XPACLRI);
6744 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6745 }
6746
6747 I.eraseFromParent();
6748 return true;
6749 }
6750
6751 MFI.setFrameAddressIsTaken(true);
6752 Register FrameAddr(AArch64::FP);
6753 while (Depth--) {
6754 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6755 auto Ldr =
6756        MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6757    constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
6758 FrameAddr = NextFrame;
6759 }
6760
6761 if (IntrinID == Intrinsic::frameaddress)
6762 MIB.buildCopy({DstReg}, {FrameAddr});
6763 else {
6764 MFI.setReturnAddressIsTaken(true);
6765
6766 if (STI.hasPAuth()) {
6767 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6768 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6769 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6770 } else {
6771 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
6772 .addImm(1);
6773 MIB.buildInstr(AArch64::XPACLRI);
6774 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
6775 }
6776 }
6777
6778 I.eraseFromParent();
6779 return true;
6780 }
6781 case Intrinsic::aarch64_neon_tbl2:
6782 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
6783 return true;
6784 case Intrinsic::aarch64_neon_tbl3:
6785 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6786 false);
6787 return true;
6788 case Intrinsic::aarch64_neon_tbl4:
6789 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
6790 return true;
6791 case Intrinsic::aarch64_neon_tbx2:
6792 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
6793 return true;
6794 case Intrinsic::aarch64_neon_tbx3:
6795 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
6796 return true;
6797 case Intrinsic::aarch64_neon_tbx4:
6798 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
6799 return true;
6800 case Intrinsic::swift_async_context_addr:
6801 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
6802 {Register(AArch64::FP)})
6803 .addImm(8)
6804 .addImm(0);
6805    constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
6806
6807    MachineFunction *MF = I.getParent()->getParent();
6808 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6809 I.eraseFromParent();
6810 return true;
6811 }
6812 return false;
6813}
6814
6815// G_PTRAUTH_GLOBAL_VALUE lowering
6816//
6817// We have 3 lowering alternatives to choose from:
6818// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6819// If the GV doesn't need a GOT load (i.e., is locally defined)
6820// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6821//
6822// - LOADgotPAC: similar to LOADgot, with added PAC.
6823// If the GV needs a GOT load, materialize the pointer using the usual
6824//   GOT adrp+ldr, plus pac. Pointers in the GOT are assumed not to be signed;
6825//   the GOT section is assumed to be read-only (e.g., via the relro mechanism). See
6826// LowerMOVaddrPAC.
6827//
6828// - LOADauthptrstatic: similar to LOADgot, but use a
6829// special stub slot instead of a GOT slot.
6830// Load a signed pointer for symbol 'sym' from a stub slot named
6831// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6832// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6833// .data with an
6834// @AUTH relocation. See LowerLOADauthptrstatic.
6835//
6836// All 3 are pseudos that are expanded late into longer sequences: this lets us
6837// provide integrity guarantees on the to-be-signed intermediate values.
6838//
6839// LOADauthptrstatic is undesirable because it requires a large section filled
6840// with often similarly-signed pointers, making it a good harvesting target.
6841// Thus, it's only used for ptrauth references to extern_weak to avoid null
6842// checks.
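//
// Illustrative mapping (a sketch of the choice made below, not an exhaustive
// list):
//   ptrauth reference to a locally-defined global  -> MOVaddrPAC
//   ptrauth reference that needs a GOT load        -> LOADgotPAC
//   ptrauth reference to an extern_weak global     -> LOADauthptrstatic
// In GMIR terms the input looks roughly like
//   %g:gpr(p0) = G_PTRAUTH_GLOBAL_VALUE %addr(p0), <key>, %addrdisc(s64), <disc>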
6843
6844bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6845    MachineInstr &I, MachineRegisterInfo &MRI) const {
6846 Register DefReg = I.getOperand(0).getReg();
6847 Register Addr = I.getOperand(1).getReg();
6848 uint64_t Key = I.getOperand(2).getImm();
6849 Register AddrDisc = I.getOperand(3).getReg();
6850 uint64_t Disc = I.getOperand(4).getImm();
6851 int64_t Offset = 0;
6852
6853 if (Key > AArch64PACKey::LAST)
6854 report_fatal_error("key in ptrauth global out of range [0, " +
6855 Twine((int)AArch64PACKey::LAST) + "]");
6856
6857 // Blend only works if the integer discriminator is 16-bit wide.
6858 if (!isUInt<16>(Disc))
6859    report_fatal_error(
6860        "constant discriminator in ptrauth global out of range [0, 0xffff]");
6861
6862 // Choosing between 3 lowering alternatives is target-specific.
6863 if (!STI.isTargetELF() && !STI.isTargetMachO())
6864 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
6865
6866 if (!MRI.hasOneDef(Addr))
6867 return false;
6868
6869 // First match any offset we take from the real global.
6870 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
6871 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6872 Register OffsetReg = DefMI->getOperand(2).getReg();
6873 if (!MRI.hasOneDef(OffsetReg))
6874 return false;
6875 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
6876 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6877 return false;
6878
6879 Addr = DefMI->getOperand(1).getReg();
6880 if (!MRI.hasOneDef(Addr))
6881 return false;
6882
6883 DefMI = &*MRI.def_instr_begin(Addr);
6884 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
6885 }
6886
6887 // We should be left with a genuine unauthenticated GlobalValue.
6888 const GlobalValue *GV;
6889 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6890    GV = DefMI->getOperand(1).getGlobal();
6891    Offset += DefMI->getOperand(1).getOffset();
6892 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6893    GV = DefMI->getOperand(2).getGlobal();
6894    Offset += DefMI->getOperand(2).getOffset();
6895 } else {
6896 return false;
6897 }
6898
6899 MachineIRBuilder MIB(I);
6900
6901 // Classify the reference to determine whether it needs a GOT load.
6902 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6903 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6904 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6905 "unsupported non-GOT op flags on ptrauth global reference");
6906 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6907 "unsupported non-GOT reference to weak ptrauth global");
6908
6909 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
6910 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6911
6912 // Non-extern_weak:
6913 // - No GOT load needed -> MOVaddrPAC
6914 // - GOT load for non-extern_weak -> LOADgotPAC
6915 // Note that we disallow extern_weak refs to avoid null checks later.
6916 if (!GV->hasExternalWeakLinkage()) {
6917 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6918 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6919    MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6920        .addGlobalAddress(GV, Offset)
6921 .addImm(Key)
6922 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6923 .addImm(Disc)
6924 .constrainAllUses(TII, TRI, RBI);
6925 MIB.buildCopy(DefReg, Register(AArch64::X16));
6926 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6927 I.eraseFromParent();
6928 return true;
6929 }
6930
6931 // extern_weak -> LOADauthptrstatic
6932
6933 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6934 // offset alone as a pointer if the symbol wasn't available, which would
6935 // probably break null checks in users. Ptrauth complicates things further:
6936 // error out.
6937 if (Offset != 0)
6938    report_fatal_error(
6939        "unsupported non-zero offset in weak ptrauth global reference");
6940
6941 if (HasAddrDisc)
6942 report_fatal_error("unsupported weak addr-div ptrauth global");
6943
6944 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6945 .addGlobalAddress(GV, Offset)
6946 .addImm(Key)
6947 .addImm(Disc);
6948 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
6949
6950 I.eraseFromParent();
6951 return true;
6952}
6953
6954void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6955                                             MachineRegisterInfo &MRI,
6956 unsigned NumVec, unsigned Opc1,
6957 unsigned Opc2, bool isExt) {
6958 Register DstReg = I.getOperand(0).getReg();
6959 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
6960
6961  // Create the REG_SEQUENCE
6962  SmallVector<Register, 4> Regs;
6963 for (unsigned i = 0; i < NumVec; i++)
6964 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
6965 Register RegSeq = createQTuple(Regs, MIB);
6966
6967  Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
6968  MachineInstrBuilder Instr;
6969 if (isExt) {
6970 Register Reg = I.getOperand(2).getReg();
6971 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
6972 } else
6973    Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
6974  constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
6975 I.eraseFromParent();
6976}
6977
6979AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6980 auto MaybeImmed = getImmedFromMO(Root);
6981 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6982 return std::nullopt;
6983 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6984 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6985}
6986
6988AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6989 auto MaybeImmed = getImmedFromMO(Root);
6990 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6991 return std::nullopt;
6992 uint64_t Enc = 31 - *MaybeImmed;
6993 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6994}
6995
6997AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6998 auto MaybeImmed = getImmedFromMO(Root);
6999 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7000 return std::nullopt;
7001 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7002 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7003}
7004
7006AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7007 auto MaybeImmed = getImmedFromMO(Root);
7008 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7009 return std::nullopt;
7010 uint64_t Enc = 63 - *MaybeImmed;
7011 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7012}
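// Worked example for the four shift renderers above (illustrative only): a
// 32-bit left shift by 8 is encoded with immr = (32 - 8) & 0x1f = 24
// (selectShiftA_32) and imms = 31 - 8 = 23 (selectShiftB_32), i.e. the usual
// "lsl w0, w1, #8" == "ubfm w0, w1, #24, #23" form; the *_64 variants perform
// the same arithmetic modulo 64.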
7013
7014/// Helper to select an immediate value that can be represented as a 12-bit
7015/// value shifted left by either 0 or 12. If it is possible to do so, return
7016/// the immediate and shift value. If not, return std::nullopt.
7017///
7018/// Used by selectArithImmed and selectNegArithImmed.
7020AArch64InstructionSelector::select12BitValueWithLeftShift(
7021 uint64_t Immed) const {
7022 unsigned ShiftAmt;
7023 if (Immed >> 12 == 0) {
7024 ShiftAmt = 0;
7025 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7026 ShiftAmt = 12;
7027 Immed = Immed >> 12;
7028 } else
7029 return std::nullopt;
7030
7031 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7032 return {{
7033 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7034 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7035 }};
7036}
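// Worked example (illustrative): 0xabc yields {imm = 0xabc, shift = LSL 0};
// 0x123000 yields {imm = 0x123, shift = LSL 12}; 0x123456 has low bits set and
// does not fit in 12 bits, so std::nullopt is returned and the add/sub
// immediate form is not used.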
7037
7038/// SelectArithImmed - Select an immediate value that can be represented as
7039/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7040/// Val set to the 12-bit value and Shift set to the shifter operand.
7042AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7043 // This function is called from the addsub_shifted_imm ComplexPattern,
7044  // which lists [imm] as the list of opcodes it's interested in. However,
7045 // we still need to check whether the operand is actually an immediate
7046 // here because the ComplexPattern opcode list is only used in
7047 // root-level opcode matching.
7048 auto MaybeImmed = getImmedFromMO(Root);
7049 if (MaybeImmed == std::nullopt)
7050 return std::nullopt;
7051 return select12BitValueWithLeftShift(*MaybeImmed);
7052}
7053
7054/// SelectNegArithImmed - As above, but negates the value before trying to
7055/// select it.
7057AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7058 // We need a register here, because we need to know if we have a 64 or 32
7059 // bit immediate.
7060 if (!Root.isReg())
7061 return std::nullopt;
7062 auto MaybeImmed = getImmedFromMO(Root);
7063 if (MaybeImmed == std::nullopt)
7064 return std::nullopt;
7065 uint64_t Immed = *MaybeImmed;
7066
7067 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7068 // have the opposite effect on the C flag, so this pattern mustn't match under
7069 // those circumstances.
7070 if (Immed == 0)
7071 return std::nullopt;
7072
7073  // Check whether the root operand is 32 bits or 64 bits wide.
7076 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7077 Immed = ~((uint32_t)Immed) + 1;
7078 else
7079 Immed = ~Immed + 1ULL;
7080
7081 if (Immed & 0xFFFFFFFFFF000000ULL)
7082 return std::nullopt;
7083
7084 Immed &= 0xFFFFFFULL;
7085 return select12BitValueWithLeftShift(Immed);
7086}
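// Worked example (illustrative): for a compare of a 32-bit register against
// the constant -5, Immed = -5 is negated to 5, which fits in 12 bits, so the
// comparison can be selected as "cmn w0, #5" instead of materializing -5.
// The Immed == 0 bail-out above keeps "cmp wN, #0" from being rewritten as
// "cmn wN, #0", which would set the C flag differently.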
7087
7088/// Checks if we are sure that folding MI into load/store addressing mode is
7089/// beneficial or not.
7090///
7091/// Returns:
7092/// - true if folding MI would be beneficial.
7093/// - false if folding MI would be bad.
7094/// - std::nullopt if it is not sure whether folding MI is beneficial.
7095///
7096/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7097///
7098/// %13:gpr(s64) = G_CONSTANT i64 1
7099/// %8:gpr(s64) = G_SHL %6, %13(s64)
7100/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7101/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7102std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7103 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7104 if (MI.getOpcode() == AArch64::G_SHL) {
7105 // Address operands with shifts are free, except for running on subtargets
7106 // with AddrLSLSlow14.
7107 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7108 MI.getOperand(2).getReg(), MRI)) {
7109 const APInt ShiftVal = ValAndVeg->Value;
7110
7111 // Don't fold if we know this will be slow.
7112 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7113 }
7114 }
7115 return std::nullopt;
7116}
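// Illustrative example: with "%8 = G_SHL %6, 3" feeding the G_PTR_ADD of an
// 8-byte load, the shift is free on most subtargets and folds into
// "ldr x0, [xBase, xOff, lsl #3]", so this returns true. On a subtarget with
// AddrLSLSlow14, shift amounts of 1 and 4 (2- and 16-byte accesses) return
// false and the shift is kept as a separate instruction.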
7117
7118/// Return true if it is worth folding MI into an extended register. That is,
7119/// if it's safe to pull it into the addressing mode of a load or store as a
7120/// shift.
7121/// \p IsAddrOperand whether the def of MI is used as an address operand
7122/// (e.g. feeding into an LDR/STR).
7123bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7125 bool IsAddrOperand) const {
7126
7127 // Always fold if there is one use, or if we're optimizing for size.
7128 Register DefReg = MI.getOperand(0).getReg();
7129 if (MRI.hasOneNonDBGUse(DefReg) ||
7130 MI.getParent()->getParent()->getFunction().hasOptSize())
7131 return true;
7132
7133 if (IsAddrOperand) {
7134 // If we are already sure that folding MI is good or bad, return the result.
7135 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7136 return *Worth;
7137
7138 // Fold G_PTR_ADD if its offset operand can be folded
7139 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7140 MachineInstr *OffsetInst =
7141 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7142
7143 // Note, we already know G_PTR_ADD is used by at least two instructions.
7144 // If we are also sure about whether folding is beneficial or not,
7145 // return the result.
7146 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7147 return *Worth;
7148 }
7149 }
7150
7151 // FIXME: Consider checking HasALULSLFast as appropriate.
7152
7153 // We have a fastpath, so folding a shift in and potentially computing it
7154 // many times may be beneficial. Check if this is only used in memory ops.
7155 // If it is, then we should fold.
7156 return all_of(MRI.use_nodbg_instructions(DefReg),
7157 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7158}
7159
7161 switch (Type) {
7162 case AArch64_AM::SXTB:
7163 case AArch64_AM::SXTH:
7164 case AArch64_AM::SXTW:
7165 return true;
7166 default:
7167 return false;
7168 }
7169}
7170
7172AArch64InstructionSelector::selectExtendedSHL(
7174 unsigned SizeInBytes, bool WantsExt) const {
7175 assert(Base.isReg() && "Expected base to be a register operand");
7176 assert(Offset.isReg() && "Expected offset to be a register operand");
7177
7179 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
7180
7181 unsigned OffsetOpc = OffsetInst->getOpcode();
7182 bool LookedThroughZExt = false;
7183 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7184 // Try to look through a ZEXT.
7185 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7186 return std::nullopt;
7187
7188 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
7189 OffsetOpc = OffsetInst->getOpcode();
7190 LookedThroughZExt = true;
7191
7192 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7193 return std::nullopt;
7194 }
7195 // Make sure that the memory op is a valid size.
7196 int64_t LegalShiftVal = Log2_32(SizeInBytes);
7197 if (LegalShiftVal == 0)
7198 return std::nullopt;
7199 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7200 return std::nullopt;
7201
7202 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7203 // register we will offset is the LHS, and the register containing the
7204 // constant is the RHS.
7205 Register OffsetReg = OffsetInst->getOperand(1).getReg();
7206 Register ConstantReg = OffsetInst->getOperand(2).getReg();
7207 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7208 if (!ValAndVReg) {
7209 // We didn't get a constant on the RHS. If the opcode is a shift, then
7210 // we're done.
7211 if (OffsetOpc == TargetOpcode::G_SHL)
7212 return std::nullopt;
7213
7214 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7215 std::swap(OffsetReg, ConstantReg);
7216 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
7217 if (!ValAndVReg)
7218 return std::nullopt;
7219 }
7220
7221 // The value must fit into 3 bits, and must be positive. Make sure that is
7222 // true.
7223 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7224
7225 // Since we're going to pull this into a shift, the constant value must be
7226 // a power of 2. If we got a multiply, then we need to check this.
7227 if (OffsetOpc == TargetOpcode::G_MUL) {
7228 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7229 return std::nullopt;
7230
7231 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7232 ImmVal = Log2_32(ImmVal);
7233 }
7234
7235 if ((ImmVal & 0x7) != ImmVal)
7236 return std::nullopt;
7237
7238 // We are only allowed to shift by LegalShiftVal. This shift value is built
7239 // into the instruction, so we can't just use whatever we want.
7240 if (ImmVal != LegalShiftVal)
7241 return std::nullopt;
7242
7243 unsigned SignExtend = 0;
7244 if (WantsExt) {
7245 // Check if the offset is defined by an extend, unless we looked through a
7246 // G_ZEXT earlier.
7247 if (!LookedThroughZExt) {
7248 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
7249      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
7250      if (Ext == AArch64_AM::InvalidShiftExtend)
7251 return std::nullopt;
7252
7253 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
7254 // We only support SXTW for signed extension here.
7255 if (SignExtend && Ext != AArch64_AM::SXTW)
7256 return std::nullopt;
7257 OffsetReg = ExtInst->getOperand(1).getReg();
7258 }
7259
7260 // Need a 32-bit wide register here.
7261 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
7262 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7263 }
7264
7265 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7266 // offset. Signify that we are shifting by setting the shift flag to 1.
7267 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
7268 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
7269 [=](MachineInstrBuilder &MIB) {
7270 // Need to add both immediates here to make sure that they are both
7271 // added to the instruction.
7272 MIB.addImm(SignExtend);
7273 MIB.addImm(1);
7274 }}};
7275}
7276
7277/// This is used for computing addresses like this:
7278///
7279/// ldr x1, [x2, x3, lsl #3]
7280///
7281/// Where x2 is the base register, and x3 is an offset register. The shift-left
7282/// is a constant value specific to this load instruction. That is, we'll never
7283/// see anything other than a 3 here (which corresponds to the size of the
7284/// element being loaded.)
7286AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7287 MachineOperand &Root, unsigned SizeInBytes) const {
7288 if (!Root.isReg())
7289 return std::nullopt;
7291
7292 // We want to find something like this:
7293 //
7294 // val = G_CONSTANT LegalShiftVal
7295 // shift = G_SHL off_reg val
7296 // ptr = G_PTR_ADD base_reg shift
7297 // x = G_LOAD ptr
7298 //
7299 // And fold it into this addressing mode:
7300 //
7301 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7302
7303 // Check if we can find the G_PTR_ADD.
7304 MachineInstr *PtrAdd =
7305 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7306 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7307 return std::nullopt;
7308
7309 // Now, try to match an opcode which will match our specific offset.
7310 // We want a G_SHL or a G_MUL.
7311  MachineInstr *OffsetInst =
7312      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7313 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7314 OffsetInst->getOperand(0), SizeInBytes,
7315 /*WantsExt=*/false);
7316}
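// Illustrative end-to-end example for the fold above (assuming an 8-byte
// load, i.e. SizeInBytes == 8):
//   %c:gpr(s64) = G_CONSTANT i64 3
//   %off:gpr(s64) = G_SHL %idx, %c
//   %addr:gpr(p0) = G_PTR_ADD %base, %off
//   %val:gpr(s64) = G_LOAD %addr
// becomes "ldr x0, [xBase, xIdx, lsl #3]". A G_MUL by 8 is accepted as well,
// since it is rewritten into the equivalent shift by log2(8).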
7317
7318/// This is used for computing addresses like this:
7319///
7320/// ldr x1, [x2, x3]
7321///
7322/// Where x2 is the base register, and x3 is an offset register.
7323///
7324/// When possible (or profitable) to fold a G_PTR_ADD into the address
7325/// calculation, this will do so. Otherwise, it will return std::nullopt.
7327AArch64InstructionSelector::selectAddrModeRegisterOffset(
7328 MachineOperand &Root) const {
7330
7331 // We need a GEP.
7332 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7333 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7334 return std::nullopt;
7335
7336 // If this is used more than once, let's not bother folding.
7337 // TODO: Check if they are memory ops. If they are, then we can still fold
7338 // without having to recompute anything.
7339 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7340 return std::nullopt;
7341
7342 // Base is the GEP's LHS, offset is its RHS.
7343 return {{[=](MachineInstrBuilder &MIB) {
7344 MIB.addUse(Gep->getOperand(1).getReg());
7345 },
7346 [=](MachineInstrBuilder &MIB) {
7347 MIB.addUse(Gep->getOperand(2).getReg());
7348 },
7349 [=](MachineInstrBuilder &MIB) {
7350 // Need to add both immediates here to make sure that they are both
7351 // added to the instruction.
7352 MIB.addImm(0);
7353 MIB.addImm(0);
7354 }}};
7355}
7356
7357/// This is intended to be equivalent to selectAddrModeXRO in
7358/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7360AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7361 unsigned SizeInBytes) const {
7363 if (!Root.isReg())
7364 return std::nullopt;
7365 MachineInstr *PtrAdd =
7366 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7367 if (!PtrAdd)
7368 return std::nullopt;
7369
7370  // Check for an immediate which cannot be encoded in the [base + imm]
7371 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7372 // end up with code like:
7373 //
7374 // mov x0, wide
7375 // add x1 base, x0
7376 // ldr x2, [x1, x0]
7377 //
7378 // In this situation, we can use the [base, xreg] addressing mode to save an
7379 // add/sub:
7380 //
7381 // mov x0, wide
7382 // ldr x2, [base, x0]
7383  auto ValAndVReg =
7384      getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
7385 if (ValAndVReg) {
7386 unsigned Scale = Log2_32(SizeInBytes);
7387 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7388
7389 // Skip immediates that can be selected in the load/store addressing
7390 // mode.
7391 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7392 ImmOff < (0x1000 << Scale))
7393 return std::nullopt;
7394
7395 // Helper lambda to decide whether or not it is preferable to emit an add.
7396 auto isPreferredADD = [](int64_t ImmOff) {
7397 // Constants in [0x0, 0xfff] can be encoded in an add.
7398 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7399 return true;
7400
7401 // Can it be encoded in an add lsl #12?
7402 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7403 return false;
7404
7405 // It can be encoded in an add lsl #12, but we may not want to. If it is
7406 // possible to select this as a single movz, then prefer that. A single
7407 // movz is faster than an add with a shift.
7408 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7409 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7410 };
7411
7412 // If the immediate can be encoded in a single add/sub, then bail out.
7413 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7414 return std::nullopt;
7415 }
7416
7417 // Try to fold shifts into the addressing mode.
7418 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7419 if (AddrModeFns)
7420 return AddrModeFns;
7421
7422 // If that doesn't work, see if it's possible to fold in registers from
7423 // a GEP.
7424 return selectAddrModeRegisterOffset(Root);
7425}
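// Illustrative example: an 8-byte load at "base + 0x40000" is out of range for
// the scaled unsigned 12-bit form, and 0x40000 can be materialized with a
// single movz, so an add/sub of the offset is not preferred; the offset stays
// in a register and the access is selected as "ldr x2, [xBase, xOff]".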
7426
7427/// This is used for computing addresses like this:
7428///
7429/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7430///
7431/// Where we have a 64-bit base register, a 32-bit offset register, and an
7432/// extend (which may or may not be signed).
7434AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7435 unsigned SizeInBytes) const {
7437
7438 MachineInstr *PtrAdd =
7439 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7440 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7441 return std::nullopt;
7442
7443 MachineOperand &LHS = PtrAdd->getOperand(1);
7444 MachineOperand &RHS = PtrAdd->getOperand(2);
7445 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
7446
7447 // The first case is the same as selectAddrModeXRO, except we need an extend.
7448 // In this case, we try to find a shift and extend, and fold them into the
7449 // addressing mode.
7450 //
7451 // E.g.
7452 //
7453 // off_reg = G_Z/S/ANYEXT ext_reg
7454 // val = G_CONSTANT LegalShiftVal
7455 // shift = G_SHL off_reg val
7456 // ptr = G_PTR_ADD base_reg shift
7457 // x = G_LOAD ptr
7458 //
7459 // In this case we can get a load like this:
7460 //
7461 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7462 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
7463 SizeInBytes, /*WantsExt=*/true);
7464 if (ExtendedShl)
7465 return ExtendedShl;
7466
7467 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7468 //
7469 // e.g.
7470 // ldr something, [base_reg, ext_reg, sxtw]
7471 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
7472 return std::nullopt;
7473
7474 // Check if this is an extend. We'll get an extend type if it is.
7475  AArch64_AM::ShiftExtendType Ext =
7476      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
7477  if (Ext == AArch64_AM::InvalidShiftExtend)
7478 return std::nullopt;
7479
7480 // Need a 32-bit wide register.
7481 MachineIRBuilder MIB(*PtrAdd);
7482 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
7483 AArch64::GPR32RegClass, MIB);
7484 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7485
7486 // Base is LHS, offset is ExtReg.
7487 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
7488 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7489 [=](MachineInstrBuilder &MIB) {
7490 MIB.addImm(SignExtend);
7491 MIB.addImm(0);
7492 }}};
7493}
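// Illustrative example for the WRO mode (assuming a 4-byte load):
//   %ext:gpr(s64) = G_SEXT %idx:gpr(s32)
//   %c:gpr(s64)   = G_CONSTANT i64 2
//   %off:gpr(s64) = G_SHL %ext, %c
//   %addr:gpr(p0) = G_PTR_ADD %base, %off
//   %val:gpr(s32) = G_LOAD %addr
// becomes "ldr w0, [xBase, wIdx, sxtw #2]"; with a G_ZEXT instead of the
// G_SEXT the extend field would be uxtw.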
7494
7495/// Select a "register plus unscaled signed 9-bit immediate" address. This
7496/// should only match when there is an offset that is not valid for a scaled
7497/// immediate addressing mode. The "Size" argument is the size in bytes of the
7498/// memory reference, which is needed here to know what is valid for a scaled
7499/// immediate.
7501AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7502 unsigned Size) const {
7503  MachineRegisterInfo &MRI =
7504      Root.getParent()->getParent()->getParent()->getRegInfo();
7505
7506 if (!Root.isReg())
7507 return std::nullopt;
7508
7509 if (!isBaseWithConstantOffset(Root, MRI))
7510 return std::nullopt;
7511
7512 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7513
7514 MachineOperand &OffImm = RootDef->getOperand(2);
7515 if (!OffImm.isReg())
7516 return std::nullopt;
7517 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7518 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7519 return std::nullopt;
7520 int64_t RHSC;
7521 MachineOperand &RHSOp1 = RHS->getOperand(1);
7522 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7523 return std::nullopt;
7524 RHSC = RHSOp1.getCImm()->getSExtValue();
7525
7526 if (RHSC >= -256 && RHSC < 256) {
7527 MachineOperand &Base = RootDef->getOperand(1);
7528 return {{
7529 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7530 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7531 }};
7532 }
7533 return std::nullopt;
7534}
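// Illustrative example: a 4-byte load at "base - 8" cannot use the scaled
// unsigned form (the offset is negative), but -8 lies within [-256, 256), so
// this pattern matches and the access is selected as "ldur w0, [xBase, #-8]".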
7535
7537AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7538 unsigned Size,
7539 MachineRegisterInfo &MRI) const {
7540 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7541 return std::nullopt;
7542 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
7543 if (Adrp.getOpcode() != AArch64::ADRP)
7544 return std::nullopt;
7545
7546 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7547 auto Offset = Adrp.getOperand(1).getOffset();
7548 if (Offset % Size != 0)
7549 return std::nullopt;
7550
7551 auto GV = Adrp.getOperand(1).getGlobal();
7552 if (GV->isThreadLocal())
7553 return std::nullopt;
7554
7555 auto &MF = *RootDef.getParent()->getParent();
7556 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
7557 return std::nullopt;
7558
7559 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
7560 MachineIRBuilder MIRBuilder(RootDef);
7561 Register AdrpReg = Adrp.getOperand(0).getReg();
7562 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
7563 [=](MachineInstrBuilder &MIB) {
7564 MIB.addGlobalAddress(GV, Offset,
7565 OpFlags | AArch64II::MO_PAGEOFF |
7567 }}};
7568}
7569
7570/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7571/// "Size" argument is the size in bytes of the memory reference, which
7572/// determines the scale.
7574AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7575 unsigned Size) const {
7576 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7577  MachineRegisterInfo &MRI = MF.getRegInfo();
7578
7579 if (!Root.isReg())
7580 return std::nullopt;
7581
7582 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7583 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7584 return {{
7585 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
7586 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7587 }};
7588 }
7589
7590  CodeModel::Model CM = MF.getTarget().getCodeModel();
7591  // Check if we can fold in the ADD of small code model ADRP + ADD address.
7592 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7593 // globals into the offset.
7594 MachineInstr *RootParent = Root.getParent();
7595 if (CM == CodeModel::Small &&
7596 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7597 STI.isTargetDarwin())) {
7598 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
7599 if (OpFns)
7600 return OpFns;
7601 }
7602
7603 if (isBaseWithConstantOffset(Root, MRI)) {
7604 MachineOperand &LHS = RootDef->getOperand(1);
7605 MachineOperand &RHS = RootDef->getOperand(2);
7606 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
7607 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
7608
7609 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
7610 unsigned Scale = Log2_32(Size);
7611 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7612 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7613 return {{
7614 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
7615 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7616 }};
7617
7618 return {{
7619 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
7620 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
7621 }};
7622 }
7623 }
7624
7625 // Before falling back to our general case, check if the unscaled
7626 // instructions can handle this. If so, that's preferable.
7627 if (selectAddrModeUnscaled(Root, Size))
7628 return std::nullopt;
7629
7630 return {{
7631 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
7632 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
7633 }};
7634}
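// Illustrative example: a 4-byte load at "base + 8" has Scale == 2, so the
// rendered operands are the base register and the scaled immediate
// 8 >> 2 == 2, i.e. the encoding behind "ldr w0, [xBase, #8]". An offset of 6
// fails the alignment check above and is left to the unscaled (ldur) path.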
7635
7636/// Given a shift instruction, return the correct shift type for that
7637/// instruction.
7638static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7639  switch (MI.getOpcode()) {
7640  default:
7641    return AArch64_AM::InvalidShiftExtend;
7642 case TargetOpcode::G_SHL:
7643 return AArch64_AM::LSL;
7644 case TargetOpcode::G_LSHR:
7645 return AArch64_AM::LSR;
7646 case TargetOpcode::G_ASHR:
7647 return AArch64_AM::ASR;
7648 case TargetOpcode::G_ROTR:
7649 return AArch64_AM::ROR;
7650 }
7651}
7652
7653/// Select a "shifted register" operand. If the value is not shifted, set the
7654/// shift operand to a default value of "lsl 0".
7656AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7657 bool AllowROR) const {
7658 if (!Root.isReg())
7659 return std::nullopt;
7660  MachineRegisterInfo &MRI =
7661      Root.getParent()->getParent()->getParent()->getRegInfo();
7662
7663 // Check if the operand is defined by an instruction which corresponds to
7664 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7665  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
7666  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
7667 if (ShType == AArch64_AM::InvalidShiftExtend)
7668 return std::nullopt;
7669 if (ShType == AArch64_AM::ROR && !AllowROR)
7670 return std::nullopt;
7671 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
7672 return std::nullopt;
7673
7674 // Need an immediate on the RHS.
7675 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
7676 auto Immed = getImmedFromMO(ShiftRHS);
7677 if (!Immed)
7678 return std::nullopt;
7679
7680 // We have something that we can fold. Fold in the shift's LHS and RHS into
7681 // the instruction.
7682 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
7683 Register ShiftReg = ShiftLHS.getReg();
7684
7685 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
7686 unsigned Val = *Immed & (NumBits - 1);
7687 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
7688
7689 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
7690 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
7691}
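// Illustrative example: with "%s:gpr(s64) = G_SHL %x, G_CONSTANT i64 3" as an
// operand of a G_ADD, the fold above renders %x plus the shifter immediate
// LSL #3, giving "add x0, x1, x2, lsl #3". A G_ROTR is only folded when the
// caller passes AllowROR, which is typically the case for the
// logical-instruction patterns.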
7692
7693AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7694 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7695 unsigned Opc = MI.getOpcode();
7696
7697 // Handle explicit extend instructions first.
7698 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7699 unsigned Size;
7700 if (Opc == TargetOpcode::G_SEXT)
7701 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7702 else
7703 Size = MI.getOperand(2).getImm();
7704 assert(Size != 64 && "Extend from 64 bits?");
7705 switch (Size) {
7706 case 8:
7707 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7708 case 16:
7709 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7710 case 32:
7711 return AArch64_AM::SXTW;
7712    default:
7713      return AArch64_AM::InvalidShiftExtend;
7714 }
7715 }
7716
7717 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7718 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7719 assert(Size != 64 && "Extend from 64 bits?");
7720 switch (Size) {
7721 case 8:
7722 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7723 case 16:
7724 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7725 case 32:
7726 return AArch64_AM::UXTW;
7727    default:
7728      return AArch64_AM::InvalidShiftExtend;
7729 }
7730 }
7731
7732 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7733 // on the RHS.
7734  if (Opc != TargetOpcode::G_AND)
7735    return AArch64_AM::InvalidShiftExtend;
7736
7737 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
7738  if (!MaybeAndMask)
7739    return AArch64_AM::InvalidShiftExtend;
7740 uint64_t AndMask = *MaybeAndMask;
7741 switch (AndMask) {
7742  default:
7743    return AArch64_AM::InvalidShiftExtend;
7744 case 0xFF:
7745 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7746 case 0xFFFF:
7747 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7748 case 0xFFFFFFFF:
7749 return AArch64_AM::UXTW;
7750 }
7751}
7752
7753Register AArch64InstructionSelector::moveScalarRegClass(
7754 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7755 MachineRegisterInfo &MRI = *MIB.getMRI();
7756 auto Ty = MRI.getType(Reg);
7757 assert(!Ty.isVector() && "Expected scalars only!");
7758 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7759 return Reg;
7760
7761 // Create a copy and immediately select it.
7762 // FIXME: We should have an emitCopy function?
7763 auto Copy = MIB.buildCopy({&RC}, {Reg});
7764 selectCopy(*Copy, TII, MRI, TRI, RBI);
7765 return Copy.getReg(0);
7766}
7767
7768/// Select an "extended register" operand. This operand folds in an extend
7769/// followed by an optional left shift.
7771AArch64InstructionSelector::selectArithExtendedRegister(
7772 MachineOperand &Root) const {
7773 if (!Root.isReg())
7774 return std::nullopt;
7775  MachineRegisterInfo &MRI =
7776      Root.getParent()->getParent()->getParent()->getRegInfo();
7777
7778 uint64_t ShiftVal = 0;
7779  Register ExtReg;
7780  AArch64_AM::ShiftExtendType Ext;
7781 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
7782 if (!RootDef)
7783 return std::nullopt;
7784
7785 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
7786 return std::nullopt;
7787
7788 // Check if we can fold a shift and an extend.
7789 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7790 // Look for a constant on the RHS of the shift.
7791 MachineOperand &RHS = RootDef->getOperand(2);
7792 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
7793 if (!MaybeShiftVal)
7794 return std::nullopt;
7795 ShiftVal = *MaybeShiftVal;
7796 if (ShiftVal > 4)
7797 return std::nullopt;
7798 // Look for a valid extend instruction on the LHS of the shift.
7799 MachineOperand &LHS = RootDef->getOperand(1);
7800 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
7801 if (!ExtDef)
7802 return std::nullopt;
7803    Ext = getExtendTypeForInst(*ExtDef, MRI);
7804    if (Ext == AArch64_AM::InvalidShiftExtend)
7805 return std::nullopt;
7806 ExtReg = ExtDef->getOperand(1).getReg();
7807 } else {
7808 // Didn't get a shift. Try just folding an extend.
7809    Ext = getExtendTypeForInst(*RootDef, MRI);
7810    if (Ext == AArch64_AM::InvalidShiftExtend)
7811 return std::nullopt;
7812 ExtReg = RootDef->getOperand(1).getReg();
7813
7814 // If we have a 32 bit instruction which zeroes out the high half of a
7815 // register, we get an implicit zero extend for free. Check if we have one.
7816 // FIXME: We actually emit the extend right now even though we don't have
7817 // to.
7818 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
7819 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
7820 if (isDef32(*ExtInst))
7821 return std::nullopt;
7822 }
7823 }
7824
7825 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7826 // copy.
7827 MachineIRBuilder MIB(*RootDef);
7828 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7829
7830 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
7831 [=](MachineInstrBuilder &MIB) {
7832 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7833 }}};
7834}
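// Illustrative example: with
//   %e:gpr(s64) = G_SEXT %idx:gpr(s32)
//   %s:gpr(s64) = G_SHL %e, G_CONSTANT i64 2
// used by a G_ADD, the renderers above produce the 32-bit source register and
// an arith-extend immediate encoding sxtw with a left shift of 2, i.e.
// "add x0, x1, w2, sxtw #2". Shift amounts greater than 4 are rejected.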
7835
7837AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7838 if (!Root.isReg())
7839 return std::nullopt;
7840  MachineRegisterInfo &MRI =
7841      Root.getParent()->getParent()->getParent()->getRegInfo();
7842
7843 auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
7844 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7845 STI.isLittleEndian())
7846 Extract =
7847 getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
7848 if (!Extract)
7849 return std::nullopt;
7850
7851 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7852 if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
7853 Register ExtReg = Extract->MI->getOperand(2).getReg();
7854 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7855 }
7856 }
7857 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7858    LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
7859    auto LaneIdx = getIConstantVRegValWithLookThrough(
7860 Extract->MI->getOperand(2).getReg(), MRI);
7861 if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
7862 LaneIdx->Value.getSExtValue() == 1) {
7863 Register ExtReg = Extract->MI->getOperand(1).getReg();
7864 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
7865 }
7866 }
7867
7868 return std::nullopt;
7869}
7870
7871void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7872 const MachineInstr &MI,
7873 int OpIdx) const {
7874 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7875 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7876 "Expected G_CONSTANT");
7877 std::optional<int64_t> CstVal =
7878 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7879 assert(CstVal && "Expected constant value");
7880 MIB.addImm(*CstVal);
7881}
7882
7883void AArch64InstructionSelector::renderLogicalImm32(
7884 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7885 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7886 "Expected G_CONSTANT");
7887  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7888  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7889 MIB.addImm(Enc);
7890}
7891
7892void AArch64InstructionSelector::renderLogicalImm64(
7893 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7894 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7895 "Expected G_CONSTANT");
7896  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7897  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7898 MIB.addImm(Enc);
7899}
7900
7901void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7902 const MachineInstr &MI,
7903 int OpIdx) const {
7904 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7905 "Expected G_UBSANTRAP");
7906 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7907}
7908
7909void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7910 const MachineInstr &MI,
7911 int OpIdx) const {
7912 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7913 "Expected G_FCONSTANT");
7914 MIB.addImm(
7915 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7916}
7917
7918void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7919 const MachineInstr &MI,
7920 int OpIdx) const {
7921 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7922 "Expected G_FCONSTANT");
7923 MIB.addImm(
7924 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7925}
7926
7927void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7928 const MachineInstr &MI,
7929 int OpIdx) const {
7930 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7931 "Expected G_FCONSTANT");
7932 MIB.addImm(
7933 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7934}
7935
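// Unlike the plain FP renderers above, this one bitcasts the f32 constant to
// its integer representation and emits it using the AdvSIMD "modified
// immediate" type-4 encoding (AArch64_AM::encodeAdvSIMDModImmType4).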
7936void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7937 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7938 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7939 "Expected G_FCONSTANT");
7941 .getFPImm()
7942 ->getValueAPF()
7943 .bitcastToAPInt()
7944 .getZExtValue()));
7945}
7946
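// Returns true if MI is a load or store whose single memory operand accesses
// exactly NumBytes bytes; presumably used as a predicate by the imported
// selection patterns.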
7947bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7948 const MachineInstr &MI, unsigned NumBytes) const {
7949 if (!MI.mayLoadOrStore())
7950 return false;
7951 assert(MI.hasOneMemOperand() &&
7952 "Expected load/store to have only one mem op!");
7953 return (*MI.memoperands_begin())->getSize() == NumBytes;
7954}
7955
7956bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7957 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7958 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7959 return false;
7960
7961 // Only return true if we know the operation will zero-out the high half of
7962 // the 64-bit register. Truncates can be subregister copies, which don't
7963 // zero out the high bits. Copies and other copy-like instructions can be
7964 // fed by truncates, or could be lowered as subregister copies.
7965 switch (MI.getOpcode()) {
7966 default:
7967 return true;
7968 case TargetOpcode::COPY:
7969 case TargetOpcode::G_BITCAST:
7970 case TargetOpcode::G_TRUNC:
7971 case TargetOpcode::G_PHI:
7972 return false;
7973 }
7974}
7975
7976
7977// Perform fixups on the given PHI instruction's operands to force them all
7978 // to be the same as the destination regbank.
7979 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7980 const AArch64RegisterBankInfo &RBI) {
7981 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7982 Register DstReg = MI.getOperand(0).getReg();
7983 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
7984 assert(DstRB && "Expected PHI dst to have regbank assigned");
7985 MachineIRBuilder MIB(MI);
7986
7987 // Go through each operand and ensure it has the same regbank.
7988 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
7989 if (!MO.isReg())
7990 continue;
7991 Register OpReg = MO.getReg();
7992 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
7993 if (RB != DstRB) {
7994 // Insert a cross-bank copy.
7995 auto *OpDef = MRI.getVRegDef(OpReg);
7996 const LLT &Ty = MRI.getType(OpReg);
7997 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7998
7999 // Any instruction we insert must appear after all PHIs in the block
8000 // for the block to be valid MIR.
8001 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
8002 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8003 InsertPt = OpDefBB.getFirstNonPHI();
8004 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
8005 auto Copy = MIB.buildCopy(Ty, OpReg);
8006 MRI.setRegBank(Copy.getReg(0), *DstRB);
8007 MO.setReg(Copy.getReg(0));
8008 }
8009 }
8010}
8011
8012void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8013 // We're looking for PHIs, build a list so we don't invalidate iterators.
8014 MachineRegisterInfo &MRI = MF.getRegInfo();
8015 SmallVector<MachineInstr *, 32> Phis;
8016 for (auto &BB : MF) {
8017 for (auto &MI : BB) {
8018 if (MI.getOpcode() == TargetOpcode::G_PHI)
8019 Phis.emplace_back(&MI);
8020 }
8021 }
8022
8023 for (auto *MI : Phis) {
8024 // We need to do some work here if the operand types are < 16 bit and they
8025 // are split across fpr/gpr banks. Since all types <32b on gpr
8026 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8027 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8028 // be selecting heterogeneous regbanks for operands if possible, but we
8029 // still need to be able to deal with it here.
8030 //
8031 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8032 // one other operand is on the fpr bank, then we add cross-bank copies
8033 // to homogenize the operand banks. For simplicity the bank that we choose
8034 // to settle on is whatever bank the def operand has. For example:
8035 //
8036 // %endbb:
8037 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8038 // =>
8039 // %bb2:
8040 // ...
8041 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8042 // ...
8043 // %endbb:
8044 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8045 bool HasGPROp = false, HasFPROp = false;
8046 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8047 if (!MO.isReg())
8048 continue;
8049 const LLT &Ty = MRI.getType(MO.getReg());
8050 if (!Ty.isValid() || !Ty.isScalar())
8051 break;
8052 if (Ty.getSizeInBits() >= 32)
8053 break;
8054 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8055 // If for some reason we don't have a regbank yet, don't try anything.
8056 if (!RB)
8057 break;
8058
8059 if (RB->getID() == AArch64::GPRRegBankID)
8060 HasGPROp = true;
8061 else
8062 HasFPROp = true;
8063 }
8064 // We have heterogeneous regbanks, so fix them up.
8065 if (HasGPROp && HasFPROp)
8066 fixupPHIOpBanks(*MI, MRI, RBI);
8067 }
8068}
8069
8070 namespace llvm {
8071 InstructionSelector *
8072 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8073 const AArch64Subtarget &Subtarget,
8074 const AArch64RegisterBankInfo &RBI) {
8075 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8076}
8077}