LLVM: lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp Source File

//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

/// \file

/// This file implements the targeting of the InstructionSelector class for

/// AArch64.

/// \todo This should be generated by TableGen.

//===----------------------------------------------------------------------===//


#include "AArch64GlobalISelUtils.h"

#include "AArch64InstrInfo.h"

#include "AArch64MachineFunctionInfo.h"

#include "AArch64RegisterBankInfo.h"

#include "AArch64RegisterInfo.h"

#include "AArch64Subtarget.h"

#include "AArch64TargetMachine.h"

#include "MCTargetDesc/AArch64AddressingModes.h"

#include "MCTargetDesc/AArch64MCTargetDesc.h"

#include "llvm/BinaryFormat/Dwarf.h"

#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"

#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

#include "llvm/CodeGen/GlobalISel/Utils.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineConstantPool.h"

#include "llvm/CodeGen/MachineFrameInfo.h"

#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/CodeGen/MachineInstr.h"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include "llvm/CodeGen/MachineMemOperand.h"

#include "llvm/CodeGen/MachineOperand.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/TargetOpcodes.h"

#include "llvm/CodeGen/TargetRegisterInfo.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/IntrinsicsAArch64.h"

#include "llvm/IR/Type.h"

#include "llvm/Pass.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

#include <optional>


#define DEBUG_TYPE "aarch64-isel"


using namespace llvm;

using namespace MIPatternMatch;

using namespace AArch64GISelUtils;


namespace llvm {

class BlockFrequencyInfo;

class ProfileSummaryInfo;

}


namespace {


#define GET_GLOBALISEL_PREDICATE_BITSET

#include "AArch64GenGlobalISel.inc"

#undef GET_GLOBALISEL_PREDICATE_BITSET


class AArch64InstructionSelector : public InstructionSelector {

public:

  AArch64InstructionSelector(const AArch64TargetMachine &TM,

                             const AArch64Subtarget &STI,

                             const AArch64RegisterBankInfo &RBI);


  bool select(MachineInstr &I) override;

  static const char *getName() { return DEBUG_TYPE; }


  void setupMF(MachineFunction &MF, GISelValueTracking *VT,

               CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,

               BlockFrequencyInfo *BFI) override {

    InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);

    MIB.setMF(MF);


    // hasFnAttribute() is expensive to call on every BRCOND selection, so

    // cache it here for each run of the selector.

    ProduceNonFlagSettingCondBr =

        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);

    MFReturnAddr = Register();


    processPHIs(MF);

  }


private:

  /// tblgen-erated 'select' implementation, used as the initial selector for

  /// the patterns that don't require complex C++.

  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;


  // A lowering phase that runs before any selection attempts.

  // Returns true if the instruction was modified.

  bool preISelLower(MachineInstr &I);


  // An early selection function that runs before the selectImpl() call.

  bool earlySelect(MachineInstr &I);


  /// Save state that is shared between select calls, call select on \p I and

  /// then restore the saved state. This can be used to recursively call select

  /// within a select call.

  bool selectAndRestoreState(MachineInstr &I);


  // Do some preprocessing of G_PHIs before we begin selection.

  void processPHIs(MachineFunction &MF);


  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);


  /// Eliminate same-sized cross-bank copies into stores before selectImpl().

  bool contractCrossBankCopyIntoStore(MachineInstr &I,

                                      MachineRegisterInfo &MRI);


  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);


  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,

                          MachineRegisterInfo &MRI) const;

  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,

                           MachineRegisterInfo &MRI) const;


  ///@{

  /// Helper functions for selectCompareBranch.

  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,

                                    MachineIRBuilder &MIB) const;

  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,

                                    MachineIRBuilder &MIB) const;

  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,

                                    MachineIRBuilder &MIB) const;

  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,

                                  MachineBasicBlock *DstMBB,

                                  MachineIRBuilder &MIB) const;

  ///@}


  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,

                           MachineRegisterInfo &MRI);


  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);


  // Helper to generate an equivalent of scalar_to_vector into a new register,

  // returned via 'Dst'.

  MachineInstr *emitScalarToVector(unsigned EltSize,

                                   const TargetRegisterClass *DstRC,

                                   Register Scalar,

                                   MachineIRBuilder &MIRBuilder) const;

  /// Helper to narrow vector that was widened by emitScalarToVector.

  /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit

  /// vector, correspondingly.

  MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,

                                 MachineIRBuilder &MIRBuilder,

                                 MachineRegisterInfo &MRI) const;


  /// Emit a lane insert into \p DstReg, or a new vector register if

  /// std::nullopt is provided.

  ///

  /// The lane inserted into is defined by \p LaneIdx. The vector source

  /// register is given by \p SrcReg. The register containing the element is

  /// given by \p EltReg.

  MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,

                               Register EltReg, unsigned LaneIdx,

                               const RegisterBank &RB,

                               MachineIRBuilder &MIRBuilder) const;


  /// Emit a sequence of instructions representing a constant \p CV for a

  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)

  ///

  /// \returns the last instruction in the sequence on success, and nullptr

  /// otherwise.

  MachineInstr *emitConstantVector(Register Dst, Constant *CV,

                                   MachineIRBuilder &MIRBuilder,

                                   MachineRegisterInfo &MRI);


  MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,

                                  MachineIRBuilder &MIRBuilder);


  MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,

                                   MachineIRBuilder &MIRBuilder, bool Inv);


  MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,

                                   MachineIRBuilder &MIRBuilder, bool Inv);

  MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,

                                   MachineIRBuilder &MIRBuilder);

  MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,

                                     MachineIRBuilder &MIRBuilder, bool Inv);

  MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,

                                   MachineIRBuilder &MIRBuilder);


  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,

                              MachineRegisterInfo &MRI);

  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a

  /// SUBREG_TO_REG.

  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);

  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);


  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);


  /// Helper function to select vector load intrinsics like

  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.

  /// \p Opc is the opcode that the selected instruction should use.

  /// \p NumVecs is the number of vector destinations for the instruction.

  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.

  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,

                                 MachineInstr &I);

  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,

                                     MachineInstr &I);

  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,

                                  unsigned Opc);

  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,

                                      unsigned Opc);

  bool selectIntrinsicWithSideEffects(MachineInstr &I,

                                      MachineRegisterInfo &MRI);

  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectPtrAuthGlobalValue(MachineInstr &I,

                                MachineRegisterInfo &MRI) const;

  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);

  void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,

                   unsigned Opc1, unsigned Opc2, bool isExt);


  bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);


  unsigned emitConstantPoolEntry(const Constant *CPVal,

                                 MachineFunction &MF) const;

  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,

                                         MachineIRBuilder &MIRBuilder) const;


  // Emit a vector concat operation.

  MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,

                                 Register Op2,

                                 MachineIRBuilder &MIRBuilder) const;


  // Emit an integer compare between LHS and RHS, which checks for Predicate.

  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,

                                   MachineOperand &Predicate,

                                   MachineIRBuilder &MIRBuilder) const;


  /// Emit a floating point comparison between \p LHS and \p RHS.

  /// \p Pred if given is the intended predicate to use.

  MachineInstr *

  emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,

                std::optional<CmpInst::Predicate> = std::nullopt) const;


  MachineInstr *

  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,

            std::initializer_list<llvm::SrcOp> SrcOps,

            MachineIRBuilder &MIRBuilder,

            const ComplexRendererFns &RenderFns = std::nullopt) const;

  /// Helper function to emit an add or sub instruction.

  ///

  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above

  /// in a specific order.

  ///

  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.

  ///

  /// \code

  ///   const std::array<std::array<unsigned, 2>, 4> Table {

  ///    {{AArch64::ADDXri, AArch64::ADDWri},

  ///     {AArch64::ADDXrs, AArch64::ADDWrs},

  ///     {AArch64::ADDXrr, AArch64::ADDWrr},

  ///     {AArch64::SUBXri, AArch64::SUBWri},

  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};

  /// \endcode

  ///

  /// Each row in the table corresponds to a different addressing mode. Each

  /// column corresponds to a different register size.

  ///

  /// \attention Rows must be structured as follows:

  ///   - Row 0: The ri opcode variants

  ///   - Row 1: The rs opcode variants

  ///   - Row 2: The rr opcode variants

  ///   - Row 3: The ri opcode variants for negative immediates

  ///   - Row 4: The rx opcode variants

  ///

  /// \attention Columns must be structured as follows:

  ///   - Column 0: The 64-bit opcode variants

  ///   - Column 1: The 32-bit opcode variants

  ///

  /// \p Dst is the destination register of the binop to emit.

  /// \p LHS is the left-hand operand of the binop to emit.

  /// \p RHS is the right-hand operand of the binop to emit.

  MachineInstr *emitAddSub(

      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,

      Register Dst, MachineOperand &LHS, MachineOperand &RHS,

      MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,

                        MachineOperand &RHS,

                        MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,

                         MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,

                         MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,

                         MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,

                         MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,

                        MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,

                        MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,

                           AArch64CC::CondCode CC,

                           MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,

                                     const RegisterBank &DstRB, LLT ScalarTy,

                                     Register VecReg, unsigned LaneIdx,

                                     MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,

                          AArch64CC::CondCode Pred,

                          MachineIRBuilder &MIRBuilder) const;

  /// Emit a CSet for a FP compare.

  ///

  /// \p Dst is expected to be a 32-bit scalar register.

  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,

                                MachineIRBuilder &MIRBuilder) const;


  /// Emit an instruction that sets NZCV to the carry-in expected by \p I.

  /// Might elide the instruction if the previous instruction already sets NZCV

  /// correctly.

  MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);


  /// Emit the overflow op for \p Opcode.

  ///

  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,

  /// G_USUBO, etc.

  std::pair<MachineInstr *, AArch64CC::CondCode>

  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,

                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;


  bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);


  /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).

  /// In some cases this is even possible with OR operations in the expression.

  MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,

                                MachineIRBuilder &MIB) const;

  MachineInstr *emitConditionalComparison(Register LHS, Register RHS,

                                          CmpInst::Predicate CC,

                                          AArch64CC::CondCode Predicate,

                                          AArch64CC::CondCode OutCC,

                                          MachineIRBuilder &MIB) const;

  MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,

                                   bool Negate, Register CCOp,

                                   AArch64CC::CondCode Predicate,

                                   MachineIRBuilder &MIB) const;


  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.

  /// \p IsNegative is true if the test should be "not zero".

  /// This will also optimize the test bit instruction when possible.

  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,

                            MachineBasicBlock *DstMBB,

                            MachineIRBuilder &MIB) const;


  /// Emit a CB(N)Z instruction which branches to \p DestMBB.

  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,

                        MachineBasicBlock *DestMBB,

                        MachineIRBuilder &MIB) const;


  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.

  // We use these manually instead of using the importer since it doesn't

  // support SDNodeXForm.

  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;

  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;

  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;

  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;


  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;


  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,

                                            unsigned Size) const;


  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {

    return selectAddrModeUnscaled(Root, 1);

  }

  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {

    return selectAddrModeUnscaled(Root, 2);

  }

  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {

    return selectAddrModeUnscaled(Root, 4);

  }

  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {

    return selectAddrModeUnscaled(Root, 8);

  }

  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {

    return selectAddrModeUnscaled(Root, 16);

  }


  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used

  /// from complex pattern matchers like selectAddrModeIndexed().

  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,

                                          MachineRegisterInfo &MRI) const;


  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,

                                           unsigned Size) const;

  template <int Width>

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {

    return selectAddrModeIndexed(Root, Width / 8);

  }


  std::optional<bool>

  isWorthFoldingIntoAddrMode(MachineInstr &MI,

                             const MachineRegisterInfo &MRI) const;


  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,

                                     const MachineRegisterInfo &MRI,

                                     bool IsAddrOperand) const;

  ComplexRendererFns

  selectAddrModeShiftedExtendXReg(MachineOperand &Root,

                                  unsigned SizeInBytes) const;


  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether

  /// or not a shift + extend should be folded into an addressing mode. Returns

  /// None when this is not profitable or possible.

  ComplexRendererFns

  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,

                    MachineOperand &Offset, unsigned SizeInBytes,

                    bool WantsExt) const;

  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,

                                       unsigned SizeInBytes) const;

  template <int Width>

  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {

    return selectAddrModeXRO(Root, Width / 8);

  }


  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,

                                       unsigned SizeInBytes) const;

  template <int Width>

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {

    return selectAddrModeWRO(Root, Width / 8);

  }


  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,

                                           bool AllowROR = false) const;


  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {

    return selectShiftedRegister(Root);

  }


  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {

    return selectShiftedRegister(Root, true);

  }


  /// Given an extend instruction, determine the correct shift-extend type for

  /// that instruction.

  ///

  /// If the instruction is going to be used in a load or store, pass

  /// \p IsLoadStore = true.

  AArch64_AM::ShiftExtendType

  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,

                       bool IsLoadStore = false) const;


  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.

  ///

  /// \returns Either \p Reg if no change was necessary, or the new register

  /// created by moving \p Reg.

  ///

  /// Note: This uses emitCopy right now.

  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,

                              MachineIRBuilder &MIB) const;


  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;


  ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;


  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,

                      int OpIdx = -1) const;

  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,

                          int OpIdx = -1) const;

  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,

                          int OpIdx = -1) const;

  void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,

                       int OpIdx) const;

  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,

                     int OpIdx = -1) const;

  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,

                     int OpIdx = -1) const;

  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,

                     int OpIdx = -1) const;

  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,

                                    const MachineInstr &MI,

                                    int OpIdx = -1) const;


  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.

  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);


  // Optimization methods.

  bool tryOptSelect(GSelect &Sel);

  bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);

  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,

                                      MachineOperand &Predicate,

                                      MachineIRBuilder &MIRBuilder) const;


  /// Return true if \p MI is a load or store of \p NumBytes bytes.

  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;


  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit

  /// register zeroed out. In other words, the result of MI has been explicitly

  /// zero extended.

  bool isDef32(const MachineInstr &MI) const;


  const AArch64TargetMachine &TM;

  const AArch64Subtarget &STI;

  const AArch64InstrInfo &TII;

  const AArch64RegisterInfo &TRI;

  const AArch64RegisterBankInfo &RBI;


  bool ProduceNonFlagSettingCondBr = false;


  // Some cached values used during selection.

  // We use LR as a live-in register, and we keep track of it here as it can be

  // clobbered by calls.

  Register MFReturnAddr;


  MachineIRBuilder MIB;


#define GET_GLOBALISEL_PREDICATES_DECL

#include "AArch64GenGlobalISel.inc"

#undef GET_GLOBALISEL_PREDICATES_DECL


// We declare the temporaries used by selectImpl() in the class to minimize the

// cost of constructing placeholder values.

#define GET_GLOBALISEL_TEMPORARIES_DECL

#include "AArch64GenGlobalISel.inc"

#undef GET_GLOBALISEL_TEMPORARIES_DECL

};


} // end anonymous namespace


#define GET_GLOBALISEL_IMPL

#include "AArch64GenGlobalISel.inc"

#undef GET_GLOBALISEL_IMPL


AArch64InstructionSelector::AArch64InstructionSelector(

    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,

    const AArch64RegisterBankInfo &RBI)

    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),

      RBI(RBI),

#define GET_GLOBALISEL_PREDICATES_INIT

#include "AArch64GenGlobalISel.inc"

#undef GET_GLOBALISEL_PREDICATES_INIT

#define GET_GLOBALISEL_TEMPORARIES_INIT

#include "AArch64GenGlobalISel.inc"

#undef GET_GLOBALISEL_TEMPORARIES_INIT

{

}


// FIXME: This should be target-independent, inferred from the types declared

// for each class in the bank.

//

/// Given a register bank, and a type, return the smallest register class that

/// can represent that combination.

static const TargetRegisterClass *

getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,

                         bool GetAllRegSet = false) {

  if (RB.getID() == AArch64::GPRRegBankID) {

    if (Ty.getSizeInBits() <= 32)

      return GetAllRegSet ? &AArch64::GPR32allRegClass

                          : &AArch64::GPR32RegClass;

    if (Ty.getSizeInBits() == 64)

      return GetAllRegSet ? &AArch64::GPR64allRegClass

                          : &AArch64::GPR64RegClass;

    if (Ty.getSizeInBits() == 128)

      return &AArch64::XSeqPairsClassRegClass;

    return nullptr;

  }


  if (RB.getID() == AArch64::FPRRegBankID) {

    switch (Ty.getSizeInBits()) {

    case 8:

      return &AArch64::FPR8RegClass;

    case 16:

      return &AArch64::FPR16RegClass;

    case 32:

      return &AArch64::FPR32RegClass;

    case 64:

      return &AArch64::FPR64RegClass;

    case 128:

      return &AArch64::FPR128RegClass;

    }

    return nullptr;

  }


  return nullptr;

}


/// Given a register bank, and size in bits, return the smallest register class

/// that can represent that combination.

static const TargetRegisterClass *


getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,

                      bool GetAllRegSet = false) {

  if (SizeInBits.isScalable()) {

    assert(RB.getID() == AArch64::FPRRegBankID &&

           "Expected FPR regbank for scalable type size");

    return &AArch64::ZPRRegClass;

  }


  unsigned RegBankID = RB.getID();


  if (RegBankID == AArch64::GPRRegBankID) {

    assert(!SizeInBits.isScalable() && "Unexpected scalable register size");

    if (SizeInBits <= 32)

      return GetAllRegSet ? &AArch64::GPR32allRegClass

                          : &AArch64::GPR32RegClass;

    if (SizeInBits == 64)

      return GetAllRegSet ? &AArch64::GPR64allRegClass

                          : &AArch64::GPR64RegClass;

    if (SizeInBits == 128)

      return &AArch64::XSeqPairsClassRegClass;

  }


  if (RegBankID == AArch64::FPRRegBankID) {

    if (SizeInBits.isScalable()) {

      assert(SizeInBits == TypeSize::getScalable(128) &&

             "Unexpected scalable register size");

      return &AArch64::ZPRRegClass;

    }


    switch (SizeInBits) {

    default:

      return nullptr;

    case 8:

      return &AArch64::FPR8RegClass;

    case 16:

      return &AArch64::FPR16RegClass;

    case 32:

      return &AArch64::FPR32RegClass;

    case 64:

      return &AArch64::FPR64RegClass;

    case 128:

      return &AArch64::FPR128RegClass;

    }

  }


  return nullptr;

}


/// Returns the correct subregister to use for a given register class.


static bool getSubRegForClass(const TargetRegisterClass *RC,

                              const TargetRegisterInfo &TRI, unsigned &SubReg) {

  switch (TRI.getRegSizeInBits(*RC)) {

  case 8:

    SubReg = AArch64::bsub;

    break;

  case 16:

    SubReg = AArch64::hsub;

    break;

  case 32:

    if (RC != &AArch64::FPR32RegClass)

      SubReg = AArch64::sub_32;

    else

      SubReg = AArch64::ssub;

    break;

  case 64:

    SubReg = AArch64::dsub;

    break;

  default:

    LLVM_DEBUG(

        dbgs() << "Couldn't find appropriate subregister for register class.");

    return false;

  }


  return true;

}


/// Returns the minimum size the given register bank can hold.


static unsigned getMinSizeForRegBank(const RegisterBank &RB) {

  switch (RB.getID()) {

  case AArch64::GPRRegBankID:

    return 32;

  case AArch64::FPRRegBankID:

    return 8;

  default:

    llvm_unreachable("Tried to get minimum size for unknown register bank.");

  }

}


/// Create a REG_SEQUENCE instruction using the registers in \p Regs.

/// Helper function for functions like createDTuple and createQTuple.

///

/// \p RegClassIDs - The list of register class IDs available for some tuple of

/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is

/// expected to contain between 2 and 4 tuple classes.

///

/// \p SubRegs - The list of subregister classes associated with each register

/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0

/// subregister class. The index of each subregister class is expected to

/// correspond with the index of each register class.

///

/// \returns Either the destination register of REG_SEQUENCE instruction that

/// was created, or the 0th element of \p Regs if \p Regs contains a single

/// element.


static Register createTuple(ArrayRef<Register> Regs,

                            const unsigned RegClassIDs[],

                            const unsigned SubRegs[], MachineIRBuilder &MIB) {

  unsigned NumRegs = Regs.size();

  if (NumRegs == 1)

    return Regs[0];

  assert(NumRegs >= 2 && NumRegs <= 4 &&

         "Only support between two and 4 registers in a tuple!");

  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();

  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);

  auto RegSequence =

      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});

  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {

    RegSequence.addUse(Regs[I]);

    RegSequence.addImm(SubRegs[I]);

  }

  return RegSequence.getReg(0);

}


/// Create a tuple of D-registers using the registers in \p Regs.


static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {

  static const unsigned RegClassIDs[] = {

      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};

  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,

                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

}


/// Create a tuple of Q-registers using the registers in \p Regs.


static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {

  static const unsigned RegClassIDs[] = {

      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};

  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,

                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

}


static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {

  auto &MI = *Root.getParent();

  auto &MBB = *MI.getParent();

  auto &MF = *MBB.getParent();

  auto &MRI = MF.getRegInfo();

  uint64_t Immed;

  if (Root.isImm())

    Immed = Root.getImm();

  else if (Root.isCImm())

    Immed = Root.getCImm()->getZExtValue();

  else if (Root.isReg()) {

    auto ValAndVReg =

        getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);

    if (!ValAndVReg)

      return std::nullopt;

    Immed = ValAndVReg->Value.getSExtValue();

  } else

    return std::nullopt;

  return Immed;

}


/// Check whether \p I is a currently unsupported binary operation:

/// - it has an unsized type

/// - an operand is not a vreg

/// - all operands are not in the same bank

/// These are checks that should someday live in the verifier, but right now,

/// these are mostly limitations of the aarch64 selector.


static bool unsupportedBinOp(const MachineInstr &I,

                             const AArch64RegisterBankInfo &RBI,

                             const MachineRegisterInfo &MRI,

                             const AArch64RegisterInfo &TRI) {

  LLT Ty = MRI.getType(I.getOperand(0).getReg());

  if (!Ty.isValid()) {

    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");

    return true;

  }


  const RegisterBank *PrevOpBank = nullptr;

  for (auto &MO : I.operands()) {

    // FIXME: Support non-register operands.

    if (!MO.isReg()) {

      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");

      return true;

    }


    // FIXME: Can generic operations have physical registers operands? If

    // so, this will need to be taught about that, and we'll need to get the

    // bank out of the minimal class for the register.

    // Either way, this needs to be documented (and possibly verified).

    if (!MO.getReg().isVirtual()) {

      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");

      return true;

    }


    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);

    if (!OpBank) {

      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");

      return true;

    }


    if (PrevOpBank && OpBank != PrevOpBank) {

      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");

      return true;

    }

    PrevOpBank = OpBank;

  }

  return false;

}


/// Select the AArch64 opcode for the basic binary operation \p GenericOpc

/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID

/// and of size \p OpSize.

/// \returns \p GenericOpc if the combination is unsupported.


static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,

                               unsigned OpSize) {

  switch (RegBankID) {

  case AArch64::GPRRegBankID:

    if (OpSize == 32) {

      switch (GenericOpc) {

      case TargetOpcode::G_SHL:

        return AArch64::LSLVWr;

      case TargetOpcode::G_LSHR:

        return AArch64::LSRVWr;

      case TargetOpcode::G_ASHR:

        return AArch64::ASRVWr;

      default:

        return GenericOpc;

      }

    } else if (OpSize == 64) {

      switch (GenericOpc) {

      case TargetOpcode::G_PTR_ADD:

        return AArch64::ADDXrr;

      case TargetOpcode::G_SHL:

        return AArch64::LSLVXr;

      case TargetOpcode::G_LSHR:

        return AArch64::LSRVXr;

      case TargetOpcode::G_ASHR:

        return AArch64::ASRVXr;

      default:

        return GenericOpc;

      }

    }

    break;

  case AArch64::FPRRegBankID:

    switch (OpSize) {

    case 32:

      switch (GenericOpc) {

      case TargetOpcode::G_FADD:

        return AArch64::FADDSrr;

      case TargetOpcode::G_FSUB:

        return AArch64::FSUBSrr;

      case TargetOpcode::G_FMUL:

        return AArch64::FMULSrr;

      case TargetOpcode::G_FDIV:

        return AArch64::FDIVSrr;

      default:

        return GenericOpc;

      }

    case 64:

      switch (GenericOpc) {

      case TargetOpcode::G_FADD:

        return AArch64::FADDDrr;

      case TargetOpcode::G_FSUB:

        return AArch64::FSUBDrr;

      case TargetOpcode::G_FMUL:

        return AArch64::FMULDrr;

      case TargetOpcode::G_FDIV:

        return AArch64::FDIVDrr;

      case TargetOpcode::G_OR:

        return AArch64::ORRv8i8;

      default:

        return GenericOpc;

      }

    }

    break;

  }

  return GenericOpc;

}


/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,

/// appropriate for the (value) register bank \p RegBankID and of memory access

/// size \p OpSize.  This returns the variant with the base+unsigned-immediate

/// addressing mode (e.g., LDRXui).

/// \returns \p GenericOpc if the combination is unsupported.


static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,

                                    unsigned OpSize) {

  const bool isStore = GenericOpc == TargetOpcode::G_STORE;

  switch (RegBankID) {

  case AArch64::GPRRegBankID:

    switch (OpSize) {

    case 8:

      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;

    case 16:

      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;

    case 32:

      return isStore ? AArch64::STRWui : AArch64::LDRWui;

    case 64:

      return isStore ? AArch64::STRXui : AArch64::LDRXui;

    }

    break;

  case AArch64::FPRRegBankID:

    switch (OpSize) {

    case 8:

      return isStore ? AArch64::STRBui : AArch64::LDRBui;

    case 16:

      return isStore ? AArch64::STRHui : AArch64::LDRHui;

    case 32:

      return isStore ? AArch64::STRSui : AArch64::LDRSui;

    case 64:

      return isStore ? AArch64::STRDui : AArch64::LDRDui;

    case 128:

      return isStore ? AArch64::STRQui : AArch64::LDRQui;

    }

    break;

  }

  return GenericOpc;

}


/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg

/// to \p *To.

///

/// E.g "To = COPY SrcReg:SubReg"


static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,

                       const RegisterBankInfo &RBI, Register SrcReg,

                       const TargetRegisterClass *To, unsigned SubReg) {

  assert(SrcReg.isValid() && "Expected a valid source register?");

  assert(To && "Destination register class cannot be null");

  assert(SubReg && "Expected a valid subregister");


  MachineIRBuilder MIB(I);

  auto SubRegCopy =

      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);

  MachineOperand &RegOp = I.getOperand(1);

  RegOp.setReg(SubRegCopy.getReg(0));


  // It's possible that the destination register won't be constrained. Make

  // sure that happens.

  if (!I.getOperand(0).getReg().isPhysical())

    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);


  return true;

}


/// Helper function to get the source and destination register classes for a

/// copy. Returns a std::pair containing the source register class for the

/// copy, and the destination register class for the copy. If a register class

/// cannot be determined, then it will be nullptr.

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>


getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,

                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,

                     const RegisterBankInfo &RBI) {

  Register DstReg = I.getOperand(0).getReg();

  Register SrcReg = I.getOperand(1).getReg();

  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);


  TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);


  // Special casing for cross-bank copies of s1s. We can technically represent

  // a 1-bit value with any size of register. The minimum size for a GPR is 32

  // bits. So, we need to put the FPR on 32 bits as well.

  //

  // FIXME: I'm not sure if this case holds true outside of copies. If it does,

  // then we can pull it into the helpers that get the appropriate class for a

  // register bank. Or make a new helper that carries along some constraint

  // information.

  if (SrcRegBank != DstRegBank &&

      (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))

    SrcSize = DstSize = TypeSize::getFixed(32);


  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),

          getMinClassForRegBank(DstRegBank, DstSize, true)};

}


// FIXME: We need some sort of API in RBI/TRI to allow generic code to

// constrain operands of simple instructions given a TargetRegisterClass

// and LLT


static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,

                             const RegisterBankInfo &RBI) {

  for (MachineOperand &MO : I.operands()) {

    if (!MO.isReg())

      continue;

    Register Reg = MO.getReg();

    if (!Reg)

      continue;

    if (Reg.isPhysical())

      continue;

    LLT Ty = MRI.getType(Reg);

    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);

    const TargetRegisterClass *RC =

        dyn_cast<const TargetRegisterClass *>(RegClassOrBank);

    if (!RC) {

      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);

      RC = getRegClassForTypeOnBank(Ty, RB);

      if (!RC) {

        LLVM_DEBUG(

            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");

        break;

      }

    }

    RBI.constrainGenericRegister(Reg, *RC, MRI);

  }


  return true;

}


static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,

                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,

                       const RegisterBankInfo &RBI) {

  Register DstReg = I.getOperand(0).getReg();

  Register SrcReg = I.getOperand(1).getReg();

  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);


  // Find the correct register classes for the source and destination registers.

  const TargetRegisterClass *SrcRC;

  const TargetRegisterClass *DstRC;

  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);


  if (!DstRC) {

    LLVM_DEBUG(dbgs() << "Unexpected dest size "

                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');

    return false;

  }


  // Is this a copy? If so, then we may need to insert a subregister copy.

  if (I.isCopy()) {

    // Yes. Check if there's anything to fix up.

    if (!SrcRC) {

      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");

      return false;

    }


    const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);

    const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);

    unsigned SubReg;


    // If the source bank doesn't support a subregister copy small enough,

    // then we first need to copy to the destination bank.

    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {

      const TargetRegisterClass *DstTempRC =

          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);

      getSubRegForClass(DstRC, TRI, SubReg);


      MachineIRBuilder MIB(I);

      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});

      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);

    } else if (SrcSize > DstSize) {

      // If the source register is bigger than the destination we need to

      // perform a subregister copy.

      const TargetRegisterClass *SubRegRC =

          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);

      getSubRegForClass(SubRegRC, TRI, SubReg);

      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);

    } else if (DstSize > SrcSize) {

      // If the destination register is bigger than the source we need to do

      // a promotion using SUBREG_TO_REG.

      const TargetRegisterClass *PromotionRC =

          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);

      getSubRegForClass(SrcRC, TRI, SubReg);


      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);

      BuildMI(*I.getParent(), I, I.getDebugLoc(),

              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)

          .addImm(0)

          .addUse(SrcReg)

          .addImm(SubReg);

      MachineOperand &RegOp = I.getOperand(1);

      RegOp.setReg(PromoteReg);

    }


    // If the destination is a physical register, then there's nothing to

    // change, so we're done.

    if (DstReg.isPhysical())

      return true;

  }


  // No need to constrain SrcReg. It will get constrained when we hit another

  // of its use or its defs. Copies do not have constraints.

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {

    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())

                      << " operand\n");

    return false;

  }


  // If this a GPR ZEXT that we want to just reduce down into a copy.

  // The sizes will be mismatched with the source < 32b but that's ok.

  if (I.getOpcode() == TargetOpcode::G_ZEXT) {

    I.setDesc(TII.get(AArch64::COPY));

    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);

    return selectCopy(I, TII, MRI, TRI, RBI);

  }


  I.setDesc(TII.get(AArch64::COPY));

  return true;

}


MachineInstr *

AArch64InstructionSelector::emitSelect(Register Dst, Register True,

                                       Register False, AArch64CC::CondCode CC,

                                       MachineIRBuilder &MIB) const {

  MachineRegisterInfo &MRI = *MIB.getMRI();

  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==

             RBI.getRegBank(True, MRI, TRI)->getID() &&

         "Expected both select operands to have the same regbank?");

  LLT Ty = MRI.getType(True);

  if (Ty.isVector())

    return nullptr;

  const unsigned Size = Ty.getSizeInBits();

  assert((Size == 32 || Size == 64) &&

         "Expected 32 bit or 64 bit select only?");

  const bool Is32Bit = Size == 32;

  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {

    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;

    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);

    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);

    return &*FCSel;

  }


  // By default, we'll try and emit a CSEL.

  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;

  bool Optimized = false;

  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,

                                 &Optimized](Register &Reg, Register &OtherReg,

                                             bool Invert) {

    if (Optimized)

      return false;


    // Attempt to fold:

    //

    // %sub = G_SUB 0, %x

    // %select = G_SELECT cc, %reg, %sub

    //

    // Into:

    // %select = CSNEG %reg, %x, cc

    Register MatchReg;

    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {

      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;

      Reg = MatchReg;

      if (Invert) {

        CC = AArch64CC::getInvertedCondCode(CC);

        std::swap(Reg, OtherReg);

      }

      return true;

    }


    // Attempt to fold:

    //

    // %xor = G_XOR %x, -1

    // %select = G_SELECT cc, %reg, %xor

    //

    // Into:

    // %select = CSINV %reg, %x, cc

    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {

      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

      Reg = MatchReg;

      if (Invert) {

        CC = AArch64CC::getInvertedCondCode(CC);

        std::swap(Reg, OtherReg);

      }

      return true;

    }


    // Attempt to fold:

    //

    // %add = G_ADD %x, 1

    // %select = G_SELECT cc, %reg, %add

    //

    // Into:

    // %select = CSINC %reg, %x, cc

    if (mi_match(Reg, MRI,

                 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),

                          m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {

      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;

      Reg = MatchReg;

      if (Invert) {

        CC = AArch64CC::getInvertedCondCode(CC);

        std::swap(Reg, OtherReg);

      }

      return true;

    }


    return false;

  };


  // Helper lambda which tries to use CSINC/CSINV for the instruction when its

  // true/false values are constants.

  // FIXME: All of these patterns already exist in tablegen. We should be

  // able to import these.

  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,

                          &Optimized]() {

    if (Optimized)

      return false;

    auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);

    auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);

    if (!TrueCst && !FalseCst)

      return false;


    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;

    if (TrueCst && FalseCst) {

      int64_t T = TrueCst->Value.getSExtValue();

      int64_t F = FalseCst->Value.getSExtValue();


      if (T == 0 && F == 1) {

        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc

        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;

        True = ZReg;

        False = ZReg;

        return true;

      }


      if (T == 0 && F == -1) {

        // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc

        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

        True = ZReg;

        False = ZReg;

        return true;

      }

    }


    if (TrueCst) {

      int64_t T = TrueCst->Value.getSExtValue();

      if (T == 1) {

        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc

        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;

        True = False;

        False = ZReg;

        CC = AArch64CC::getInvertedCondCode(CC);

        return true;

      }


      if (T == -1) {

        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc

        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

        True = False;

        False = ZReg;

        CC = AArch64CC::getInvertedCondCode(CC);

        return true;

      }

    }


    if (FalseCst) {

      int64_t F = FalseCst->Value.getSExtValue();

      if (F == 1) {

        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc

        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;

        False = ZReg;

        return true;

      }


      if (F == -1) {

        // G_SELECT cc, t, -1 -> CSINC t, zreg, cc

        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

        False = ZReg;

        return true;

      }

    }

    return false;

  };


  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);

  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);

  Optimized |= TryOptSelectCst();

  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);

  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);

  return &*SelectInst;

}


static AArch64CC::CondCode


changeICMPPredToAArch64CC(CmpInst::Predicate P, Register RHS = {},

                          MachineRegisterInfo *MRI = nullptr) {

  switch (P) {

  default:

    llvm_unreachable("Unknown condition code!");

  case CmpInst::ICMP_NE:

    return AArch64CC::NE;

  case CmpInst::ICMP_EQ:

    return AArch64CC::EQ;

  case CmpInst::ICMP_SGT:

    return AArch64CC::GT;

  case CmpInst::ICMP_SGE:

    if (RHS && MRI) {

      auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);

      if (ValAndVReg && ValAndVReg->Value == 0)

        return AArch64CC::PL;

    }

    return AArch64CC::GE;

  case CmpInst::ICMP_SLT:

    if (RHS && MRI) {

      auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);

      if (ValAndVReg && ValAndVReg->Value == 0)

        return AArch64CC::MI;

    }

    return AArch64CC::LT;

  case CmpInst::ICMP_SLE:

    return AArch64CC::LE;

  case CmpInst::ICMP_UGT:

    return AArch64CC::HI;

  case CmpInst::ICMP_UGE:

    return AArch64CC::HS;

  case CmpInst::ICMP_ULT:

    return AArch64CC::LO;

  case CmpInst::ICMP_ULE:

    return AArch64CC::LS;

  }

}


/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.


static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,

                                    AArch64CC::CondCode &CondCode,

                                    AArch64CC::CondCode &CondCode2) {

  CondCode2 = AArch64CC::AL;

  switch (CC) {

  default:

    llvm_unreachable("Unknown FP condition!");

  case CmpInst::FCMP_OEQ:

    CondCode = AArch64CC::EQ;

    break;

  case CmpInst::FCMP_OGT:

    CondCode = AArch64CC::GT;

    break;

  case CmpInst::FCMP_OGE:

    CondCode = AArch64CC::GE;

    break;

  case CmpInst::FCMP_OLT:

    CondCode = AArch64CC::MI;

    break;

  case CmpInst::FCMP_OLE:

    CondCode = AArch64CC::LS;

    break;

  case CmpInst::FCMP_ONE:

    CondCode = AArch64CC::MI;

    CondCode2 = AArch64CC::GT;

    break;

  case CmpInst::FCMP_ORD:

    CondCode = AArch64CC::VC;

    break;

  case CmpInst::FCMP_UNO:

    CondCode = AArch64CC::VS;

    break;

  case CmpInst::FCMP_UEQ:

    CondCode = AArch64CC::EQ;

    CondCode2 = AArch64CC::VS;

    break;

  case CmpInst::FCMP_UGT:

    CondCode = AArch64CC::HI;

    break;

  case CmpInst::FCMP_UGE:

    CondCode = AArch64CC::PL;

    break;

  case CmpInst::FCMP_ULT:

    CondCode = AArch64CC::LT;

    break;

  case CmpInst::FCMP_ULE:

    CondCode = AArch64CC::LE;

    break;

  case CmpInst::FCMP_UNE:

    CondCode = AArch64CC::NE;

    break;

  }

}


/// Convert an IR fp condition code to an AArch64 CC.

/// This differs from changeFPCCToAArch64CC in that it returns cond codes that

/// should be AND'ed instead of OR'ed.


static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,

                                     AArch64CC::CondCode &CondCode,

                                     AArch64CC::CondCode &CondCode2) {

  CondCode2 = AArch64CC::AL;

  switch (CC) {

  default:

    changeFPCCToORAArch64CC(CC, CondCode, CondCode2);

    assert(CondCode2 == AArch64CC::AL);

    break;

  case CmpInst::FCMP_ONE:

    // (a one b)

    // == ((a olt b) || (a ogt b))

    // == ((a ord b) && (a une b))

    CondCode = AArch64CC::VC;

    CondCode2 = AArch64CC::NE;

    break;

  case CmpInst::FCMP_UEQ:

    // (a ueq b)

    // == ((a uno b) || (a oeq b))

    // == ((a ule b) && (a uge b))

    CondCode = AArch64CC::PL;

    CondCode2 = AArch64CC::LE;

    break;

  }

}


/// Return a register which can be used as a bit to test in a TB(N)Z.


static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,

                              MachineRegisterInfo &MRI) {

  assert(Reg.isValid() && "Expected valid register!");

  bool HasZext = false;

  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {

    unsigned Opc = MI->getOpcode();


    if (!MI->getOperand(0).isReg() ||

        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))

      break;


    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.

    //

    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number

    // on the truncated x is the same as the bit number on x.

    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||

        Opc == TargetOpcode::G_TRUNC) {

      if (Opc == TargetOpcode::G_ZEXT)

        HasZext = true;


      Register NextReg = MI->getOperand(1).getReg();

      // Did we find something worth folding?

      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))

        break;


      // NextReg is worth folding. Keep looking.

      Reg = NextReg;

      continue;

    }


    // Attempt to find a suitable operation with a constant on one side.

    std::optional<uint64_t> C;

    Register TestReg;

    switch (Opc) {

    default:

      break;

    case TargetOpcode::G_AND:

    case TargetOpcode::G_XOR: {

      TestReg = MI->getOperand(1).getReg();

      Register ConstantReg = MI->getOperand(2).getReg();

      auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);

      if (!VRegAndVal) {

        // AND commutes, check the other side for a constant.

        // FIXME: Can we canonicalize the constant so that it's always on the

        // same side at some point earlier?

        std::swap(ConstantReg, TestReg);

        VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);

      }

      if (VRegAndVal) {

        if (HasZext)

          C = VRegAndVal->Value.getZExtValue();

        else

          C = VRegAndVal->Value.getSExtValue();

      }

      break;

    }

    case TargetOpcode::G_ASHR:

    case TargetOpcode::G_LSHR:

    case TargetOpcode::G_SHL: {

      TestReg = MI->getOperand(1).getReg();

      auto VRegAndVal =

          getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);

      if (VRegAndVal)

        C = VRegAndVal->Value.getSExtValue();

      break;

    }

    }


    // Didn't find a constant or viable register. Bail out of the loop.

    if (!C || !TestReg.isValid())

      break;


    // We found a suitable instruction with a constant. Check to see if we can

    // walk through the instruction.

    Register NextReg;

    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();

    switch (Opc) {

    default:

      break;

    case TargetOpcode::G_AND:

      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.

      if ((*C >> Bit) & 1)

        NextReg = TestReg;

      break;

    case TargetOpcode::G_SHL:

      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in

      // the type of the register.

      if (*C <= Bit && (Bit - *C) < TestRegSize) {

        NextReg = TestReg;

        Bit = Bit - *C;

      }

      break;

    case TargetOpcode::G_ASHR:

      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits

      // in x

      NextReg = TestReg;

      Bit = Bit + *C;

      if (Bit >= TestRegSize)

        Bit = TestRegSize - 1;

      break;

    case TargetOpcode::G_LSHR:

      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x

      if ((Bit + *C) < TestRegSize) {

        NextReg = TestReg;

        Bit = Bit + *C;

      }

      break;

    case TargetOpcode::G_XOR:

      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when

      // appropriate.

      //

      // e.g. If x' = xor x, c, and the b-th bit is set in c then

      //

      // tbz x', b -> tbnz x, b

      //

      // Because x' only has the b-th bit set if x does not.

      if ((*C >> Bit) & 1)

        Invert = !Invert;

      NextReg = TestReg;

      break;

    }


    // Check if we found anything worth folding.

    if (!NextReg.isValid())

      return Reg;

    Reg = NextReg;

  }


  return Reg;

}


MachineInstr *AArch64InstructionSelector::emitTestBit(

    Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,

    MachineIRBuilder &MIB) const {

  assert(TestReg.isValid());

  assert(ProduceNonFlagSettingCondBr &&

         "Cannot emit TB(N)Z with speculation tracking!");

  MachineRegisterInfo &MRI = *MIB.getMRI();


  // Attempt to optimize the test bit by walking over instructions.

  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);

  LLT Ty = MRI.getType(TestReg);

  unsigned Size = Ty.getSizeInBits();

  assert(!Ty.isVector() && "Expected a scalar!");

  assert(Bit < 64 && "Bit is too large!");


  // When the test register is a 64-bit register, we have to narrow to make

  // TBNZW work.

  bool UseWReg = Bit < 32;

  unsigned NecessarySize = UseWReg ? 32 : 64;

  if (Size != NecessarySize)

    TestReg = moveScalarRegClass(

        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,

        MIB);


  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},

                                          {AArch64::TBZW, AArch64::TBNZW}};

  unsigned Opc = OpcTable[UseWReg][IsNegative];

  auto TestBitMI =

      MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);

  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);

  return &*TestBitMI;

}


bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(

    MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,

    MachineIRBuilder &MIB) const {

  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");

  // Given something like this:

  //

  //  %x = ...Something...

  //  %one = G_CONSTANT i64 1

  //  %zero = G_CONSTANT i64 0

  //  %and = G_AND %x, %one

  //  %cmp = G_ICMP intpred(ne), %and, %zero

  //  %cmp_trunc = G_TRUNC %cmp

  //  G_BRCOND %cmp_trunc, %bb.3

  //

  // We want to try and fold the AND into the G_BRCOND and produce either a

  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).

  //

  // In this case, we'd get

  //

  // TBNZ %x %bb.3

  //


  // Check if the AND has a constant on its RHS which we can use as a mask.

  // If it's a power of 2, then it's the same as checking a specific bit.

  // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)

  auto MaybeBit = getIConstantVRegValWithLookThrough(

      AndInst.getOperand(2).getReg(), *MIB.getMRI());

  if (!MaybeBit)

    return false;


  int32_t Bit = MaybeBit->Value.exactLogBase2();

  if (Bit < 0)

    return false;


  Register TestReg = AndInst.getOperand(1).getReg();


  // Emit a TB(N)Z.

  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);

  return true;

}


MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,

                                                  bool IsNegative,

                                                  MachineBasicBlock *DestMBB,

                                                  MachineIRBuilder &MIB) const {

  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");

  MachineRegisterInfo &MRI = *MIB.getMRI();

  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==

             AArch64::GPRRegBankID &&

         "Expected GPRs only?");

  auto Ty = MRI.getType(CompareReg);

  unsigned Width = Ty.getSizeInBits();

  assert(!Ty.isVector() && "Expected scalar only?");

  assert(Width <= 64 && "Expected width to be at most 64?");

  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},

                                          {AArch64::CBNZW, AArch64::CBNZX}};

  unsigned Opc = OpcTable[IsNegative][Width == 64];

  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);

  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);

  return &*BranchMI;

}


bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(

    MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {

  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);

  assert(I.getOpcode() == TargetOpcode::G_BRCOND);

  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't

  // totally clean.  Some of them require two branches to implement.

  auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();

  emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,

                Pred);

  AArch64CC::CondCode CC1, CC2;

  changeFCMPPredToAArch64CC(Pred, CC1, CC2);

  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);

  if (CC2 != AArch64CC::AL)

    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);

  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(

    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {

  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);

  assert(I.getOpcode() == TargetOpcode::G_BRCOND);

  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.

  //

  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z

  // instructions will not be produced, as they are conditional branch

  // instructions that do not set flags.

  if (!ProduceNonFlagSettingCondBr)

    return false;


  MachineRegisterInfo &MRI = *MIB.getMRI();

  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

  auto Pred =

      static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());

  Register LHS = ICmp.getOperand(2).getReg();

  Register RHS = ICmp.getOperand(3).getReg();


  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.

  auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);

  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);


  // When we can emit a TB(N)Z, prefer that.

  //

  // Handle non-commutative condition codes first.

  // Note that we don't want to do this when we have a G_AND because it can

  // become a tst. The tst will make the test bit in the TB(N)Z redundant.

  if (VRegAndVal && !AndInst) {

    int64_t C = VRegAndVal->Value.getSExtValue();


    // When we have a greater-than comparison, we can just test if the msb is

    // zero.

    if (C == -1 && Pred == CmpInst::ICMP_SGT) {

      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;

      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);

      I.eraseFromParent();

      return true;

    }


    // When we have a less than comparison, we can just test if the msb is not

    // zero.

    if (C == 0 && Pred == CmpInst::ICMP_SLT) {

      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;

      emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);

      I.eraseFromParent();

      return true;

    }


    // Inversely, if we have a signed greater-than-or-equal comparison to zero,

    // we can test if the msb is zero.

    if (C == 0 && Pred == CmpInst::ICMP_SGE) {

      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;

      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);

      I.eraseFromParent();

      return true;

    }

  }


  // Attempt to handle commutative condition codes. Right now, that's only

  // eq/ne.

  if (ICmpInst::isEquality(Pred)) {

    if (!VRegAndVal) {

      std::swap(RHS, LHS);

      VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);

      AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);

    }


    if (VRegAndVal && VRegAndVal->Value == 0) {

      // If there's a G_AND feeding into this branch, try to fold it away by

      // emitting a TB(N)Z instead.

      //

      // Note: If we have LT, then it *is* possible to fold, but it wouldn't be

      // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding

      // would be redundant.

      if (AndInst &&

          tryOptAndIntoCompareBranch(

              *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {

        I.eraseFromParent();

        return true;

      }


      // Otherwise, try to emit a CB(N)Z instead.

      auto LHSTy = MRI.getType(LHS);

      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {

        emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);

        I.eraseFromParent();

        return true;

      }

    }

  }


  return false;

}


bool AArch64InstructionSelector::selectCompareBranchFedByICmp(

    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {

  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);

  assert(I.getOpcode() == TargetOpcode::G_BRCOND);

  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))

    return true;


  // Couldn't optimize. Emit a compare + a Bcc.

  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

  auto &PredOp = ICmp.getOperand(1);

  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);

  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(

      static_cast<CmpInst::Predicate>(PredOp.getPredicate()),

      ICmp.getOperand(3).getReg(), MIB.getMRI());

  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);

  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectCompareBranch(

    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {

  Register CondReg = I.getOperand(0).getReg();

  MachineInstr *CCMI = MRI.getVRegDef(CondReg);

  // Try to select the G_BRCOND using whatever is feeding the condition if

  // possible.

  unsigned CCMIOpc = CCMI->getOpcode();

  if (CCMIOpc == TargetOpcode::G_FCMP)

    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);

  if (CCMIOpc == TargetOpcode::G_ICMP)

    return selectCompareBranchFedByICmp(I, *CCMI, MIB);


  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z

  // instructions will not be produced, as they are conditional branch

  // instructions that do not set flags.

  if (ProduceNonFlagSettingCondBr) {

    emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,

                I.getOperand(1).getMBB(), MIB);

    I.eraseFromParent();

    return true;

  }


  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.

  auto TstMI =

      MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);

  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);

  auto Bcc = MIB.buildInstr(AArch64::Bcc)

                 .addImm(AArch64CC::NE)

                 .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();

  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);

}


/// Returns the element immediate value of a vector shift operand if found.

/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.


static std::optional<int64_t> getVectorShiftImm(Register Reg,

                                                MachineRegisterInfo &MRI) {

  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");

  MachineInstr *OpMI = MRI.getVRegDef(Reg);

  return getAArch64VectorSplatScalar(*OpMI, MRI);

}


/// Matches and returns the shift immediate value for a SHL instruction given

/// a shift operand.


static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,

                                              MachineRegisterInfo &MRI) {

  std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);

  if (!ShiftImm)

    return std::nullopt;

  // Check the immediate is in range for a SHL.

  int64_t Imm = *ShiftImm;

  if (Imm < 0)

    return std::nullopt;

  switch (SrcTy.getElementType().getSizeInBits()) {

  default:

    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");

    return std::nullopt;

  case 8:

    if (Imm > 7)

      return std::nullopt;

    break;

  case 16:

    if (Imm > 15)

      return std::nullopt;

    break;

  case 32:

    if (Imm > 31)

      return std::nullopt;

    break;

  case 64:

    if (Imm > 63)

      return std::nullopt;

    break;

  }

  return Imm;

}


bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,

                                                 MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_SHL);

  Register DstReg = I.getOperand(0).getReg();

  const LLT Ty = MRI.getType(DstReg);

  Register Src1Reg = I.getOperand(1).getReg();

  Register Src2Reg = I.getOperand(2).getReg();


  if (!Ty.isVector())

    return false;


  // Check if we have a vector of constants on RHS that we can select as the

  // immediate form.

  std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);


  unsigned Opc = 0;

  if (Ty == LLT::fixed_vector(2, 64)) {

    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;

  } else if (Ty == LLT::fixed_vector(4, 32)) {

    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;

  } else if (Ty == LLT::fixed_vector(2, 32)) {

    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;

  } else if (Ty == LLT::fixed_vector(4, 16)) {

    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;

  } else if (Ty == LLT::fixed_vector(8, 16)) {

    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;

  } else if (Ty == LLT::fixed_vector(16, 8)) {

    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;

  } else if (Ty == LLT::fixed_vector(8, 8)) {

    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;

  } else {

    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");

    return false;

  }


  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});

  if (ImmVal)

    Shl.addImm(*ImmVal);

  else

    Shl.addUse(Src2Reg);

  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);

  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectVectorAshrLshr(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_ASHR ||

         I.getOpcode() == TargetOpcode::G_LSHR);

  Register DstReg = I.getOperand(0).getReg();

  const LLT Ty = MRI.getType(DstReg);

  Register Src1Reg = I.getOperand(1).getReg();

  Register Src2Reg = I.getOperand(2).getReg();


  if (!Ty.isVector())

    return false;


  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;


  // We expect the immediate case to be lowered in the PostLegalCombiner to

  // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.


  // There is not a shift right register instruction, but the shift left

  // register instruction takes a signed value, where negative numbers specify a

  // right shift.


  unsigned Opc = 0;

  unsigned NegOpc = 0;

  const TargetRegisterClass *RC =

      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));

  if (Ty == LLT::fixed_vector(2, 64)) {

    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;

    NegOpc = AArch64::NEGv2i64;

  } else if (Ty == LLT::fixed_vector(4, 32)) {

    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;

    NegOpc = AArch64::NEGv4i32;

  } else if (Ty == LLT::fixed_vector(2, 32)) {

    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;

    NegOpc = AArch64::NEGv2i32;

  } else if (Ty == LLT::fixed_vector(4, 16)) {

    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;

    NegOpc = AArch64::NEGv4i16;

  } else if (Ty == LLT::fixed_vector(8, 16)) {

    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;

    NegOpc = AArch64::NEGv8i16;

  } else if (Ty == LLT::fixed_vector(16, 8)) {

    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;

    NegOpc = AArch64::NEGv16i8;

  } else if (Ty == LLT::fixed_vector(8, 8)) {

    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;

    NegOpc = AArch64::NEGv8i8;

  } else {

    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");

    return false;

  }


  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});

  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);

  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});

  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);

  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectVaStartAAPCS(

    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {


  if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),

                             MF.getFunction().isVarArg()))

    return false;


  // The layout of the va_list struct is specified in the AArch64 Procedure Call

  // Standard, section 10.1.5.


  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;

  const auto *PtrRegClass =

      STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;


  const MCInstrDesc &MCIDAddAddr =

      TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);

  const MCInstrDesc &MCIDStoreAddr =

      TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);


  /*

   * typedef struct va_list {

   *  void * stack; // next stack param

   *  void * gr_top; // end of GP arg reg save area

   *  void * vr_top; // end of FP/SIMD arg reg save area

   *  int gr_offs; // offset from gr_top to next GP register arg

   *  int vr_offs; // offset from vr_top to next FP/SIMD register arg

   * } va_list;

   */

  const auto VAList = I.getOperand(0).getReg();


  // Our current offset in bytes from the va_list struct (VAList).

  unsigned OffsetBytes = 0;


  // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes

  // and increment OffsetBytes by PtrSize.

  const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {

    const Register Top = MRI.createVirtualRegister(PtrRegClass);

    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)

                   .addDef(Top)

                   .addFrameIndex(FrameIndex)

                   .addImm(Imm)

                   .addImm(0);

    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);


    const auto *MMO = *I.memoperands_begin();

    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)

              .addUse(Top)

              .addUse(VAList)

              .addImm(OffsetBytes / PtrSize)

              .addMemOperand(MF.getMachineMemOperand(

                  MMO->getPointerInfo().getWithOffset(OffsetBytes),

                  MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));

    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);


    OffsetBytes += PtrSize;

  };


  // void* stack at offset 0

  PushAddress(FuncInfo->getVarArgsStackIndex(), 0);


  // void* gr_top at offset 8 (4 on ILP32)

  const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();

  PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);


  // void* vr_top at offset 16 (8 on ILP32)

  const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();

  PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);


  // Helper function to store a 4-byte integer constant to VAList at offset

  // OffsetBytes, and increment OffsetBytes by 4.

  const auto PushIntConstant = [&](const int32_t Value) {

    constexpr int IntSize = 4;

    const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

    auto MIB =

        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))

            .addDef(Temp)

            .addImm(Value);

    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);


    const auto *MMO = *I.memoperands_begin();

    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))

              .addUse(Temp)

              .addUse(VAList)

              .addImm(OffsetBytes / IntSize)

              .addMemOperand(MF.getMachineMemOperand(

                  MMO->getPointerInfo().getWithOffset(OffsetBytes),

                  MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));

    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

    OffsetBytes += IntSize;

  };


  // int gr_offs at offset 24 (12 on ILP32)

  PushIntConstant(-static_cast<int32_t>(GPRSize));


  // int vr_offs at offset 28 (16 on ILP32)

  PushIntConstant(-static_cast<int32_t>(FPRSize));


  assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");


  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectVaStartDarwin(

    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  Register ListReg = I.getOperand(0).getReg();


  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);


  int FrameIdx = FuncInfo->getVarArgsStackIndex();

  if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(

          MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {

    FrameIdx = FuncInfo->getVarArgsGPRSize() > 0

                   ? FuncInfo->getVarArgsGPRIndex()

                   : FuncInfo->getVarArgsStackIndex();

  }


  auto MIB =

      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))

          .addDef(ArgsAddrReg)

          .addFrameIndex(FrameIdx)

          .addImm(0)

          .addImm(0);


  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);


  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))

            .addUse(ArgsAddrReg)

            .addUse(ListReg)

            .addImm(0)

            .addMemOperand(*I.memoperands_begin());


  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  I.eraseFromParent();

  return true;

}


void AArch64InstructionSelector::materializeLargeCMVal(

    MachineInstr &I, const Value *V, unsigned OpFlags) {

  MachineBasicBlock &MBB = *I.getParent();

  MachineFunction &MF = *MBB.getParent();

  MachineRegisterInfo &MRI = MF.getRegInfo();


  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});

  MovZ->addOperand(MF, I.getOperand(1));

  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |

                                     AArch64II::MO_NC);

  MovZ->addOperand(MF, MachineOperand::CreateImm(0));

  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);


  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,

                       Register ForceDstReg) {

    Register DstReg = ForceDstReg

                          ? ForceDstReg

                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);

    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);

    if (auto *GV = dyn_cast<GlobalValue>(V)) {

      MovI->addOperand(MF, MachineOperand::CreateGA(

                               GV, MovZ->getOperand(1).getOffset(), Flags));

    } else {

      MovI->addOperand(

          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),

                                       MovZ->getOperand(1).getOffset(), Flags));

    }

    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));

    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);

    return DstReg;

  };

  Register DstReg = BuildMovK(MovZ.getReg(0),

                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);

  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);

  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());

}


bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {

  MachineBasicBlock &MBB = *I.getParent();

  MachineFunction &MF = *MBB.getParent();

  MachineRegisterInfo &MRI = MF.getRegInfo();


  switch (I.getOpcode()) {

  case TargetOpcode::G_STORE: {

    bool Changed = contractCrossBankCopyIntoStore(I, MRI);

    MachineOperand &SrcOp = I.getOperand(0);

    if (MRI.getType(SrcOp.getReg()).isPointer()) {

      // Allow matching with imported patterns for stores of pointers. Unlike

      // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy

      // and constrain.

      auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);

      Register NewSrc = Copy.getReg(0);

      SrcOp.setReg(NewSrc);

      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);

      Changed = true;

    }

    return Changed;

  }

  case TargetOpcode::G_PTR_ADD: {

    // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer

    // arithmetic semantics instead of falling back to regular arithmetic.

    const auto &TL = STI.getTargetLowering();

    if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))

      return false;

    return convertPtrAddToAdd(I, MRI);

  }

  case TargetOpcode::G_LOAD: {

    // For scalar loads of pointers, we try to convert the dest type from p0

    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD

    // conversion, this should be ok because all users should have been

    // selected already, so the type doesn't matter for them.

    Register DstReg = I.getOperand(0).getReg();

    const LLT DstTy = MRI.getType(DstReg);

    if (!DstTy.isPointer())

      return false;

    MRI.setType(DstReg, LLT::scalar(64));

    return true;

  }

  case AArch64::G_DUP: {

    // Convert the type from p0 to s64 to help selection.

    LLT DstTy = MRI.getType(I.getOperand(0).getReg());

    if (!DstTy.isPointerVector())

      return false;

    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());

    MRI.setType(I.getOperand(0).getReg(),

                DstTy.changeElementType(LLT::scalar(64)));

    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);

    I.getOperand(1).setReg(NewSrc.getReg(0));

    return true;

  }

  case AArch64::G_INSERT_VECTOR_ELT: {

    // Convert the type from p0 to s64 to help selection.

    LLT DstTy = MRI.getType(I.getOperand(0).getReg());

    LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());

    if (!SrcVecTy.isPointerVector())

      return false;

    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());

    MRI.setType(I.getOperand(1).getReg(),

                DstTy.changeElementType(LLT::scalar(64)));

    MRI.setType(I.getOperand(0).getReg(),

                DstTy.changeElementType(LLT::scalar(64)));

    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);

    I.getOperand(2).setReg(NewSrc.getReg(0));

    return true;

  }

  case TargetOpcode::G_UITOFP:

  case TargetOpcode::G_SITOFP: {

    // If both source and destination regbanks are FPR, then convert the opcode

    // to G_SITOF so that the importer can select it to an fpr variant.

    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank

    // copy.

    Register SrcReg = I.getOperand(1).getReg();

    LLT SrcTy = MRI.getType(SrcReg);

    LLT DstTy = MRI.getType(I.getOperand(0).getReg());

    if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())

      return false;


    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {

      if (I.getOpcode() == TargetOpcode::G_SITOFP)

        I.setDesc(TII.get(AArch64::G_SITOF));

      else

        I.setDesc(TII.get(AArch64::G_UITOF));

      return true;

    }

    return false;

  }

  default:

    return false;

  }

}


/// This lowering tries to look for G_PTR_ADD instructions and then converts

/// them to a standard G_ADD with a COPY on the source.

///

/// The motivation behind this is to expose the add semantics to the imported

/// tablegen patterns. We shouldn't need to check for uses being loads/stores,

/// because the selector works bottom up, uses before defs. By the time we

/// end up trying to select a G_PTR_ADD, we should have already attempted to

/// fold this into addressing modes and were therefore unsuccessful.

bool AArch64InstructionSelector::convertPtrAddToAdd(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");

  Register DstReg = I.getOperand(0).getReg();

  Register AddOp1Reg = I.getOperand(1).getReg();

  const LLT PtrTy = MRI.getType(DstReg);

  if (PtrTy.getAddressSpace() != 0)

    return false;


  const LLT CastPtrTy =

      PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);

  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);

  // Set regbanks on the registers.

  if (PtrTy.isVector())

    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));

  else

    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));


  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:

  // %dst(intty) = G_ADD %intbase, off

  I.setDesc(TII.get(TargetOpcode::G_ADD));

  MRI.setType(DstReg, CastPtrTy);

  I.getOperand(1).setReg(PtrToInt.getReg(0));

  if (!select(*PtrToInt)) {

    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");

    return false;

  }


  // Also take the opportunity here to try to do some optimization.

  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.

  Register NegatedReg;

  if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))

    return true;

  I.getOperand(2).setReg(NegatedReg);

  I.setDesc(TII.get(TargetOpcode::G_SUB));

  return true;

}


bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,

                                                MachineRegisterInfo &MRI) {

  // We try to match the immediate variant of LSL, which is actually an alias

  // for a special case of UBFM. Otherwise, we fall back to the imported

  // selector which will match the register variant.

  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");

  const auto &MO = I.getOperand(2);

  auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);

  if (!VRegAndVal)

    return false;


  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());

  if (DstTy.isVector())

    return false;

  bool Is64Bit = DstTy.getSizeInBits() == 64;

  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);

  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);


  if (!Imm1Fn || !Imm2Fn)

    return false;


  auto NewI =

      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,

                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});


  for (auto &RenderFn : *Imm1Fn)

    RenderFn(NewI);

  for (auto &RenderFn : *Imm2Fn)

    RenderFn(NewI);


  I.eraseFromParent();

  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);

}


bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");

  // If we're storing a scalar, it doesn't matter what register bank that

  // scalar is on. All that matters is the size.

  //

  // So, if we see something like this (with a 32-bit scalar as an example):

  //

  // %x:gpr(s32) = ... something ...

  // %y:fpr(s32) = COPY %x:gpr(s32)

  // G_STORE %y:fpr(s32)

  //

  // We can fix this up into something like this:

  //

  // G_STORE %x:gpr(s32)

  //

  // And then continue the selection process normally.

  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);

  if (!DefDstReg.isValid())

    return false;

  LLT DefDstTy = MRI.getType(DefDstReg);

  Register StoreSrcReg = I.getOperand(0).getReg();

  LLT StoreSrcTy = MRI.getType(StoreSrcReg);


  // If we get something strange like a physical register, then we shouldn't

  // go any further.

  if (!DefDstTy.isValid())

    return false;


  // Are the source and dst types the same size?

  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())

    return false;


  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==

      RBI.getRegBank(DefDstReg, MRI, TRI))

    return false;


  // We have a cross-bank copy, which is entering a store. Let's fold it.

  I.getOperand(0).setReg(DefDstReg);

  return true;

}


bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {

  assert(I.getParent() && "Instruction should be in a basic block!");

  assert(I.getParent()->getParent() && "Instruction should be in a function!");


  MachineBasicBlock &MBB = *I.getParent();

  MachineFunction &MF = *MBB.getParent();

  MachineRegisterInfo &MRI = MF.getRegInfo();


  switch (I.getOpcode()) {

  case AArch64::G_DUP: {

    // Before selecting a DUP instruction, check if it is better selected as a

    // MOV or load from a constant pool.

    Register Src = I.getOperand(1).getReg();

    auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);

    if (!ValAndVReg)

      return false;

    LLVMContext &Ctx = MF.getFunction().getContext();

    Register Dst = I.getOperand(0).getReg();

    auto *CV = ConstantDataVector::getSplat(

        MRI.getType(Dst).getNumElements(),

        ConstantInt::get(

            Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),

            ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));

    if (!emitConstantVector(Dst, CV, MIB, MRI))

      return false;

    I.eraseFromParent();

    return true;

  }

  case TargetOpcode::G_SEXT:

    // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV

    // over a normal extend.

    if (selectUSMovFromExtend(I, MRI))

      return true;

    return false;

  case TargetOpcode::G_BR:

    return false;

  case TargetOpcode::G_SHL:

    return earlySelectSHL(I, MRI);

  case TargetOpcode::G_CONSTANT: {

    bool IsZero = false;

    if (I.getOperand(1).isCImm())

      IsZero = I.getOperand(1).getCImm()->isZero();

    else if (I.getOperand(1).isImm())

      IsZero = I.getOperand(1).getImm() == 0;


    if (!IsZero)

      return false;


    Register DefReg = I.getOperand(0).getReg();

    LLT Ty = MRI.getType(DefReg);

    if (Ty.getSizeInBits() == 64) {

      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);

      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);

    } else if (Ty.getSizeInBits() == 32) {

      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);

      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);

    } else

      return false;


    I.setDesc(TII.get(TargetOpcode::COPY));

    return true;

  }


  case TargetOpcode::G_ADD: {

    // Check if this is being fed by a G_ICMP on either side.

    //

    // (cmp pred, x, y) + z

    //

    // In the above case, when the cmp is true, we increment z by 1. So, we can

    // fold the add into the cset for the cmp by using cinc.

    //

    // FIXME: This would probably be a lot nicer in PostLegalizerLowering.

    Register AddDst = I.getOperand(0).getReg();

    Register AddLHS = I.getOperand(1).getReg();

    Register AddRHS = I.getOperand(2).getReg();

    // Only handle scalars.

    LLT Ty = MRI.getType(AddLHS);

    if (Ty.isVector())

      return false;

    // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64

    // bits.

    unsigned Size = Ty.getSizeInBits();

    if (Size != 32 && Size != 64)

      return false;

    auto MatchCmp = [&](Register Reg) -> MachineInstr * {

      if (!MRI.hasOneNonDBGUse(Reg))

        return nullptr;

      // If the LHS of the add is 32 bits, then we want to fold a 32-bit

      // compare.

      if (Size == 32)

        return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);

      // We model scalar compares using 32-bit destinations right now.

      // If it's a 64-bit compare, it'll have 64-bit sources.

      Register ZExt;

      if (!mi_match(Reg, MRI,

                    m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))

        return nullptr;

      auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);

      if (!Cmp ||

          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)

        return nullptr;

      return Cmp;

    };

    // Try to match

    // z + (cmp pred, x, y)

    MachineInstr *Cmp = MatchCmp(AddRHS);

    if (!Cmp) {

      // (cmp pred, x, y) + z

      std::swap(AddLHS, AddRHS);

      Cmp = MatchCmp(AddRHS);

      if (!Cmp)

        return false;

    }

    auto &PredOp = Cmp->getOperand(1);

    MIB.setInstrAndDebugLoc(I);

    emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),

                       /*RHS=*/Cmp->getOperand(3), PredOp, MIB);

    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());

    const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(

        CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);

    emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);

    I.eraseFromParent();

    return true;

  }

  case TargetOpcode::G_OR: {

    // Look for operations that take the lower `Width=Size-ShiftImm` bits of

    // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via

    // shifting and masking that we can replace with a BFI (encoded as a BFM).

    Register Dst = I.getOperand(0).getReg();

    LLT Ty = MRI.getType(Dst);


    if (!Ty.isScalar())

      return false;


    unsigned Size = Ty.getSizeInBits();

    if (Size != 32 && Size != 64)

      return false;


    Register ShiftSrc;

    int64_t ShiftImm;

    Register MaskSrc;

    int64_t MaskImm;

    if (!mi_match(

            Dst, MRI,

            m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),

                  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))

      return false;


    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))

      return false;


    int64_t Immr = Size - ShiftImm;

    int64_t Imms = Size - ShiftImm - 1;

    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;

    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);

    I.eraseFromParent();

    return true;

  }

  case TargetOpcode::G_FENCE: {

    if (I.getOperand(1).getImm() == 0)

      BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));

    else

      BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))

          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);

    I.eraseFromParent();

    return true;

  }

  default:

    return false;

  }

}


bool AArch64InstructionSelector::select(MachineInstr &I) {

  assert(I.getParent() && "Instruction should be in a basic block!");

  assert(I.getParent()->getParent() && "Instruction should be in a function!");


  MachineBasicBlock &MBB = *I.getParent();

  MachineFunction &MF = *MBB.getParent();

  MachineRegisterInfo &MRI = MF.getRegInfo();


  const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();

  if (Subtarget->requiresStrictAlign()) {

    // We don't support this feature yet.

    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");

    return false;

  }


  MIB.setInstrAndDebugLoc(I);


  unsigned Opcode = I.getOpcode();

  // G_PHI requires same handling as PHI

  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {

    // Certain non-generic instructions also need some special handling.


    if (Opcode ==  TargetOpcode::LOAD_STACK_GUARD)

      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);


    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {

      const Register DefReg = I.getOperand(0).getReg();

      const LLT DefTy = MRI.getType(DefReg);


      const RegClassOrRegBank &RegClassOrBank =

        MRI.getRegClassOrRegBank(DefReg);


      const TargetRegisterClass *DefRC =

          dyn_cast<const TargetRegisterClass *>(RegClassOrBank);

      if (!DefRC) {

        if (!DefTy.isValid()) {

          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");

          return false;

        }

        const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);

        DefRC = getRegClassForTypeOnBank(DefTy, RB);

        if (!DefRC) {

          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");

          return false;

        }

      }


      I.setDesc(TII.get(TargetOpcode::PHI));


      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);

    }


    if (I.isCopy())

      return selectCopy(I, TII, MRI, TRI, RBI);


    if (I.isDebugInstr())

      return selectDebugInstr(I, MRI, RBI);


    return true;

  }


  if (I.getNumOperands() != I.getNumExplicitOperands()) {

    LLVM_DEBUG(

        dbgs() << "Generic instruction has unexpected implicit operands\n");

    return false;

  }


  // Try to do some lowering before we start instruction selecting. These

  // lowerings are purely transformations on the input G_MIR and so selection

  // must continue after any modification of the instruction.

  if (preISelLower(I)) {

    Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.

  }


  // There may be patterns where the importer can't deal with them optimally,

  // but does select it to a suboptimal sequence so our custom C++ selection

  // code later never has a chance to work on it. Therefore, we have an early

  // selection attempt here to give priority to certain selection routines

  // over the imported ones.

  if (earlySelect(I))

    return true;


  if (selectImpl(I, *CoverageInfo))

    return true;


  LLT Ty =

      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};


  switch (Opcode) {

  case TargetOpcode::G_SBFX:

  case TargetOpcode::G_UBFX: {

    static const unsigned OpcTable[2][2] = {

        {AArch64::UBFMWri, AArch64::UBFMXri},

        {AArch64::SBFMWri, AArch64::SBFMXri}};

    bool IsSigned = Opcode == TargetOpcode::G_SBFX;

    unsigned Size = Ty.getSizeInBits();

    unsigned Opc = OpcTable[IsSigned][Size == 64];

    auto Cst1 =

        getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);

    assert(Cst1 && "Should have gotten a constant for src 1?");

    auto Cst2 =

        getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);

    assert(Cst2 && "Should have gotten a constant for src 2?");

    auto LSB = Cst1->Value.getZExtValue();

    auto Width = Cst2->Value.getZExtValue();

    auto BitfieldInst =

        MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})

            .addImm(LSB)

            .addImm(LSB + Width - 1);

    I.eraseFromParent();

    return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);

  }

  case TargetOpcode::G_BRCOND:

    return selectCompareBranch(I, MF, MRI);


  case TargetOpcode::G_BRINDIRECT: {

    const Function &Fn = MF.getFunction();

    if (std::optional<uint16_t> BADisc =

            STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {

      auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});

      MI.addImm(AArch64PACKey::IA);

      MI.addImm(*BADisc);

      MI.addReg(/*AddrDisc=*/AArch64::XZR);

      I.eraseFromParent();

      return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);

    }

    I.setDesc(TII.get(AArch64::BR));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

  }


  case TargetOpcode::G_BRJT:

    return selectBrJT(I, MRI);


  case AArch64::G_ADD_LOW: {

    // This op may have been separated from it's ADRP companion by the localizer

    // or some other code motion pass. Given that many CPUs will try to

    // macro fuse these operations anyway, select this into a MOVaddr pseudo

    // which will later be expanded into an ADRP+ADD pair after scheduling.

    MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());

    if (BaseMI->getOpcode() != AArch64::ADRP) {

      I.setDesc(TII.get(AArch64::ADDXri));

      I.addOperand(MachineOperand::CreateImm(0));

      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    }

    assert(TM.getCodeModel() == CodeModel::Small &&

           "Expected small code model");

    auto Op1 = BaseMI->getOperand(1);

    auto Op2 = I.getOperand(2);

    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})

                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),

                                         Op1.getTargetFlags())

                       .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),

                                         Op2.getTargetFlags());

    I.eraseFromParent();

    return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);

  }


  case TargetOpcode::G_FCONSTANT:

  case TargetOpcode::G_CONSTANT: {

    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;


    const LLT s8 = LLT::scalar(8);

    const LLT s16 = LLT::scalar(16);

    const LLT s32 = LLT::scalar(32);

    const LLT s64 = LLT::scalar(64);

    const LLT s128 = LLT::scalar(128);

    const LLT p0 = LLT::pointer(0, 64);


    const Register DefReg = I.getOperand(0).getReg();

    const LLT DefTy = MRI.getType(DefReg);

    const unsigned DefSize = DefTy.getSizeInBits();

    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);


    // FIXME: Redundant check, but even less readable when factored out.

    if (isFP) {

      if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {

        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty

                          << " constant, expected: " << s16 << " or " << s32

                          << " or " << s64 << " or " << s128 << '\n');

        return false;

      }


      if (RB.getID() != AArch64::FPRRegBankID) {

        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty

                          << " constant on bank: " << RB

                          << ", expected: FPR\n");

        return false;

      }


      // The case when we have 0.0 is covered by tablegen. Reject it here so we

      // can be sure tablegen works correctly and isn't rescued by this code.

      // 0.0 is not covered by tablegen for FP128. So we will handle this

      // scenario in the code here.

      if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))

        return false;

    } else {

      // s32 and s64 are covered by tablegen.

      if (Ty != p0 && Ty != s8 && Ty != s16) {

        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty

                          << " constant, expected: " << s32 << ", " << s64

                          << ", or " << p0 << '\n');

        return false;

      }


      if (RB.getID() != AArch64::GPRRegBankID) {

        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty

                          << " constant on bank: " << RB

                          << ", expected: GPR\n");

        return false;

      }

    }


    if (isFP) {

      const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);

      // For 16, 64, and 128b values, emit a constant pool load.

      switch (DefSize) {

      default:

        llvm_unreachable("Unexpected destination size for G_FCONSTANT?");

      case 32:

      case 64: {

        bool OptForSize = shouldOptForSize(&MF);

        const auto &TLI = MF.getSubtarget().getTargetLowering();

        // If TLI says that this fpimm is illegal, then we'll expand to a

        // constant pool load.

        if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),

                              EVT::getFloatingPointVT(DefSize), OptForSize))

          break;

        [[fallthrough]];

      }

      case 16:

      case 128: {

        auto *FPImm = I.getOperand(1).getFPImm();

        auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);

        if (!LoadMI) {

          LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");

          return false;

        }

        MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});

        I.eraseFromParent();

        return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);

      }

      }


      assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");

      // Either emit a FMOV, or emit a copy to emit a normal mov.

      const Register DefGPRReg = MRI.createVirtualRegister(

          DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);

      MachineOperand &RegOp = I.getOperand(0);

      RegOp.setReg(DefGPRReg);

      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));

      MIB.buildCopy({DefReg}, {DefGPRReg});


      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {

        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");

        return false;

      }


      MachineOperand &ImmOp = I.getOperand(1);

      // FIXME: Is going through int64_t always correct?

      ImmOp.ChangeToImmediate(

          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

    } else if (I.getOperand(1).isCImm()) {

      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();

      I.getOperand(1).ChangeToImmediate(Val);

    } else if (I.getOperand(1).isImm()) {

      uint64_t Val = I.getOperand(1).getImm();

      I.getOperand(1).ChangeToImmediate(Val);

    }


    const unsigned MovOpc =

        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;

    I.setDesc(TII.get(MovOpc));

    constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    return true;

  }

  case TargetOpcode::G_EXTRACT: {

    Register DstReg = I.getOperand(0).getReg();

    Register SrcReg = I.getOperand(1).getReg();

    LLT SrcTy = MRI.getType(SrcReg);

    LLT DstTy = MRI.getType(DstReg);

    (void)DstTy;

    unsigned SrcSize = SrcTy.getSizeInBits();


    if (SrcTy.getSizeInBits() > 64) {

      // This should be an extract of an s128, which is like a vector extract.

      if (SrcTy.getSizeInBits() != 128)

        return false;

      // Only support extracting 64 bits from an s128 at the moment.

      if (DstTy.getSizeInBits() != 64)

        return false;


      unsigned Offset = I.getOperand(2).getImm();

      if (Offset % 64 != 0)

        return false;


      // Check we have the right regbank always.

      const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

      const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);

      assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");


      if (SrcRB.getID() == AArch64::GPRRegBankID) {

        auto NewI =

            MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})

                .addUse(SrcReg, 0,

                        Offset == 0 ? AArch64::sube64 : AArch64::subo64);

        constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,

                                 AArch64::GPR64RegClass, NewI->getOperand(0));

        I.eraseFromParent();

        return true;

      }


      // Emit the same code as a vector extract.

      // Offset must be a multiple of 64.

      unsigned LaneIdx = Offset / 64;

      MachineInstr *Extract = emitExtractVectorElt(

          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);

      if (!Extract)

        return false;

      I.eraseFromParent();

      return true;

    }


    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));

    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +

                                      Ty.getSizeInBits() - 1);


    if (SrcSize < 64) {

      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&

             "unexpected G_EXTRACT types");

      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    }


    DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));

    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));

    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})

        .addReg(DstReg, 0, AArch64::sub_32);

    RBI.constrainGenericRegister(I.getOperand(0).getReg(),

                                 AArch64::GPR32RegClass, MRI);

    I.getOperand(0).setReg(DstReg);


    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

  }


  case TargetOpcode::G_INSERT: {

    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());

    LLT DstTy = MRI.getType(I.getOperand(0).getReg());

    unsigned DstSize = DstTy.getSizeInBits();

    // Larger inserts are vectors, same-size ones should be something else by

    // now (split up or turned into COPYs).

    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)

      return false;


    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));

    unsigned LSB = I.getOperand(3).getImm();

    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();

    I.getOperand(3).setImm((DstSize - LSB) % DstSize);

    MachineInstrBuilder(MF, I).addImm(Width - 1);


    if (DstSize < 64) {

      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&

             "unexpected G_INSERT types");

      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    }


    Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));

    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),

            TII.get(AArch64::SUBREG_TO_REG))

        .addDef(SrcReg)

        .addImm(0)

        .addUse(I.getOperand(2).getReg())

        .addImm(AArch64::sub_32);

    RBI.constrainGenericRegister(I.getOperand(2).getReg(),

                                 AArch64::GPR32RegClass, MRI);

    I.getOperand(2).setReg(SrcReg);


    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

  }

  case TargetOpcode::G_FRAME_INDEX: {

    // allocas and G_FRAME_INDEX are only supported in addrspace(0).

    if (Ty != LLT::pointer(0, 64)) {

      LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty

                        << ", expected: " << LLT::pointer(0, 64) << '\n');

      return false;

    }

    I.setDesc(TII.get(AArch64::ADDXri));


    // MOs for a #0 shifted immediate.

    I.addOperand(MachineOperand::CreateImm(0));

    I.addOperand(MachineOperand::CreateImm(0));


    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

  }


  case TargetOpcode::G_GLOBAL_VALUE: {

    const GlobalValue *GV = nullptr;

    unsigned OpFlags;

    if (I.getOperand(1).isSymbol()) {

      OpFlags = I.getOperand(1).getTargetFlags();

      // Currently only used by "RtLibUseGOT".

      assert(OpFlags == AArch64II::MO_GOT);

    } else {

      GV = I.getOperand(1).getGlobal();

      if (GV->isThreadLocal()) {

        // We don't support instructions with emulated TLS variables yet

        if (TM.useEmulatedTLS())

          return false;

        return selectTLSGlobalValue(I, MRI);

      }

      OpFlags = STI.ClassifyGlobalReference(GV, TM);

    }


    if (OpFlags & AArch64II::MO_GOT) {

      I.setDesc(TII.get(MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()

                            ? AArch64::LOADgotAUTH

                            : AArch64::LOADgot));

      I.getOperand(1).setTargetFlags(OpFlags);

    } else if (TM.getCodeModel() == CodeModel::Large &&

               !TM.isPositionIndependent()) {

      // Materialize the global using movz/movk instructions.

      materializeLargeCMVal(I, GV, OpFlags);

      I.eraseFromParent();

      return true;

    } else if (TM.getCodeModel() == CodeModel::Tiny) {

      I.setDesc(TII.get(AArch64::ADR));

      I.getOperand(1).setTargetFlags(OpFlags);

    } else {

      I.setDesc(TII.get(AArch64::MOVaddr));

      I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);

      MachineInstrBuilder MIB(MF, I);

      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),

                           OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

    }

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

  }


  case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:

    return selectPtrAuthGlobalValue(I, MRI);


  case TargetOpcode::G_ZEXTLOAD:

  case TargetOpcode::G_LOAD:

  case TargetOpcode::G_STORE: {

    GLoadStore &LdSt = cast<GLoadStore>(I);

    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;

    LLT PtrTy = MRI.getType(LdSt.getPointerReg());


    // Can only handle AddressSpace 0, 64-bit pointers.

    if (PtrTy != LLT::pointer(0, 64)) {

      return false;

    }


    uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();

    unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();

    AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();


    // Need special instructions for atomics that affect ordering.

    if (Order != AtomicOrdering::NotAtomic &&

        Order != AtomicOrdering::Unordered &&

        Order != AtomicOrdering::Monotonic) {

      assert(!isa<GZExtLoad>(LdSt));

      assert(MemSizeInBytes <= 8 &&

             "128-bit atomics should already be custom-legalized");


      if (isa<GLoad>(LdSt)) {

        static constexpr unsigned LDAPROpcodes[] = {

            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};

        static constexpr unsigned LDAROpcodes[] = {

            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};

        ArrayRef<unsigned> Opcodes =

            STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent

                ? LDAPROpcodes

                : LDAROpcodes;

        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));

      } else {

        static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,

                                               AArch64::STLRW, AArch64::STLRX};

        Register ValReg = LdSt.getReg(0);

        if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {

          // Emit a subreg copy of 32 bits.

          Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

          MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})

              .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);

          I.getOperand(0).setReg(NewVal);

        }

        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));

      }

      constrainSelectedInstRegOperands(I, TII, TRI, RBI);

      return true;

    }


#ifndef NDEBUG

    const Register PtrReg = LdSt.getPointerReg();

    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);

    // Check that the pointer register is valid.

    assert(PtrRB.getID() == AArch64::GPRRegBankID &&

           "Load/Store pointer operand isn't a GPR");

    assert(MRI.getType(PtrReg).isPointer() &&

           "Load/Store pointer operand isn't a pointer");

#endif


    const Register ValReg = LdSt.getReg(0);

    const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

    LLT ValTy = MRI.getType(ValReg);


    // The code below doesn't support truncating stores, so we need to split it

    // again.

    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {

      unsigned SubReg;

      LLT MemTy = LdSt.getMMO().getMemoryType();

      auto *RC = getRegClassForTypeOnBank(MemTy, RB);

      if (!getSubRegForClass(RC, TRI, SubReg))

        return false;


      // Generate a subreg copy.

      auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})

                      .addReg(ValReg, 0, SubReg)

                      .getReg(0);

      RBI.constrainGenericRegister(Copy, *RC, MRI);

      LdSt.getOperand(0).setReg(Copy);

    } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {

      // If this is an any-extending load from the FPR bank, split it into a regular

      // load + extend.

      if (RB.getID() == AArch64::FPRRegBankID) {

        unsigned SubReg;

        LLT MemTy = LdSt.getMMO().getMemoryType();

        auto *RC = getRegClassForTypeOnBank(MemTy, RB);

        if (!getSubRegForClass(RC, TRI, SubReg))

          return false;

        Register OldDst = LdSt.getReg(0);

        Register NewDst =

            MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());

        LdSt.getOperand(0).setReg(NewDst);

        MRI.setRegBank(NewDst, RB);

        // Generate a SUBREG_TO_REG to extend it.

        MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));

        MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})

            .addImm(0)

            .addUse(NewDst)

            .addImm(SubReg);

        auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);

        RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);

        MIB.setInstr(LdSt);

        ValTy = MemTy; // This is no longer an extending load.

      }

    }


    // Helper lambda for partially selecting I. Either returns the original

    // instruction with an updated opcode, or a new instruction.

    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {

      bool IsStore = isa<GStore>(I);

      const unsigned NewOpc =

          selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);

      if (NewOpc == I.getOpcode())

        return nullptr;

      // Check if we can fold anything into the addressing mode.

      auto AddrModeFns =

          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);

      if (!AddrModeFns) {

        // Can't fold anything. Use the original instruction.

        I.setDesc(TII.get(NewOpc));

        I.addOperand(MachineOperand::CreateImm(0));

        return &I;

      }


      // Folded something. Create a new instruction and return it.

      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());

      Register CurValReg = I.getOperand(0).getReg();

      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);

      NewInst.cloneMemRefs(I);

      for (auto &Fn : *AddrModeFns)

        Fn(NewInst);

      I.eraseFromParent();

      return &*NewInst;

    };


    MachineInstr *LoadStore = SelectLoadStoreAddressingMode();

    if (!LoadStore)

      return false;


    // If we're storing a 0, use WZR/XZR.

    if (Opcode == TargetOpcode::G_STORE) {

      auto CVal = getIConstantVRegValWithLookThrough(

          LoadStore->getOperand(0).getReg(), MRI);

      if (CVal && CVal->Value == 0) {

        switch (LoadStore->getOpcode()) {

        case AArch64::STRWui:

        case AArch64::STRHHui:

        case AArch64::STRBBui:

          LoadStore->getOperand(0).setReg(AArch64::WZR);

          break;

        case AArch64::STRXui:

          LoadStore->getOperand(0).setReg(AArch64::XZR);

          break;

        }

      }

    }


    if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&

                       ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {

      // The any/zextload from a smaller type to i32 should be handled by the

      // importer.

      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)

        return false;

      // If we have an extending load then change the load's type to be a

      // narrower reg and zero_extend with SUBREG_TO_REG.

      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

      Register DstReg = LoadStore->getOperand(0).getReg();

      LoadStore->getOperand(0).setReg(LdReg);


      MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));

      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})

          .addImm(0)

          .addUse(LdReg)

          .addImm(AArch64::sub_32);

      constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);

      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,

                                          MRI);

    }

    return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);

  }


  case TargetOpcode::G_INDEXED_ZEXTLOAD:

  case TargetOpcode::G_INDEXED_SEXTLOAD:

    return selectIndexedExtLoad(I, MRI);

  case TargetOpcode::G_INDEXED_LOAD:

    return selectIndexedLoad(I, MRI);

  case TargetOpcode::G_INDEXED_STORE:

    return selectIndexedStore(cast<GIndexedStore>(I), MRI);


  case TargetOpcode::G_LSHR:

  case TargetOpcode::G_ASHR:

    if (MRI.getType(I.getOperand(0).getReg()).isVector())

      return selectVectorAshrLshr(I, MRI);

    [[fallthrough]];

  case TargetOpcode::G_SHL:

    if (Opcode == TargetOpcode::G_SHL &&

        MRI.getType(I.getOperand(0).getReg()).isVector())

      return selectVectorSHL(I, MRI);


    // These shifts were legalized to have 64 bit shift amounts because we

    // want to take advantage of the selection patterns that assume the

    // immediates are s64s, however, selectBinaryOp will assume both operands

    // will have the same bit size.

    {

      Register SrcReg = I.getOperand(1).getReg();

      Register ShiftReg = I.getOperand(2).getReg();

      const LLT ShiftTy = MRI.getType(ShiftReg);

      const LLT SrcTy = MRI.getType(SrcReg);

      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&

          ShiftTy.getSizeInBits() == 64) {

        assert(!ShiftTy.isVector() && "unexpected vector shift ty");

        // Insert a subregister copy to implement a 64->32 trunc

        auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})

                         .addReg(ShiftReg, 0, AArch64::sub_32);

        MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

        I.getOperand(2).setReg(Trunc.getReg(0));

      }

    }

    [[fallthrough]];

  case TargetOpcode::G_OR: {

    // Reject the various things we don't support yet.

    if (unsupportedBinOp(I, RBI, MRI, TRI))

      return false;


    const unsigned OpSize = Ty.getSizeInBits();


    const Register DefReg = I.getOperand(0).getReg();

    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);


    const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);

    if (NewOpc == I.getOpcode())

      return false;


    I.setDesc(TII.get(NewOpc));

    // FIXME: Should the type be always reset in setDesc?


    // Now that we selected an opcode, we need to constrain the register

    // operands to use appropriate classes.

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

  }


  case TargetOpcode::G_PTR_ADD: {

    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);

    I.eraseFromParent();

    return true;

  }


  case TargetOpcode::G_SADDE:

  case TargetOpcode::G_UADDE:

  case TargetOpcode::G_SSUBE:

  case TargetOpcode::G_USUBE:

  case TargetOpcode::G_SADDO:

  case TargetOpcode::G_UADDO:

  case TargetOpcode::G_SSUBO:

  case TargetOpcode::G_USUBO:

    return selectOverflowOp(I, MRI);


  case TargetOpcode::G_PTRMASK: {

    Register MaskReg = I.getOperand(2).getReg();

    std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);

    // TODO: Implement arbitrary cases

    if (!MaskVal || !isShiftedMask_64(*MaskVal))

      return false;


    uint64_t Mask = *MaskVal;

    I.setDesc(TII.get(AArch64::ANDXri));

    I.getOperand(2).ChangeToImmediate(

        AArch64_AM::encodeLogicalImmediate(Mask, 64));


    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

  }

  case TargetOpcode::G_PTRTOINT:

  case TargetOpcode::G_TRUNC: {

    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());

    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());


    const Register DstReg = I.getOperand(0).getReg();

    const Register SrcReg = I.getOperand(1).getReg();


    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);

    const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);


    if (DstRB.getID() != SrcRB.getID()) {

      LLVM_DEBUG(

          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");

      return false;

    }


    if (DstRB.getID() == AArch64::GPRRegBankID) {

      const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);

      if (!DstRC)

        return false;


      const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);

      if (!SrcRC)

        return false;


      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||

          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {

        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");

        return false;

      }


      if (DstRC == SrcRC) {

        // Nothing to be done

      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&

                 SrcTy == LLT::scalar(64)) {

        llvm_unreachable("TableGen can import this case");

        return false;

      } else if (DstRC == &AArch64::GPR32RegClass &&

                 SrcRC == &AArch64::GPR64RegClass) {

        I.getOperand(1).setSubReg(AArch64::sub_32);

      } else {

        LLVM_DEBUG(

            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");

        return false;

      }


      I.setDesc(TII.get(TargetOpcode::COPY));

      return true;

    } else if (DstRB.getID() == AArch64::FPRRegBankID) {

      if (DstTy == LLT::fixed_vector(4, 16) &&

          SrcTy == LLT::fixed_vector(4, 32)) {

        I.setDesc(TII.get(AArch64::XTNv4i16));

        constrainSelectedInstRegOperands(I, TII, TRI, RBI);

        return true;

      }


      if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {

        MachineInstr *Extract = emitExtractVectorElt(

            DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);

        if (!Extract)

          return false;

        I.eraseFromParent();

        return true;

      }


      // We might have a vector G_PTRTOINT, in which case just emit a COPY.

      if (Opcode == TargetOpcode::G_PTRTOINT) {

        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");

        I.setDesc(TII.get(TargetOpcode::COPY));

        return selectCopy(I, TII, MRI, TRI, RBI);

      }

    }


    return false;

  }


  case TargetOpcode::G_ANYEXT: {

    if (selectUSMovFromExtend(I, MRI))

      return true;


    const Register DstReg = I.getOperand(0).getReg();

    const Register SrcReg = I.getOperand(1).getReg();


    const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);

    if (RBDst.getID() != AArch64::GPRRegBankID) {

      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst

                        << ", expected: GPR\n");

      return false;

    }


    const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);

    if (RBSrc.getID() != AArch64::GPRRegBankID) {

      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc

                        << ", expected: GPR\n");

      return false;

    }


    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();


    if (DstSize == 0) {

      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");

      return false;

    }


    if (DstSize != 64 && DstSize > 32) {

      LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize

                        << ", expected: 32 or 64\n");

      return false;

    }

    // At this point G_ANYEXT is just like a plain COPY, but we need

    // to explicitly form the 64-bit value if any.

    if (DstSize > 32) {

      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);

      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))

          .addDef(ExtSrc)

          .addImm(0)

          .addUse(SrcReg)

          .addImm(AArch64::sub_32);

      I.getOperand(1).setReg(ExtSrc);

    }

    return selectCopy(I, TII, MRI, TRI, RBI);

  }


  case TargetOpcode::G_ZEXT:

  case TargetOpcode::G_SEXT_INREG:

  case TargetOpcode::G_SEXT: {

    if (selectUSMovFromExtend(I, MRI))

      return true;


    unsigned Opcode = I.getOpcode();

    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;

    const Register DefReg = I.getOperand(0).getReg();

    Register SrcReg = I.getOperand(1).getReg();

    const LLT DstTy = MRI.getType(DefReg);

    const LLT SrcTy = MRI.getType(SrcReg);

    unsigned DstSize = DstTy.getSizeInBits();

    unsigned SrcSize = SrcTy.getSizeInBits();


    // SEXT_INREG has the same src reg size as dst, the size of the value to be

    // extended is encoded in the imm.

    if (Opcode == TargetOpcode::G_SEXT_INREG)

      SrcSize = I.getOperand(2).getImm();


    if (DstTy.isVector())

      return false; // Should be handled by imported patterns.


    assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==

               AArch64::GPRRegBankID &&

           "Unexpected ext regbank");


    MachineInstr *ExtI;


    // First check if we're extending the result of a load which has a dest type

    // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest

    // GPR register on AArch64 and all loads which are smaller automatically

    // zero-extend the upper bits. E.g.

    // %v(s8) = G_LOAD %p, :: (load 1)

    // %v2(s32) = G_ZEXT %v(s8)

    if (!IsSigned) {

      auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);

      bool IsGPR =

          RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;

      if (LoadMI && IsGPR) {

        const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();

        unsigned BytesLoaded = MemOp->getSize().getValue();

        if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)

          return selectCopy(I, TII, MRI, TRI, RBI);

      }


      // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)

      // + SUBREG_TO_REG.

      if (IsGPR && SrcSize == 32 && DstSize == 64) {

        Register SubregToRegSrc =

            MRI.createVirtualRegister(&AArch64::GPR32RegClass);

        const Register ZReg = AArch64::WZR;

        MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})

            .addImm(0);


        MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})

            .addImm(0)

            .addUse(SubregToRegSrc)

            .addImm(AArch64::sub_32);


        if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,

                                          MRI)) {

          LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");

          return false;

        }


        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,

                                          MRI)) {

          LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");

          return false;

        }


        I.eraseFromParent();

        return true;

      }

    }


    if (DstSize == 64) {

      if (Opcode != TargetOpcode::G_SEXT_INREG) {

        // FIXME: Can we avoid manually doing this?

        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,

                                          MRI)) {

          LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)

                            << " operand\n");

          return false;

        }

        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,

                                {&AArch64::GPR64RegClass}, {})

                     .addImm(0)

                     .addUse(SrcReg)

                     .addImm(AArch64::sub_32)

                     .getReg(0);

      }


      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,

                             {DefReg}, {SrcReg})

                  .addImm(0)

                  .addImm(SrcSize - 1);

    } else if (DstSize <= 32) {

      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,

                             {DefReg}, {SrcReg})

                  .addImm(0)

                  .addImm(SrcSize - 1);

    } else {

      return false;

    }


    constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);

    I.eraseFromParent();

    return true;

  }


  case TargetOpcode::G_FREEZE:

    return selectCopy(I, TII, MRI, TRI, RBI);


  case TargetOpcode::G_INTTOPTR:

    // The importer is currently unable to import pointer types since they

    // didn't exist in SelectionDAG.

    return selectCopy(I, TII, MRI, TRI, RBI);


  case TargetOpcode::G_BITCAST:

    // Imported SelectionDAG rules can handle every bitcast except those that

    // bitcast from a type to the same type. Ideally, these shouldn't occur

    // but we might not run an optimizer that deletes them. The other exception

    // is bitcasts involving pointer types, as SelectionDAG has no knowledge

    // of them.

    return selectCopy(I, TII, MRI, TRI, RBI);


  case TargetOpcode::G_SELECT: {

    auto &Sel = cast<GSelect>(I);

    const Register CondReg = Sel.getCondReg();

    const Register TReg = Sel.getTrueReg();

    const Register FReg = Sel.getFalseReg();


    if (tryOptSelect(Sel))

      return true;


    // Make sure to use an unused vreg instead of wzr, so that the peephole

    // optimizations will be able to optimize these.

    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})

                     .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

    constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);

    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))

      return false;

    Sel.eraseFromParent();

    return true;

  }

  case TargetOpcode::G_ICMP: {

    if (Ty.isVector())

      return false;


    if (Ty != LLT::scalar(32)) {

      LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty

                        << ", expected: " << LLT::scalar(32) << '\n');

      return false;

    }


    auto &PredOp = I.getOperand(1);

    emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);

    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());

    const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(

        CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);

    emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,

              /*Src2=*/AArch64::WZR, InvCC, MIB);

    I.eraseFromParent();

    return true;

  }


  case TargetOpcode::G_FCMP: {

    CmpInst::Predicate Pred =

        static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());

    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,

                       Pred) ||

        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))

      return false;

    I.eraseFromParent();

    return true;

  }

  case TargetOpcode::G_VASTART:

    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)

                                : selectVaStartAAPCS(I, MF, MRI);

  case TargetOpcode::G_INTRINSIC:

    return selectIntrinsic(I, MRI);

  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:

    return selectIntrinsicWithSideEffects(I, MRI);

  case TargetOpcode::G_IMPLICIT_DEF: {

    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));

    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());

    const Register DstReg = I.getOperand(0).getReg();

    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);

    const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);

    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);

    return true;

  }

  case TargetOpcode::G_BLOCK_ADDR: {

    Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();

    if (std::optional<uint16_t> BADisc =

            STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {

      MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});

      MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});

      MIB.buildInstr(AArch64::MOVaddrPAC)

          .addBlockAddress(I.getOperand(1).getBlockAddress())

          .addImm(AArch64PACKey::IA)

          .addReg(/*AddrDisc=*/AArch64::XZR)

          .addImm(*BADisc)

          .constrainAllUses(TII, TRI, RBI);

      MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));

      RBI.constrainGenericRegister(I.getOperand(0).getReg(),

                                   AArch64::GPR64RegClass, MRI);

      I.eraseFromParent();

      return true;

    }

    if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {

      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);

      I.eraseFromParent();

      return true;

    } else {

      I.setDesc(TII.get(AArch64::MOVaddrBA));

      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),

                           I.getOperand(0).getReg())

                       .addBlockAddress(I.getOperand(1).getBlockAddress(),

                                        /* Offset */ 0, AArch64II::MO_PAGE)

                       .addBlockAddress(

                           I.getOperand(1).getBlockAddress(), /* Offset */ 0,

                           AArch64II::MO_NC | AArch64II::MO_PAGEOFF);

      I.eraseFromParent();

      return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);

    }

  }

  case AArch64::G_DUP: {

    // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by

    // imported patterns. Do it manually here. Avoiding generating s16 gpr is

    // difficult because at RBS we may end up pessimizing the fpr case if we

    // decided to add an anyextend to fix this. Manual selection is the most

    // robust solution for now.

    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=

        AArch64::GPRRegBankID)

      return false; // We expect the fpr regbank case to be imported.

    LLT VecTy = MRI.getType(I.getOperand(0).getReg());

    if (VecTy == LLT::fixed_vector(8, 8))

      I.setDesc(TII.get(AArch64::DUPv8i8gpr));

    else if (VecTy == LLT::fixed_vector(16, 8))

      I.setDesc(TII.get(AArch64::DUPv16i8gpr));

    else if (VecTy == LLT::fixed_vector(4, 16))

      I.setDesc(TII.get(AArch64::DUPv4i16gpr));

    else if (VecTy == LLT::fixed_vector(8, 16))

      I.setDesc(TII.get(AArch64::DUPv8i16gpr));

    else

      return false;

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

  }

  case TargetOpcode::G_BUILD_VECTOR:

    return selectBuildVector(I, MRI);

  case TargetOpcode::G_MERGE_VALUES:

    return selectMergeValues(I, MRI);

  case TargetOpcode::G_UNMERGE_VALUES:

    return selectUnmergeValues(I, MRI);

  case TargetOpcode::G_SHUFFLE_VECTOR:

    return selectShuffleVector(I, MRI);

  case TargetOpcode::G_EXTRACT_VECTOR_ELT:

    return selectExtractElt(I, MRI);

  case TargetOpcode::G_CONCAT_VECTORS:

    return selectConcatVectors(I, MRI);

  case TargetOpcode::G_JUMP_TABLE:

    return selectJumpTable(I, MRI);

  case TargetOpcode::G_MEMCPY:

  case TargetOpcode::G_MEMCPY_INLINE:

  case TargetOpcode::G_MEMMOVE:

  case TargetOpcode::G_MEMSET:

    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");

    return selectMOPS(I, MRI);

  }


  return false;

}


bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {

  MachineIRBuilderState OldMIBState = MIB.getState();

  bool Success = select(I);

  MIB.setState(OldMIBState);

  return Success;

}


bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,

                                            MachineRegisterInfo &MRI) {

  unsigned Mopcode;

  switch (GI.getOpcode()) {

  case TargetOpcode::G_MEMCPY:

  case TargetOpcode::G_MEMCPY_INLINE:

    Mopcode = AArch64::MOPSMemoryCopyPseudo;

    break;

  case TargetOpcode::G_MEMMOVE:

    Mopcode = AArch64::MOPSMemoryMovePseudo;

    break;

  case TargetOpcode::G_MEMSET:

    // For tagged memset see llvm.aarch64.mops.memset.tag

    Mopcode = AArch64::MOPSMemorySetPseudo;

    break;

  }


  auto &DstPtr = GI.getOperand(0);

  auto &SrcOrVal = GI.getOperand(1);

  auto &Size = GI.getOperand(2);


  // Create copies of the registers that can be clobbered.

  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());

  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());

  const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());


  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;

  const auto &SrcValRegClass =

      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;


  // Constrain to specific registers

  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);

  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);

  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);


  MIB.buildCopy(DstPtrCopy, DstPtr);

  MIB.buildCopy(SrcValCopy, SrcOrVal);

  MIB.buildCopy(SizeCopy, Size);


  // New instruction uses the copied registers because it must update them.

  // The defs are not used since they don't exist in G_MEM*. They are still

  // tied.

  // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE

  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);

  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  if (IsSet) {

    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},

                   {DstPtrCopy, SizeCopy, SrcValCopy});

  } else {

    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);

    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},

                   {DstPtrCopy, SrcValCopy, SizeCopy});

  }


  GI.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,

                                            MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");

  Register JTAddr = I.getOperand(0).getReg();

  unsigned JTI = I.getOperand(1).getIndex();

  Register Index = I.getOperand(2).getReg();


  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);


  // With aarch64-jump-table-hardening, we only expand the jump table dispatch

  // sequence later, to guarantee the integrity of the intermediate values.

  if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {

    CodeModel::Model CM = TM.getCodeModel();

    if (STI.isTargetMachO()) {

      if (CM != CodeModel::Small && CM != CodeModel::Large)

        report_fatal_error("Unsupported code-model for hardened jump-table");

    } else {

      // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.

      assert(STI.isTargetELF() &&

             "jump table hardening only supported on MachO/ELF");

      if (CM != CodeModel::Small)

        report_fatal_error("Unsupported code-model for hardened jump-table");

    }


    MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());

    MIB.buildInstr(AArch64::BR_JumpTable)

        .addJumpTableIndex(I.getOperand(1).getIndex());

    I.eraseFromParent();

    return true;

  }


  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);


  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,

                                      {TargetReg, ScratchReg}, {JTAddr, Index})

                           .addJumpTableIndex(JTI);

  // Save the jump table info.

  MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},

                 {static_cast<int64_t>(JTI)});

  // Build the indirect branch.

  MIB.buildInstr(AArch64::BR, {}, {TargetReg});

  I.eraseFromParent();

  return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);

}


bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,

                                                 MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");

  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");


  Register DstReg = I.getOperand(0).getReg();

  unsigned JTI = I.getOperand(1).getIndex();

  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.

  auto MovMI =

    MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})

          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)

          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);

  I.eraseFromParent();

  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);

}


bool AArch64InstructionSelector::selectTLSGlobalValue(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  if (!STI.isTargetMachO())

    return false;

  MachineFunction &MF = *I.getParent()->getParent();

  MF.getFrameInfo().setAdjustsStack(true);


  const auto &GlobalOp = I.getOperand(1);

  assert(GlobalOp.getOffset() == 0 &&

         "Shouldn't have an offset on TLS globals!");

  const GlobalValue &GV = *GlobalOp.getGlobal();


  auto LoadGOT =

      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})

          .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);


  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},

                             {LoadGOT.getReg(0)})

                  .addImm(0);


  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));

  // TLS calls preserve all registers except those that absolutely must be

  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be

  // silly).

  unsigned Opcode = getBLRCallOpcode(MF);


  // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).

  if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {

    assert(Opcode == AArch64::BLR);

    Opcode = AArch64::BLRAAZ;

  }


  MIB.buildInstr(Opcode, {}, {Load})

      .addUse(AArch64::X0, RegState::Implicit)

      .addDef(AArch64::X0, RegState::Implicit)

      .addRegMask(TRI.getTLSCallPreservedMask());


  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));

  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,

                               MRI);

  I.eraseFromParent();

  return true;

}


MachineInstr *AArch64InstructionSelector::emitScalarToVector(

    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,

    MachineIRBuilder &MIRBuilder) const {

  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});


  auto BuildFn = [&](unsigned SubregIndex) {

    auto Ins =

        MIRBuilder

            .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})

            .addImm(SubregIndex);

    constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);

    constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);

    return &*Ins;

  };


  switch (EltSize) {

  case 8:

    return BuildFn(AArch64::bsub);

  case 16:

    return BuildFn(AArch64::hsub);

  case 32:

    return BuildFn(AArch64::ssub);

  case 64:

    return BuildFn(AArch64::dsub);

  default:

    return nullptr;

  }

}


MachineInstr *

AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,

                                             MachineIRBuilder &MIB,

                                             MachineRegisterInfo &MRI) const {

  LLT DstTy = MRI.getType(DstReg);

  const TargetRegisterClass *RC =

      getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));

  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {

    LLVM_DEBUG(dbgs() << "Unsupported register class!\n");

    return nullptr;

  }

  unsigned SubReg = 0;

  if (!getSubRegForClass(RC, TRI, SubReg))

    return nullptr;

  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {

    LLVM_DEBUG(dbgs() << "Unsupported destination size! ("

                      << DstTy.getSizeInBits() << "\n");

    return nullptr;

  }

  auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})

                  .addReg(SrcReg, 0, SubReg);

  RBI.constrainGenericRegister(DstReg, *RC, MRI);

  return Copy;

}


bool AArch64InstructionSelector::selectMergeValues(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());

  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());

  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");

  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);


  if (I.getNumOperands() != 3)

    return false;


  // Merging 2 s64s into an s128.

  if (DstTy == LLT::scalar(128)) {

    if (SrcTy.getSizeInBits() != 64)

      return false;

    Register DstReg = I.getOperand(0).getReg();

    Register Src1Reg = I.getOperand(1).getReg();

    Register Src2Reg = I.getOperand(2).getReg();

    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});

    MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,

                                         /* LaneIdx */ 0, RB, MIB);

    if (!InsMI)

      return false;

    MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),

                                          Src2Reg, /* LaneIdx */ 1, RB, MIB);

    if (!Ins2MI)

      return false;

    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);

    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);

    I.eraseFromParent();

    return true;

  }


  if (RB.getID() != AArch64::GPRRegBankID)

    return false;


  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)

    return false;


  auto *DstRC = &AArch64::GPR64RegClass;

  Register SubToRegDef = MRI.createVirtualRegister(DstRC);

  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),

                                    TII.get(TargetOpcode::SUBREG_TO_REG))

                                .addDef(SubToRegDef)

                                .addImm(0)

                                .addUse(I.getOperand(1).getReg())

                                .addImm(AArch64::sub_32);

  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);

  // Need to anyext the second scalar before we can use bfm

  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),

                                    TII.get(TargetOpcode::SUBREG_TO_REG))

                                .addDef(SubToRegDef2)

                                .addImm(0)

                                .addUse(I.getOperand(2).getReg())

                                .addImm(AArch64::sub_32);

  MachineInstr &BFM =

      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))

           .addDef(I.getOperand(0).getReg())

           .addUse(SubToRegDef)

           .addUse(SubToRegDef2)

           .addImm(32)

           .addImm(31);

  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);

  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);

  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);

  I.eraseFromParent();

  return true;

}


static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,

                              const unsigned EltSize) {

  // Choose a lane copy opcode and subregister based off of the size of the

  // vector's elements.

  switch (EltSize) {

  case 8:

    CopyOpc = AArch64::DUPi8;

    ExtractSubReg = AArch64::bsub;

    break;

  case 16:

    CopyOpc = AArch64::DUPi16;

    ExtractSubReg = AArch64::hsub;

    break;

  case 32:

    CopyOpc = AArch64::DUPi32;

    ExtractSubReg = AArch64::ssub;

    break;

  case 64:

    CopyOpc = AArch64::DUPi64;

    ExtractSubReg = AArch64::dsub;

    break;

  default:

    // Unknown size, bail out.

    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");

    return false;

  }

  return true;

}


MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(

    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,

    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  unsigned CopyOpc = 0;

  unsigned ExtractSubReg = 0;

  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {

    LLVM_DEBUG(

        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");

    return nullptr;

  }


  const TargetRegisterClass *DstRC =

      getRegClassForTypeOnBank(ScalarTy, DstRB, true);

  if (!DstRC) {

    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");

    return nullptr;

  }


  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);

  const LLT &VecTy = MRI.getType(VecReg);

  const TargetRegisterClass *VecRC =

      getRegClassForTypeOnBank(VecTy, VecRB, true);

  if (!VecRC) {

    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");

    return nullptr;

  }


  // The register that we're going to copy into.

  Register InsertReg = VecReg;

  if (!DstReg)

    DstReg = MRI.createVirtualRegister(DstRC);

  // If the lane index is 0, we just use a subregister COPY.

  if (LaneIdx == 0) {

    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})

                    .addReg(VecReg, 0, ExtractSubReg);

    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);

    return &*Copy;

  }


  // Lane copies require 128-bit wide registers. If we're dealing with an

  // unpacked vector, then we need to move up to that width. Insert an implicit

  // def and a subregister insert to get us there.

  if (VecTy.getSizeInBits() != 128) {

    MachineInstr *ScalarToVector = emitScalarToVector(

        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);

    if (!ScalarToVector)

      return nullptr;

    InsertReg = ScalarToVector->getOperand(0).getReg();

  }


  MachineInstr *LaneCopyMI =

      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);

  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);


  // Make sure that we actually constrain the initial copy.

  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);

  return LaneCopyMI;

}


bool AArch64InstructionSelector::selectExtractElt(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&

         "unexpected opcode!");

  Register DstReg = I.getOperand(0).getReg();

  const LLT NarrowTy = MRI.getType(DstReg);

  const Register SrcReg = I.getOperand(1).getReg();

  const LLT WideTy = MRI.getType(SrcReg);

  (void)WideTy;

  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&

         "source register size too small!");

  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");


  // Need the lane index to determine the correct copy opcode.

  MachineOperand &LaneIdxOp = I.getOperand(2);

  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");


  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {

    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");

    return false;

  }


  // Find the index to extract from.

  auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);

  if (!VRegAndVal)

    return false;

  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();


  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);

  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,

                                               LaneIdx, MIB);

  if (!Extract)

    return false;


  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectSplitVectorUnmerge(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  unsigned NumElts = I.getNumOperands() - 1;

  Register SrcReg = I.getOperand(NumElts).getReg();

  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());

  const LLT SrcTy = MRI.getType(SrcReg);


  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");

  if (SrcTy.getSizeInBits() > 128) {

    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");

    return false;

  }


  // We implement a split vector operation by treating the sub-vectors as

  // scalars and extracting them.

  const RegisterBank &DstRB =

      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);

  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {

    Register Dst = I.getOperand(OpIdx).getReg();

    MachineInstr *Extract =

        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);

    if (!Extract)

      return false;

  }

  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,

                                                     MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&

         "unexpected opcode");


  // TODO: Handle unmerging into GPRs and from scalars to scalars.

  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=

          AArch64::FPRRegBankID ||

      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=

          AArch64::FPRRegBankID) {

    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "

                         "currently unsupported.\n");

    return false;

  }


  // The last operand is the vector source register, and every other operand is

  // a register to unpack into.

  unsigned NumElts = I.getNumOperands() - 1;

  Register SrcReg = I.getOperand(NumElts).getReg();

  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());

  const LLT WideTy = MRI.getType(SrcReg);

  (void)WideTy;

  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&

         "can only unmerge from vector or s128 types!");

  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&

         "source register size too small!");


  if (!NarrowTy.isScalar())

    return selectSplitVectorUnmerge(I, MRI);


  // Choose a lane copy opcode and subregister based off of the size of the

  // vector's elements.

  unsigned CopyOpc = 0;

  unsigned ExtractSubReg = 0;

  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))

    return false;


  // Set up for the lane copies.

  MachineBasicBlock &MBB = *I.getParent();


  // Stores the registers we'll be copying from.

  SmallVector<Register, 4> InsertRegs;


  // We'll use the first register twice, so we only need NumElts-1 registers.

  unsigned NumInsertRegs = NumElts - 1;


  // If our elements fit into exactly 128 bits, then we can copy from the source

  // directly. Otherwise, we need to do a bit of setup with some subregister

  // inserts.

  if (NarrowTy.getSizeInBits() * NumElts == 128) {

    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);

  } else {

    // No. We have to perform subregister inserts. For each insert, create an

    // implicit def and a subregister insert, and save the register we create.

    const TargetRegisterClass *RC = getRegClassForTypeOnBank(

        LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),

        *RBI.getRegBank(SrcReg, MRI, TRI));

    unsigned SubReg = 0;

    bool Found = getSubRegForClass(RC, TRI, SubReg);

    (void)Found;

    assert(Found && "expected to find last operand's subeg idx");

    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {

      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);

      MachineInstr &ImpDefMI =

          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),

                   ImpDefReg);


      // Now, create the subregister insert from SrcReg.

      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);

      MachineInstr &InsMI =

          *BuildMI(MBB, I, I.getDebugLoc(),

                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)

               .addUse(ImpDefReg)

               .addUse(SrcReg)

               .addImm(SubReg);


      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);

      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);


      // Save the register so that we can copy from it after.

      InsertRegs.push_back(InsertReg);

    }

  }


  // Now that we've created any necessary subregister inserts, we can

  // create the copies.

  //

  // Perform the first copy separately as a subregister copy.

  Register CopyTo = I.getOperand(0).getReg();

  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})

                       .addReg(InsertRegs[0], 0, ExtractSubReg);

  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);


  // Now, perform the remaining copies as vector lane copies.

  unsigned LaneIdx = 1;

  for (Register InsReg : InsertRegs) {

    Register CopyTo = I.getOperand(LaneIdx).getReg();

    MachineInstr &CopyInst =

        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)

             .addUse(InsReg)

             .addImm(LaneIdx);

    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);

    ++LaneIdx;

  }


  // Separately constrain the first copy's destination. Because of the

  // limitation in constrainOperandRegClass, we can't guarantee that this will

  // actually be constrained. So, do it ourselves using the second operand.

  const TargetRegisterClass *RC =

      MRI.getRegClassOrNull(I.getOperand(1).getReg());

  if (!RC) {

    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");

    return false;

  }


  RBI.constrainGenericRegister(CopyTo, *RC, MRI);

  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectConcatVectors(

    MachineInstr &I, MachineRegisterInfo &MRI)  {

  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&

         "Unexpected opcode");

  Register Dst = I.getOperand(0).getReg();

  Register Op1 = I.getOperand(1).getReg();

  Register Op2 = I.getOperand(2).getReg();

  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);

  if (!ConcatMI)

    return false;

  I.eraseFromParent();

  return true;

}


unsigned

AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,

                                                  MachineFunction &MF) const {

  Type *CPTy = CPVal->getType();

  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);


  MachineConstantPool *MCP = MF.getConstantPool();

  return MCP->getConstantPoolIndex(CPVal, Alignment);

}


MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(

    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {

  const TargetRegisterClass *RC;

  unsigned Opc;

  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;

  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());

  switch (Size) {

  case 16:

    RC = &AArch64::FPR128RegClass;

    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;

    break;

  case 8:

    RC = &AArch64::FPR64RegClass;

    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;

    break;

  case 4:

    RC = &AArch64::FPR32RegClass;

    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;

    break;

  case 2:

    RC = &AArch64::FPR16RegClass;

    Opc = AArch64::LDRHui;

    break;

  default:

    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "

                      << *CPVal->getType());

    return nullptr;

  }


  MachineInstr *LoadMI = nullptr;

  auto &MF = MIRBuilder.getMF();

  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);

  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {

    // Use load(literal) for tiny code model.

    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);

  } else {

    auto Adrp =

        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})

            .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);


    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})

                   .addConstantPoolIndex(

                       CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);


    constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);

  }


  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);

  LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,

                                                    MachineMemOperand::MOLoad,

                                                    Size, Align(Size)));

  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);

  return LoadMI;

}


/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given

/// size and RB.

static std::pair<unsigned, unsigned>


getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {

  unsigned Opc, SubregIdx;

  if (RB.getID() == AArch64::GPRRegBankID) {

    if (EltSize == 8) {

      Opc = AArch64::INSvi8gpr;

      SubregIdx = AArch64::bsub;

    } else if (EltSize == 16) {

      Opc = AArch64::INSvi16gpr;

      SubregIdx = AArch64::ssub;

    } else if (EltSize == 32) {

      Opc = AArch64::INSvi32gpr;

      SubregIdx = AArch64::ssub;

    } else if (EltSize == 64) {

      Opc = AArch64::INSvi64gpr;

      SubregIdx = AArch64::dsub;

    } else {

      llvm_unreachable("invalid elt size!");

    }

  } else {

    if (EltSize == 8) {

      Opc = AArch64::INSvi8lane;

      SubregIdx = AArch64::bsub;

    } else if (EltSize == 16) {

      Opc = AArch64::INSvi16lane;

      SubregIdx = AArch64::hsub;

    } else if (EltSize == 32) {

      Opc = AArch64::INSvi32lane;

      SubregIdx = AArch64::ssub;

    } else if (EltSize == 64) {

      Opc = AArch64::INSvi64lane;

      SubregIdx = AArch64::dsub;

    } else {

      llvm_unreachable("invalid elt size!");

    }

  }

  return std::make_pair(Opc, SubregIdx);

}


MachineInstr *AArch64InstructionSelector::emitInstr(

    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,

    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,

    const ComplexRendererFns &RenderFns) const {

  assert(Opcode && "Expected an opcode?");

  assert(!isPreISelGenericOpcode(Opcode) &&

         "Function should only be used to produce selected instructions!");

  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);

  if (RenderFns)

    for (auto &Fn : *RenderFns)

      Fn(MI);

  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);

  return &*MI;

}


MachineInstr *AArch64InstructionSelector::emitAddSub(

    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,

    Register Dst, MachineOperand &LHS, MachineOperand &RHS,

    MachineIRBuilder &MIRBuilder) const {

  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");

  auto Ty = MRI.getType(LHS.getReg());

  assert(!Ty.isVector() && "Expected a scalar or pointer?");

  unsigned Size = Ty.getSizeInBits();

  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");

  bool Is32Bit = Size == 32;


  // INSTRri form with positive arithmetic immediate.

  if (auto Fns = selectArithImmed(RHS))

    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},

                     MIRBuilder, Fns);


  // INSTRri form with negative arithmetic immediate.

  if (auto Fns = selectNegArithImmed(RHS))

    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},

                     MIRBuilder, Fns);


  // INSTRrx form.

  if (auto Fns = selectArithExtendedRegister(RHS))

    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},

                     MIRBuilder, Fns);


  // INSTRrs form.

  if (auto Fns = selectShiftedRegister(RHS))

    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},

                     MIRBuilder, Fns);

  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},

                   MIRBuilder);

}


MachineInstr *

AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,

                                    MachineOperand &RHS,

                                    MachineIRBuilder &MIRBuilder) const {

  const std::array<std::array<unsigned, 2>, 5> OpcTable{

      {{AArch64::ADDXri, AArch64::ADDWri},

       {AArch64::ADDXrs, AArch64::ADDWrs},

       {AArch64::ADDXrr, AArch64::ADDWrr},

       {AArch64::SUBXri, AArch64::SUBWri},

       {AArch64::ADDXrx, AArch64::ADDWrx}}};

  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);

}


MachineInstr *

AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,

                                     MachineOperand &RHS,

                                     MachineIRBuilder &MIRBuilder) const {

  const std::array<std::array<unsigned, 2>, 5> OpcTable{

      {{AArch64::ADDSXri, AArch64::ADDSWri},

       {AArch64::ADDSXrs, AArch64::ADDSWrs},

       {AArch64::ADDSXrr, AArch64::ADDSWrr},

       {AArch64::SUBSXri, AArch64::SUBSWri},

       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};

  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);

}


MachineInstr *

AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,

                                     MachineOperand &RHS,

                                     MachineIRBuilder &MIRBuilder) const {

  const std::array<std::array<unsigned, 2>, 5> OpcTable{

      {{AArch64::SUBSXri, AArch64::SUBSWri},

       {AArch64::SUBSXrs, AArch64::SUBSWrs},

       {AArch64::SUBSXrr, AArch64::SUBSWrr},

       {AArch64::ADDSXri, AArch64::ADDSWri},

       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};

  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);

}


MachineInstr *

AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,

                                     MachineOperand &RHS,

                                     MachineIRBuilder &MIRBuilder) const {

  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");

  MachineRegisterInfo *MRI = MIRBuilder.getMRI();

  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);

  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};

  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);

}


MachineInstr *

AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,

                                     MachineOperand &RHS,

                                     MachineIRBuilder &MIRBuilder) const {

  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");

  MachineRegisterInfo *MRI = MIRBuilder.getMRI();

  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);

  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};

  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);

}


MachineInstr *

AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,

                                    MachineIRBuilder &MIRBuilder) const {

  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);

  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;

  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);

}


MachineInstr *

AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,

                                    MachineIRBuilder &MIRBuilder) const {

  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");

  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  LLT Ty = MRI.getType(LHS.getReg());

  unsigned RegSize = Ty.getSizeInBits();

  bool Is32Bit = (RegSize == 32);

  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},

                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},

                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};

  // ANDS needs a logical immediate for its immediate form. Check if we can

  // fold one in.

  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {

    int64_t Imm = ValAndVReg->Value.getSExtValue();


    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {

      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});

      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));

      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);

      return &*TstMI;

    }

  }


  if (auto Fns = selectLogicalShiftedRegister(RHS))

    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);

  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);

}


MachineInstr *AArch64InstructionSelector::emitIntegerCompare(

    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,

    MachineIRBuilder &MIRBuilder) const {

  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");

  assert(Predicate.isPredicate() && "Expected predicate?");

  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  LLT CmpTy = MRI.getType(LHS.getReg());

  assert(!CmpTy.isVector() && "Expected scalar or pointer");

  unsigned Size = CmpTy.getSizeInBits();

  (void)Size;

  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");

  // Fold the compare into a cmn or tst if possible.

  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))

    return FoldCmp;

  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());

  return emitSUBS(Dst, LHS, RHS, MIRBuilder);

}


MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(

    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

#ifndef NDEBUG

  LLT Ty = MRI.getType(Dst);

  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&

         "Expected a 32-bit scalar register?");

#endif

  const Register ZReg = AArch64::WZR;

  AArch64CC::CondCode CC1, CC2;

  changeFCMPPredToAArch64CC(Pred, CC1, CC2);

  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);

  if (CC2 == AArch64CC::AL)

    return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,

                     MIRBuilder);

  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;

  Register Def1Reg = MRI.createVirtualRegister(RC);

  Register Def2Reg = MRI.createVirtualRegister(RC);

  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);

  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);

  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);

  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});

  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);

  return &*OrMI;

}


MachineInstr *AArch64InstructionSelector::emitFPCompare(

    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,

    std::optional<CmpInst::Predicate> Pred) const {

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  LLT Ty = MRI.getType(LHS);

  if (Ty.isVector())

    return nullptr;

  unsigned OpSize = Ty.getSizeInBits();

  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);


  // If this is a compare against +0.0, then we don't have

  // to explicitly materialize a constant.

  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);

  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());


  auto IsEqualityPred = [](CmpInst::Predicate P) {

    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||

           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;

  };

  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {

    // Try commutating the operands.

    const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);

    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {

      ShouldUseImm = true;

      std::swap(LHS, RHS);

    }

  }

  unsigned CmpOpcTbl[2][3] = {

      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},

      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};

  unsigned CmpOpc =

      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];


  // Partially build the compare. Decide if we need to add a use for the

  // third operand based off whether or not we're comparing against 0.0.

  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);

  CmpMI.setMIFlags(MachineInstr::NoFPExcept);

  if (!ShouldUseImm)

    CmpMI.addUse(RHS);

  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);

  return &*CmpMI;

}


MachineInstr *AArch64InstructionSelector::emitVectorConcat(

    std::optional<Register> Dst, Register Op1, Register Op2,

    MachineIRBuilder &MIRBuilder) const {

  // We implement a vector concat by:

  // 1. Use scalar_to_vector to insert the lower vector into the larger dest

  // 2. Insert the upper vector into the destination's upper element

  // TODO: some of this code is common with G_BUILD_VECTOR handling.

  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();


  const LLT Op1Ty = MRI.getType(Op1);

  const LLT Op2Ty = MRI.getType(Op2);


  if (Op1Ty != Op2Ty) {

    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");

    return nullptr;

  }

  assert(Op1Ty.isVector() && "Expected a vector for vector concat");


  if (Op1Ty.getSizeInBits() >= 128) {

    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");

    return nullptr;

  }


  // At the moment we just support 64 bit vector concats.

  if (Op1Ty.getSizeInBits() != 64) {

    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");

    return nullptr;

  }


  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());

  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);

  const TargetRegisterClass *DstRC =

      getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);


  MachineInstr *WidenedOp1 =

      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);

  MachineInstr *WidenedOp2 =

      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);

  if (!WidenedOp1 || !WidenedOp2) {

    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");

    return nullptr;

  }


  // Now do the insert of the upper element.

  unsigned InsertOpc, InsSubRegIdx;

  std::tie(InsertOpc, InsSubRegIdx) =

      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());


  if (!Dst)

    Dst = MRI.createVirtualRegister(DstRC);

  auto InsElt =

      MIRBuilder

          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})

          .addImm(1) /* Lane index */

          .addUse(WidenedOp2->getOperand(0).getReg())

          .addImm(0);

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);

  return &*InsElt;

}


MachineInstr *

AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,

                                      Register Src2, AArch64CC::CondCode Pred,

                                      MachineIRBuilder &MIRBuilder) const {

  auto &MRI = *MIRBuilder.getMRI();

  const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);

  // If we used a register class, then this won't necessarily have an LLT.

  // Compute the size based off whether or not we have a class or bank.

  unsigned Size;

  if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))

    Size = TRI.getRegSizeInBits(*RC);

  else

    Size = MRI.getType(Dst).getSizeInBits();

  // Some opcodes use s1.

  assert(Size <= 64 && "Expected 64 bits or less only!");

  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};

  unsigned Opc = OpcTable[Size == 64];

  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);

  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);

  return &*CSINC;

}


MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,

                                                      Register CarryReg) {

  MachineRegisterInfo *MRI = MIB.getMRI();

  unsigned Opcode = I.getOpcode();


  // If the instruction is a SUB, we need to negate the carry,

  // because borrowing is indicated by carry-flag == 0.

  bool NeedsNegatedCarry =

      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);


  // If the previous instruction will already produce the correct carry, do not

  // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences

  // generated during legalization of wide add/sub. This optimization depends on

  // these sequences not being interrupted by other instructions.

  // We have to select the previous instruction before the carry-using

  // instruction is deleted by the calling function, otherwise the previous

  // instruction might become dead and would get deleted.

  MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);

  if (SrcMI == I.getPrevNode()) {

    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {

      bool ProducesNegatedCarry = CarrySrcMI->isSub();

      if (NeedsNegatedCarry == ProducesNegatedCarry &&

          CarrySrcMI->isUnsigned() &&

          CarrySrcMI->getCarryOutReg() == CarryReg &&

          selectAndRestoreState(*SrcMI))

        return nullptr;

    }

  }


  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);


  if (NeedsNegatedCarry) {

    // (0 - Carry) sets !C in NZCV when Carry == 1

    Register ZReg = AArch64::WZR;

    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);

  }


  // (Carry - 1) sets !C in NZCV when Carry == 0

  auto Fns = select12BitValueWithLeftShift(1);

  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);

}


bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,

                                                  MachineRegisterInfo &MRI) {

  auto &CarryMI = cast<GAddSubCarryOut>(I);


  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {

    // Set NZCV carry according to carry-in VReg

    emitCarryIn(I, CarryInMI->getCarryInReg());

  }


  // Emit the operation and get the correct condition code.

  auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),

                                CarryMI.getLHS(), CarryMI.getRHS(), MIB);


  Register CarryOutReg = CarryMI.getCarryOutReg();


  // Don't convert carry-out to VReg if it is never used

  if (!MRI.use_nodbg_empty(CarryOutReg)) {

    // Now, put the overflow result in the register given by the first operand

    // to the overflow op. CSINC increments the result when the predicate is

    // false, so to get the increment when it's true, we need to use the

    // inverse. In this case, we want to increment when carry is set.

    Register ZReg = AArch64::WZR;

    emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,

              getInvertedCondCode(OpAndCC.second), MIB);

  }


  I.eraseFromParent();

  return true;

}


std::pair<MachineInstr *, AArch64CC::CondCode>

AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,

                                           MachineOperand &LHS,

                                           MachineOperand &RHS,

                                           MachineIRBuilder &MIRBuilder) const {

  switch (Opcode) {

  default:

    llvm_unreachable("Unexpected opcode!");

  case TargetOpcode::G_SADDO:

    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);

  case TargetOpcode::G_UADDO:

    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);

  case TargetOpcode::G_SSUBO:

    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);

  case TargetOpcode::G_USUBO:

    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);

  case TargetOpcode::G_SADDE:

    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);

  case TargetOpcode::G_UADDE:

    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);

  case TargetOpcode::G_SSUBE:

    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);

  case TargetOpcode::G_USUBE:

    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);

  }

}


/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be

/// expressed as a conjunction.

/// \param CanNegate    Set to true if we can negate the whole sub-tree just by

///                     changing the conditions on the CMP tests.

///                     (this means we can call emitConjunctionRec() with

///                      Negate==true on this sub-tree)

/// \param MustBeFirst  Set to true if this subtree needs to be negated and we

///                     cannot do the negation naturally. We are required to

///                     emit the subtree first in this case.

/// \param WillNegate   Is true if are called when the result of this

///                     subexpression must be negated. This happens when the

///                     outer expression is an OR. We can use this fact to know

///                     that we have a double negation (or (or ...) ...) that

///                     can be implemented for free.


static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,

                               bool WillNegate, MachineRegisterInfo &MRI,

                               unsigned Depth = 0) {

  if (!MRI.hasOneNonDBGUse(Val))

    return false;

  MachineInstr *ValDef = MRI.getVRegDef(Val);

  unsigned Opcode = ValDef->getOpcode();

  if (isa<GAnyCmp>(ValDef)) {

    CanNegate = true;

    MustBeFirst = false;

    return true;

  }

  // Protect against exponential runtime and stack overflow.

  if (Depth > 6)

    return false;

  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {

    bool IsOR = Opcode == TargetOpcode::G_OR;

    Register O0 = ValDef->getOperand(1).getReg();

    Register O1 = ValDef->getOperand(2).getReg();

    bool CanNegateL;

    bool MustBeFirstL;

    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))

      return false;

    bool CanNegateR;

    bool MustBeFirstR;

    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))

      return false;


    if (MustBeFirstL && MustBeFirstR)

      return false;


    if (IsOR) {

      // For an OR expression we need to be able to naturally negate at least

      // one side or we cannot do the transformation at all.

      if (!CanNegateL && !CanNegateR)

        return false;

      // If we the result of the OR will be negated and we can naturally negate

      // the leaves, then this sub-tree as a whole negates naturally.

      CanNegate = WillNegate && CanNegateL && CanNegateR;

      // If we cannot naturally negate the whole sub-tree, then this must be

      // emitted first.

      MustBeFirst = !CanNegate;

    } else {

      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");

      // We cannot naturally negate an AND operation.

      CanNegate = false;

      MustBeFirst = MustBeFirstL || MustBeFirstR;

    }

    return true;

  }

  return false;

}


MachineInstr *AArch64InstructionSelector::emitConditionalComparison(

    Register LHS, Register RHS, CmpInst::Predicate CC,

    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,

    MachineIRBuilder &MIB) const {

  auto &MRI = *MIB.getMRI();

  LLT OpTy = MRI.getType(LHS);

  unsigned CCmpOpc;

  std::optional<ValueAndVReg> C;

  if (CmpInst::isIntPredicate(CC)) {

    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);

    C = getIConstantVRegValWithLookThrough(RHS, MRI);

    if (!C || C->Value.sgt(31) || C->Value.slt(-31))

      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;

    else if (C->Value.ule(31))

      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;

    else

      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;

  } else {

    assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||

           OpTy.getSizeInBits() == 64);

    switch (OpTy.getSizeInBits()) {

    case 16:

      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");

      CCmpOpc = AArch64::FCCMPHrr;

      break;

    case 32:

      CCmpOpc = AArch64::FCCMPSrr;

      break;

    case 64:

      CCmpOpc = AArch64::FCCMPDrr;

      break;

    default:

      return nullptr;

    }

  }

  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);

  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);

  auto CCmp =

      MIB.buildInstr(CCmpOpc, {}, {LHS});

  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)

    CCmp.addImm(C->Value.getZExtValue());

  else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)

    CCmp.addImm(C->Value.abs().getZExtValue());

  else

    CCmp.addReg(RHS);

  CCmp.addImm(NZCV).addImm(Predicate);

  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);

  return &*CCmp;

}


MachineInstr *AArch64InstructionSelector::emitConjunctionRec(

    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,

    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {

  // We're at a tree leaf, produce a conditional comparison operation.

  auto &MRI = *MIB.getMRI();

  MachineInstr *ValDef = MRI.getVRegDef(Val);

  unsigned Opcode = ValDef->getOpcode();

  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {

    Register LHS = Cmp->getLHSReg();

    Register RHS = Cmp->getRHSReg();

    CmpInst::Predicate CC = Cmp->getCond();

    if (Negate)

      CC = CmpInst::getInversePredicate(CC);

    if (isa<GICmp>(Cmp)) {

      OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());

    } else {

      // Handle special FP cases.

      AArch64CC::CondCode ExtraCC;

      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);

      // Some floating point conditions can't be tested with a single condition

      // code. Construct an additional comparison in this case.

      if (ExtraCC != AArch64CC::AL) {

        MachineInstr *ExtraCmp;

        if (!CCOp)

          ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);

        else

          ExtraCmp =

              emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);

        CCOp = ExtraCmp->getOperand(0).getReg();

        Predicate = ExtraCC;

      }

    }


    // Produce a normal comparison if we are first in the chain

    if (!CCOp) {

      auto Dst = MRI.cloneVirtualRegister(LHS);

      if (isa<GICmp>(Cmp))

        return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);

      return emitFPCompare(Cmp->getOperand(2).getReg(),

                           Cmp->getOperand(3).getReg(), MIB);

    }

    // Otherwise produce a ccmp.

    return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);

  }

  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");


  bool IsOR = Opcode == TargetOpcode::G_OR;


  Register LHS = ValDef->getOperand(1).getReg();

  bool CanNegateL;

  bool MustBeFirstL;

  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);

  assert(ValidL && "Valid conjunction/disjunction tree");

  (void)ValidL;


  Register RHS = ValDef->getOperand(2).getReg();

  bool CanNegateR;

  bool MustBeFirstR;

  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);

  assert(ValidR && "Valid conjunction/disjunction tree");

  (void)ValidR;


  // Swap sub-tree that must come first to the right side.

  if (MustBeFirstL) {

    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");

    std::swap(LHS, RHS);

    std::swap(CanNegateL, CanNegateR);

    std::swap(MustBeFirstL, MustBeFirstR);

  }


  bool NegateR;

  bool NegateAfterR;

  bool NegateL;

  bool NegateAfterAll;

  if (Opcode == TargetOpcode::G_OR) {

    // Swap the sub-tree that we can negate naturally to the left.

    if (!CanNegateL) {

      assert(CanNegateR && "at least one side must be negatable");

      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");

      assert(!Negate);

      std::swap(LHS, RHS);

      NegateR = false;

      NegateAfterR = true;

    } else {

      // Negate the left sub-tree if possible, otherwise negate the result.

      NegateR = CanNegateR;

      NegateAfterR = !CanNegateR;

    }

    NegateL = true;

    NegateAfterAll = !Negate;

  } else {

    assert(Opcode == TargetOpcode::G_AND &&

           "Valid conjunction/disjunction tree");

    assert(!Negate && "Valid conjunction/disjunction tree");


    NegateL = false;

    NegateR = false;

    NegateAfterR = false;

    NegateAfterAll = false;

  }


  // Emit sub-trees.

  AArch64CC::CondCode RHSCC;

  MachineInstr *CmpR =

      emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);

  if (NegateAfterR)

    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);

  MachineInstr *CmpL = emitConjunctionRec(

      LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);

  if (NegateAfterAll)

    OutCC = AArch64CC::getInvertedCondCode(OutCC);

  return CmpL;

}


MachineInstr *AArch64InstructionSelector::emitConjunction(

    Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {

  bool DummyCanNegate;

  bool DummyMustBeFirst;

  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,

                          *MIB.getMRI()))

    return nullptr;

  return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);

}


bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,

                                                         MachineInstr &CondMI) {

  AArch64CC::CondCode AArch64CC;

  MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);

  if (!ConjMI)

    return false;


  emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);

  SelI.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {

  MachineRegisterInfo &MRI = *MIB.getMRI();

  // We want to recognize this pattern:

  //

  // $z = G_FCMP pred, $x, $y

  // ...

  // $w = G_SELECT $z, $a, $b

  //

  // Where the value of $z is *only* ever used by the G_SELECT (possibly with

  // some copies/truncs in between.)

  //

  // If we see this, then we can emit something like this:

  //

  // fcmp $x, $y

  // fcsel $w, $a, $b, pred

  //

  // Rather than emitting both of the rather long sequences in the standard

  // G_FCMP/G_SELECT select methods.


  // First, check if the condition is defined by a compare.

  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());


  // We can only fold if all of the defs have one use.

  Register CondDefReg = CondDef->getOperand(0).getReg();

  if (!MRI.hasOneNonDBGUse(CondDefReg)) {

    // Unless it's another select.

    for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {

      if (CondDef == &UI)

        continue;

      if (UI.getOpcode() != TargetOpcode::G_SELECT)

        return false;

    }

  }


  // Is the condition defined by a compare?

  unsigned CondOpc = CondDef->getOpcode();

  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {

    if (tryOptSelectConjunction(I, *CondDef))

      return true;

    return false;

  }


  AArch64CC::CondCode CondCode;

  if (CondOpc == TargetOpcode::G_ICMP) {

    auto &PredOp = CondDef->getOperand(1);

    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,

                       MIB);

    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());

    CondCode =

        changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);

  } else {

    // Get the condition code for the select.

    auto Pred =

        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());

    AArch64CC::CondCode CondCode2;

    changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);


    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two

    // instructions to emit the comparison.

    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be

    // unnecessary.

    if (CondCode2 != AArch64CC::AL)

      return false;


    if (!emitFPCompare(CondDef->getOperand(2).getReg(),

                       CondDef->getOperand(3).getReg(), MIB)) {

      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");

      return false;

    }

  }


  // Emit the select.

  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),

             I.getOperand(3).getReg(), CondCode, MIB);

  I.eraseFromParent();

  return true;

}


MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(

    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,

    MachineIRBuilder &MIRBuilder) const {

  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&

         "Unexpected MachineOperand");

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // We want to find this sort of thing:

  // x = G_SUB 0, y

  // G_ICMP z, x

  //

  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.

  // e.g:

  //

  // cmn z, y


  // Check if the RHS or LHS of the G_ICMP is defined by a SUB

  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);

  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);

  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());


  // Given this:

  //

  // x = G_SUB 0, y

  // G_ICMP z, x

  //

  // Produce this:

  //

  // cmn z, y

  if (isCMN(RHSDef, P, MRI))

    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);


  // Same idea here, but with the LHS of the compare instead:

  //

  // Given this:

  //

  // x = G_SUB 0, y

  // G_ICMP x, z

  //

  // Produce this:

  //

  // cmn y, z

  //

  // But be careful! We need to swap the predicate!

  if (isCMN(LHSDef, P, MRI)) {

    if (!CmpInst::isEquality(P)) {

      P = CmpInst::getSwappedPredicate(P);

      Predicate = MachineOperand::CreatePredicate(P);

    }

    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  }


  // Given this:

  //

  // z = G_AND x, y

  // G_ICMP z, 0

  //

  // Produce this if the compare is signed:

  //

  // tst x, y

  if (!CmpInst::isUnsigned(P) && LHSDef &&

      LHSDef->getOpcode() == TargetOpcode::G_AND) {

    // Make sure that the RHS is 0.

    auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);

    if (!ValAndVReg || ValAndVReg->Value != 0)

      return nullptr;


    return emitTST(LHSDef->getOperand(1),

                   LHSDef->getOperand(2), MIRBuilder);

  }


  return nullptr;

}


bool AArch64InstructionSelector::selectShuffleVector(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());

  Register Src1Reg = I.getOperand(1).getReg();

  const LLT Src1Ty = MRI.getType(Src1Reg);

  Register Src2Reg = I.getOperand(2).getReg();

  const LLT Src2Ty = MRI.getType(Src2Reg);

  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();


  MachineBasicBlock &MBB = *I.getParent();

  MachineFunction &MF = *MBB.getParent();

  LLVMContext &Ctx = MF.getFunction().getContext();


  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if

  // it's originated from a <1 x T> type. Those should have been lowered into

  // G_BUILD_VECTOR earlier.

  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {

    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");

    return false;

  }


  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;


  SmallVector<Constant *, 64> CstIdxs;

  for (int Val : Mask) {

    // For now, any undef indexes we'll just assume to be 0. This should be

    // optimized in future, e.g. to select DUP etc.

    Val = Val < 0 ? 0 : Val;

    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {

      unsigned Offset = Byte + Val * BytesPerElt;

      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));

    }

  }


  // Use a constant pool to load the index vector for TBL.

  Constant *CPVal = ConstantVector::get(CstIdxs);

  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);

  if (!IndexLoad) {

    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");

    return false;

  }


  if (DstTy.getSizeInBits() != 128) {

    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");

    // This case can be done with TBL1.

    MachineInstr *Concat =

        emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);

    if (!Concat) {

      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");

      return false;

    }


    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.

    IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,

                                   IndexLoad->getOperand(0).getReg(), MIB);


    auto TBL1 = MIB.buildInstr(

        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},

        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});

    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);


    auto Copy =

        MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})

            .addReg(TBL1.getReg(0), 0, AArch64::dsub);

    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);

    I.eraseFromParent();

    return true;

  }


  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive

  // Q registers for regalloc.

  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};

  auto RegSeq = createQTuple(Regs, MIB);

  auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},

                             {RegSeq, IndexLoad->getOperand(0)});

  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);

  I.eraseFromParent();

  return true;

}


MachineInstr *AArch64InstructionSelector::emitLaneInsert(

    std::optional<Register> DstReg, Register SrcReg, Register EltReg,

    unsigned LaneIdx, const RegisterBank &RB,

    MachineIRBuilder &MIRBuilder) const {

  MachineInstr *InsElt = nullptr;

  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;

  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();


  // Create a register to define with the insert if one wasn't passed in.

  if (!DstReg)

    DstReg = MRI.createVirtualRegister(DstRC);


  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();

  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;


  if (RB.getID() == AArch64::FPRRegBankID) {

    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);

    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})

                 .addImm(LaneIdx)

                 .addUse(InsSub->getOperand(0).getReg())

                 .addImm(0);

  } else {

    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})

                 .addImm(LaneIdx)

                 .addUse(EltReg);

  }


  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);

  return InsElt;

}


bool AArch64InstructionSelector::selectUSMovFromExtend(

    MachineInstr &MI, MachineRegisterInfo &MRI) {

  if (MI.getOpcode() != TargetOpcode::G_SEXT &&

      MI.getOpcode() != TargetOpcode::G_ZEXT &&

      MI.getOpcode() != TargetOpcode::G_ANYEXT)

    return false;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;

  const Register DefReg = MI.getOperand(0).getReg();

  const LLT DstTy = MRI.getType(DefReg);

  unsigned DstSize = DstTy.getSizeInBits();


  if (DstSize != 32 && DstSize != 64)

    return false;


  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,

                                       MI.getOperand(1).getReg(), MRI);

  int64_t Lane;

  if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))

    return false;

  Register Src0 = Extract->getOperand(1).getReg();


  const LLT VecTy = MRI.getType(Src0);

  if (VecTy.isScalableVector())

    return false;


  if (VecTy.getSizeInBits() != 128) {

    const MachineInstr *ScalarToVector = emitScalarToVector(

        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);

    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");

    Src0 = ScalarToVector->getOperand(0).getReg();

  }


  unsigned Opcode;

  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)

    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;

  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)

    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;

  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)

    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;

  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)

    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;

  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)

    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;

  else

    llvm_unreachable("Unexpected type combo for S/UMov!");


  // We may need to generate one of these, depending on the type and sign of the

  // input:

  //  DstReg = SMOV Src0, Lane;

  //  NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;

  MachineInstr *ExtI = nullptr;

  if (DstSize == 64 && !IsSigned) {

    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);

    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})

               .addImm(0)

               .addUse(NewReg)

               .addImm(AArch64::sub_32);

    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);

  } else

    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);


  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);

  MI.eraseFromParent();

  return true;

}


MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(

    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {

  unsigned int Op;

  if (DstSize == 128) {

    if (Bits.getHiBits(64) != Bits.getLoBits(64))

      return nullptr;

    Op = AArch64::MOVIv16b_ns;

  } else {

    Op = AArch64::MOVIv8b_ns;

  }


  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();


  if (AArch64_AM::isAdvSIMDModImmType9(Val)) {

    Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);

    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);

    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);

    return &*Mov;

  }

  return nullptr;

}


MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(

    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,

    bool Inv) {


  unsigned int Op;

  if (DstSize == 128) {

    if (Bits.getHiBits(64) != Bits.getLoBits(64))

      return nullptr;

    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;

  } else {

    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;

  }


  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();

  uint64_t Shift;


  if (AArch64_AM::isAdvSIMDModImmType5(Val)) {

    Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);

    Shift = 0;

  } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {

    Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);

    Shift = 8;

  } else

    return nullptr;


  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);

  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);

  return &*Mov;

}


MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(

    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,

    bool Inv) {


  unsigned int Op;

  if (DstSize == 128) {

    if (Bits.getHiBits(64) != Bits.getLoBits(64))

      return nullptr;

    Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;

  } else {

    Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;

  }


  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();

  uint64_t Shift;


  if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {

    Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);

    Shift = 0;

  } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {

    Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);

    Shift = 8;

  } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {

    Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);

    Shift = 16;

  } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {

    Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);

    Shift = 24;

  } else

    return nullptr;


  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);

  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);

  return &*Mov;

}


MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(

    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {


  unsigned int Op;

  if (DstSize == 128) {

    if (Bits.getHiBits(64) != Bits.getLoBits(64))

      return nullptr;

    Op = AArch64::MOVIv2d_ns;

  } else {

    Op = AArch64::MOVID;

  }


  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();

  if (AArch64_AM::isAdvSIMDModImmType10(Val)) {

    Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);

    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);

    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);

    return &*Mov;

  }

  return nullptr;

}


MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(

    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,

    bool Inv) {


  unsigned int Op;

  if (DstSize == 128) {

    if (Bits.getHiBits(64) != Bits.getLoBits(64))

      return nullptr;

    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;

  } else {

    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;

  }


  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();

  uint64_t Shift;


  if (AArch64_AM::isAdvSIMDModImmType7(Val)) {

    Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);

    Shift = 264;

  } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {

    Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);

    Shift = 272;

  } else

    return nullptr;


  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);

  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);

  return &*Mov;

}


MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(

    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {


  unsigned int Op;

  bool IsWide = false;

  if (DstSize == 128) {

    if (Bits.getHiBits(64) != Bits.getLoBits(64))

      return nullptr;

    Op = AArch64::FMOVv4f32_ns;

    IsWide = true;

  } else {

    Op = AArch64::FMOVv2f32_ns;

  }


  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();


  if (AArch64_AM::isAdvSIMDModImmType11(Val)) {

    Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);

  } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {

    Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);

    Op = AArch64::FMOVv2f64_ns;

  } else

    return nullptr;


  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);

  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);

  return &*Mov;

}


bool AArch64InstructionSelector::selectIndexedExtLoad(

    MachineInstr &MI, MachineRegisterInfo &MRI) {

  auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);

  Register Dst = ExtLd.getDstReg();

  Register WriteBack = ExtLd.getWritebackReg();

  Register Base = ExtLd.getBaseReg();

  Register Offset = ExtLd.getOffsetReg();

  LLT Ty = MRI.getType(Dst);

  assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.

  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();

  bool IsPre = ExtLd.isPre();

  bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);

  unsigned InsertIntoSubReg = 0;

  bool IsDst64 = Ty.getSizeInBits() == 64;


  // ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so

  // long as they are scalar.

  bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;

  if ((IsSExt && IsFPR) || Ty.isVector())

    return false;


  unsigned Opc = 0;

  LLT NewLdDstTy;

  LLT s32 = LLT::scalar(32);

  LLT s64 = LLT::scalar(64);


  if (MemSizeBits == 8) {

    if (IsSExt) {

      if (IsDst64)

        Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;

      else

        Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;

      NewLdDstTy = IsDst64 ? s64 : s32;

    } else if (IsFPR) {

      Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;

      InsertIntoSubReg = AArch64::bsub;

      NewLdDstTy = LLT::scalar(MemSizeBits);

    } else {

      Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;

      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;

      NewLdDstTy = s32;

    }

  } else if (MemSizeBits == 16) {

    if (IsSExt) {

      if (IsDst64)

        Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;

      else

        Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;

      NewLdDstTy = IsDst64 ? s64 : s32;

    } else if (IsFPR) {

      Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;

      InsertIntoSubReg = AArch64::hsub;

      NewLdDstTy = LLT::scalar(MemSizeBits);

    } else {

      Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;

      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;

      NewLdDstTy = s32;

    }

  } else if (MemSizeBits == 32) {

    if (IsSExt) {

      Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;

      NewLdDstTy = s64;

    } else if (IsFPR) {

      Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;

      InsertIntoSubReg = AArch64::ssub;

      NewLdDstTy = LLT::scalar(MemSizeBits);

    } else {

      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;

      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;

      NewLdDstTy = s32;

    }

  } else {

    llvm_unreachable("Unexpected size for indexed load");

  }


  auto Cst = getIConstantVRegVal(Offset, MRI);

  if (!Cst)

    return false; // Shouldn't happen, but just in case.


  auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})

                  .addImm(Cst->getSExtValue());

  LdMI.cloneMemRefs(ExtLd);

  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);

  // Make sure to select the load with the MemTy as the dest type, and then

  // insert into a larger reg if needed.

  if (InsertIntoSubReg) {

    // Generate a SUBREG_TO_REG.

    auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})

                        .addImm(0)

                        .addUse(LdMI.getReg(1))

                        .addImm(InsertIntoSubReg);

    RBI.constrainGenericRegister(

        SubToReg.getReg(0),

        *getRegClassForTypeOnBank(MRI.getType(Dst),

                                  *RBI.getRegBank(Dst, MRI, TRI)),

        MRI);

  } else {

    auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));

    selectCopy(*Copy, TII, MRI, TRI, RBI);

  }

  MI.eraseFromParent();


  return true;

}


bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,

                                                   MachineRegisterInfo &MRI) {

  auto &Ld = cast<GIndexedLoad>(MI);

  Register Dst = Ld.getDstReg();

  Register WriteBack = Ld.getWritebackReg();

  Register Base = Ld.getBaseReg();

  Register Offset = Ld.getOffsetReg();

  assert(MRI.getType(Dst).getSizeInBits() <= 128 &&

         "Unexpected type for indexed load");

  unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();


  if (MemSize < MRI.getType(Dst).getSizeInBytes())

    return selectIndexedExtLoad(MI, MRI);


  unsigned Opc = 0;

  if (Ld.isPre()) {

    static constexpr unsigned GPROpcodes[] = {

        AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,

        AArch64::LDRXpre};

    static constexpr unsigned FPROpcodes[] = {

        AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,

        AArch64::LDRQpre};

    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)

      Opc = FPROpcodes[Log2_32(MemSize)];

    else

      Opc = GPROpcodes[Log2_32(MemSize)];

  } else {

    static constexpr unsigned GPROpcodes[] = {

        AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,

        AArch64::LDRXpost};

    static constexpr unsigned FPROpcodes[] = {

        AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,

        AArch64::LDRDpost, AArch64::LDRQpost};

    if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)

      Opc = FPROpcodes[Log2_32(MemSize)];

    else

      Opc = GPROpcodes[Log2_32(MemSize)];

  }

  auto Cst = getIConstantVRegVal(Offset, MRI);

  if (!Cst)

    return false; // Shouldn't happen, but just in case.

  auto LdMI =

      MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());

  LdMI.cloneMemRefs(Ld);

  constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);

  MI.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,

                                                    MachineRegisterInfo &MRI) {

  Register Dst = I.getWritebackReg();

  Register Val = I.getValueReg();

  Register Base = I.getBaseReg();

  Register Offset = I.getOffsetReg();

  LLT ValTy = MRI.getType(Val);

  assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");


  unsigned Opc = 0;

  if (I.isPre()) {

    static constexpr unsigned GPROpcodes[] = {

        AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,

        AArch64::STRXpre};

    static constexpr unsigned FPROpcodes[] = {

        AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,

        AArch64::STRQpre};


    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)

      Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];

    else

      Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];

  } else {

    static constexpr unsigned GPROpcodes[] = {

        AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,

        AArch64::STRXpost};

    static constexpr unsigned FPROpcodes[] = {

        AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,

        AArch64::STRDpost, AArch64::STRQpost};


    if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)

      Opc = FPROpcodes[Log2_32(ValTy.getSizeInBytes())];

    else

      Opc = GPROpcodes[Log2_32(ValTy.getSizeInBytes())];

  }


  auto Cst = getIConstantVRegVal(Offset, MRI);

  if (!Cst)

    return false; // Shouldn't happen, but just in case.

  auto Str =

      MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());

  Str.cloneMemRefs(I);

  constrainSelectedInstRegOperands(*Str, TII, TRI, RBI);

  I.eraseFromParent();

  return true;

}


MachineInstr *

AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,

                                               MachineIRBuilder &MIRBuilder,

                                               MachineRegisterInfo &MRI) {

  LLT DstTy = MRI.getType(Dst);

  unsigned DstSize = DstTy.getSizeInBits();

  if (CV->isNullValue()) {

    if (DstSize == 128) {

      auto Mov =

          MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);

      constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);

      return &*Mov;

    }


    if (DstSize == 64) {

      auto Mov =

          MIRBuilder

              .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})

              .addImm(0);

      auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})

                      .addReg(Mov.getReg(0), 0, AArch64::dsub);

      RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);

      return &*Copy;

    }

  }


  if (Constant *SplatValue = CV->getSplatValue()) {

    APInt SplatValueAsInt =

        isa<ConstantFP>(SplatValue)

            ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()

            : SplatValue->getUniqueInteger();

    APInt DefBits = APInt::getSplat(

        DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));

    auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {

      MachineInstr *NewOp;

      bool Inv = false;

      if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||

          (NewOp =

               tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||

          (NewOp =

               tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||

          (NewOp =

               tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||

          (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||

          (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))

        return NewOp;


      DefBits = ~DefBits;

      Inv = true;

      if ((NewOp =

               tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||

          (NewOp =

               tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||

          (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))

        return NewOp;

      return nullptr;

    };


    if (auto *NewOp = TryMOVIWithBits(DefBits))

      return NewOp;


    // See if a fneg of the constant can be materialized with a MOVI, etc

    auto TryWithFNeg = [&](APInt DefBits, int NumBits,

                           unsigned NegOpc) -> MachineInstr * {

      // FNegate each sub-element of the constant

      APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);

      APInt NegBits(DstSize, 0);

      unsigned NumElts = DstSize / NumBits;

      for (unsigned i = 0; i < NumElts; i++)

        NegBits |= Neg << (NumBits * i);

      NegBits = DefBits ^ NegBits;


      // Try to create the new constants with MOVI, and if so generate a fneg

      // for it.

      if (auto *NewOp = TryMOVIWithBits(NegBits)) {

        Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);

        NewOp->getOperand(0).setReg(NewDst);

        return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});

      }

      return nullptr;

    };

    MachineInstr *R;

    if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||

        (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||

        (STI.hasFullFP16() &&

         (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))

      return R;

  }


  auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);

  if (!CPLoad) {

    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");

    return nullptr;

  }


  auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));

  RBI.constrainGenericRegister(

      Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);

  return &*Copy;

}


bool AArch64InstructionSelector::tryOptConstantBuildVec(

    MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  unsigned DstSize = DstTy.getSizeInBits();

  assert(DstSize <= 128 && "Unexpected build_vec type!");

  if (DstSize < 32)

    return false;

  // Check if we're building a constant vector, in which case we want to

  // generate a constant pool load instead of a vector insert sequence.

  SmallVector<Constant *, 16> Csts;

  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {

    // Try to find G_CONSTANT or G_FCONSTANT

    auto *OpMI =

        getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);

    if (OpMI)

      Csts.emplace_back(

          const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));

    else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,

                                  I.getOperand(Idx).getReg(), MRI)))

      Csts.emplace_back(

          const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));

    else

      return false;

  }

  Constant *CV = ConstantVector::get(Csts);

  if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))

    return false;

  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  // Given:

  //  %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef

  //

  // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.

  Register Dst = I.getOperand(0).getReg();

  Register EltReg = I.getOperand(1).getReg();

  LLT EltTy = MRI.getType(EltReg);

  // If the index isn't on the same bank as its elements, then this can't be a

  // SUBREG_TO_REG.

  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);

  const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);

  if (EltRB != DstRB)

    return false;

  if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {

        return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);

      }))

    return false;

  unsigned SubReg;

  const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);

  if (!EltRC)

    return false;

  const TargetRegisterClass *DstRC =

      getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);

  if (!DstRC)

    return false;

  if (!getSubRegForClass(EltRC, TRI, SubReg))

    return false;

  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})

                         .addImm(0)

                         .addUse(EltReg)

                         .addImm(SubReg);

  I.eraseFromParent();

  constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);

  return RBI.constrainGenericRegister(Dst, *DstRC, MRI);

}


bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,

                                                   MachineRegisterInfo &MRI) {

  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  // Until we port more of the optimized selections, for now just use a vector

  // insert sequence.

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());

  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());

  unsigned EltSize = EltTy.getSizeInBits();


  if (tryOptConstantBuildVec(I, DstTy, MRI))

    return true;

  if (tryOptBuildVecToSubregToReg(I, MRI))

    return true;


  if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)

    return false; // Don't support all element types yet.

  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);


  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;

  MachineInstr *ScalarToVec =

      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,

                         I.getOperand(1).getReg(), MIB);

  if (!ScalarToVec)

    return false;


  Register DstVec = ScalarToVec->getOperand(0).getReg();

  unsigned DstSize = DstTy.getSizeInBits();


  // Keep track of the last MI we inserted. Later on, we might be able to save

  // a copy using it.

  MachineInstr *PrevMI = ScalarToVec;

  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {

    // Note that if we don't do a subregister copy, we can end up making an

    // extra register.

    Register OpReg = I.getOperand(i).getReg();

    // Do not emit inserts for undefs

    if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {

      PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);

      DstVec = PrevMI->getOperand(0).getReg();

    }

  }


  // If DstTy's size in bits is less than 128, then emit a subregister copy

  // from DstVec to the last register we've defined.

  if (DstSize < 128) {

    // Force this to be FPR using the destination vector.

    const TargetRegisterClass *RC =

        getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));

    if (!RC)

      return false;

    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {

      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");

      return false;

    }


    unsigned SubReg = 0;

    if (!getSubRegForClass(RC, TRI, SubReg))

      return false;

    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {

      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize

                        << "\n");

      return false;

    }


    Register Reg = MRI.createVirtualRegister(RC);

    Register DstReg = I.getOperand(0).getReg();


    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);

    MachineOperand &RegOp = I.getOperand(1);

    RegOp.setReg(Reg);

    RBI.constrainGenericRegister(DstReg, *RC, MRI);

  } else {

    // We either have a vector with all elements (except the first one) undef or

    // at least one non-undef non-first element. In the first case, we need to

    // constrain the output register ourselves as we may have generated an

    // INSERT_SUBREG operation which is a generic operation for which the

    // output regclass cannot be automatically chosen.

    //

    // In the second case, there is no need to do this as it may generate an

    // instruction like INSvi32gpr where the regclass can be automatically

    // chosen.

    //

    // Also, we save a copy by re-using the destination register on the final

    // insert.

    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());

    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);


    Register DstReg = PrevMI->getOperand(0).getReg();

    if (PrevMI == ScalarToVec && DstReg.isVirtual()) {

      const TargetRegisterClass *RC =

          getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));

      RBI.constrainGenericRegister(DstReg, *RC, MRI);

    }

  }


  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,

                                                           unsigned NumVecs,

                                                           MachineInstr &I) {

  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);

  assert(Opc && "Expected an opcode?");

  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");

  auto &MRI = *MIB.getMRI();

  LLT Ty = MRI.getType(I.getOperand(0).getReg());

  unsigned Size = Ty.getSizeInBits();

  assert((Size == 64 || Size == 128) &&

         "Destination must be 64 bits or 128 bits?");

  unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;

  auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();

  assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");

  auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});

  Load.cloneMemRefs(I);

  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);

  Register SelectedLoadDst = Load->getOperand(0).getReg();

  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {

    auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})

                   .addReg(SelectedLoadDst, 0, SubReg + Idx);

    // Emit the subreg copies and immediately select them.

    // FIXME: We should refactor our copy code into an emitCopy helper and

    // clean up uses of this pattern elsewhere in the selector.

    selectCopy(*Vec, TII, MRI, TRI, RBI);

  }

  return true;

}


bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(

    unsigned Opc, unsigned NumVecs, MachineInstr &I) {

  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);

  assert(Opc && "Expected an opcode?");

  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");

  auto &MRI = *MIB.getMRI();

  LLT Ty = MRI.getType(I.getOperand(0).getReg());

  bool Narrow = Ty.getSizeInBits() == 64;


  auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;

  SmallVector<Register, 4> Regs(NumVecs);

  std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),

                 [](auto MO) { return MO.getReg(); });


  if (Narrow) {

    transform(Regs, Regs.begin(), [this](Register Reg) {

      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)

          ->getOperand(0)

          .getReg();

    });

    Ty = Ty.multiplyElements(2);

  }


  Register Tuple = createQTuple(Regs, MIB);

  auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);

  if (!LaneNo)

    return false;


  Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();

  auto Load = MIB.buildInstr(Opc, {Ty}, {})

                  .addReg(Tuple)

                  .addImm(LaneNo->getZExtValue())

                  .addReg(Ptr);

  Load.cloneMemRefs(I);

  constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);

  Register SelectedLoadDst = Load->getOperand(0).getReg();

  unsigned SubReg = AArch64::qsub0;

  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {

    auto Vec = MIB.buildInstr(TargetOpcode::COPY,

                              {Narrow ? DstOp(&AArch64::FPR128RegClass)

                                      : DstOp(I.getOperand(Idx).getReg())},

                              {})

                   .addReg(SelectedLoadDst, 0, SubReg + Idx);

    Register WideReg = Vec.getReg(0);

    // Emit the subreg copies and immediately select them.

    selectCopy(*Vec, TII, MRI, TRI, RBI);

    if (Narrow &&

        !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))

      return false;

  }

  return true;

}


void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,

                                                            unsigned NumVecs,

                                                            unsigned Opc) {

  MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();

  LLT Ty = MRI.getType(I.getOperand(1).getReg());

  Register Ptr = I.getOperand(1 + NumVecs).getReg();


  SmallVector<Register, 2> Regs(NumVecs);

  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,

                 Regs.begin(), [](auto MO) { return MO.getReg(); });


  Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)

                                             : createDTuple(Regs, MIB);

  auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});

  Store.cloneMemRefs(I);

  constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);

}


bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(

    MachineInstr &I, unsigned NumVecs, unsigned Opc) {

  MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();

  LLT Ty = MRI.getType(I.getOperand(1).getReg());

  bool Narrow = Ty.getSizeInBits() == 64;


  SmallVector<Register, 2> Regs(NumVecs);

  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,

                 Regs.begin(), [](auto MO) { return MO.getReg(); });


  if (Narrow)

    transform(Regs, Regs.begin(), [this](Register Reg) {

      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)

          ->getOperand(0)

          .getReg();

    });


  Register Tuple = createQTuple(Regs, MIB);


  auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);

  if (!LaneNo)

    return false;

  Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();

  auto Store = MIB.buildInstr(Opc, {}, {})

                   .addReg(Tuple)

                   .addImm(LaneNo->getZExtValue())

                   .addReg(Ptr);

  Store.cloneMemRefs(I);

  constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);

  return true;

}


bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(

    MachineInstr &I, MachineRegisterInfo &MRI) {

  // Find the intrinsic ID.

  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();


  const LLT S8 = LLT::scalar(8);

  const LLT S16 = LLT::scalar(16);

  const LLT S32 = LLT::scalar(32);

  const LLT S64 = LLT::scalar(64);

  const LLT P0 = LLT::pointer(0, 64);

  // Select the instruction.

  switch (IntrinID) {

  default:

    return false;

  case Intrinsic::aarch64_ldxp:

  case Intrinsic::aarch64_ldaxp: {

    auto NewI = MIB.buildInstr(

        IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,

        {I.getOperand(0).getReg(), I.getOperand(1).getReg()},

        {I.getOperand(3)});

    NewI.cloneMemRefs(I);

    constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);

    break;

  }

  case Intrinsic::aarch64_neon_ld1x2: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD1Twov8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD1Twov16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD1Twov4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD1Twov8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD1Twov2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD1Twov4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD1Twov2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD1Twov1d;

    else

      llvm_unreachable("Unexpected type for ld1x2!");

    selectVectorLoadIntrinsic(Opc, 2, I);

    break;

  }

  case Intrinsic::aarch64_neon_ld1x3: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD1Threev8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD1Threev16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD1Threev4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD1Threev8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD1Threev2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD1Threev4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD1Threev2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD1Threev1d;

    else

      llvm_unreachable("Unexpected type for ld1x3!");

    selectVectorLoadIntrinsic(Opc, 3, I);

    break;

  }

  case Intrinsic::aarch64_neon_ld1x4: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD1Fourv8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD1Fourv16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD1Fourv4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD1Fourv8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD1Fourv2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD1Fourv4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD1Fourv2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD1Fourv1d;

    else

      llvm_unreachable("Unexpected type for ld1x4!");

    selectVectorLoadIntrinsic(Opc, 4, I);

    break;

  }

  case Intrinsic::aarch64_neon_ld2: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD2Twov8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD2Twov16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD2Twov4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD2Twov8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD2Twov2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD2Twov4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD2Twov2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD1Twov1d;

    else

      llvm_unreachable("Unexpected type for ld2!");

    selectVectorLoadIntrinsic(Opc, 2, I);

    break;

  }

  case Intrinsic::aarch64_neon_ld2lane: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD2i8;

    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD2i16;

    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD2i32;

    else if (Ty == LLT::fixed_vector(2, S64) ||

             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)

      Opc = AArch64::LD2i64;

    else

      llvm_unreachable("Unexpected type for st2lane!");

    if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))

      return false;

    break;

  }

  case Intrinsic::aarch64_neon_ld2r: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD2Rv8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD2Rv16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD2Rv4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD2Rv8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD2Rv2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD2Rv4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD2Rv2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD2Rv1d;

    else

      llvm_unreachable("Unexpected type for ld2r!");

    selectVectorLoadIntrinsic(Opc, 2, I);

    break;

  }

  case Intrinsic::aarch64_neon_ld3: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD3Threev8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD3Threev16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD3Threev4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD3Threev8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD3Threev2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD3Threev4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD3Threev2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD1Threev1d;

    else

      llvm_unreachable("Unexpected type for ld3!");

    selectVectorLoadIntrinsic(Opc, 3, I);

    break;

  }

  case Intrinsic::aarch64_neon_ld3lane: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD3i8;

    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD3i16;

    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD3i32;

    else if (Ty == LLT::fixed_vector(2, S64) ||

             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)

      Opc = AArch64::LD3i64;

    else

      llvm_unreachable("Unexpected type for st3lane!");

    if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))

      return false;

    break;

  }

  case Intrinsic::aarch64_neon_ld3r: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD3Rv8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD3Rv16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD3Rv4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD3Rv8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD3Rv2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD3Rv4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD3Rv2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD3Rv1d;

    else

      llvm_unreachable("Unexpected type for ld3r!");

    selectVectorLoadIntrinsic(Opc, 3, I);

    break;

  }

  case Intrinsic::aarch64_neon_ld4: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD4Fourv8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD4Fourv16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD4Fourv4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD4Fourv8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD4Fourv2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD4Fourv4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD4Fourv2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD1Fourv1d;

    else

      llvm_unreachable("Unexpected type for ld4!");

    selectVectorLoadIntrinsic(Opc, 4, I);

    break;

  }

  case Intrinsic::aarch64_neon_ld4lane: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD4i8;

    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD4i16;

    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD4i32;

    else if (Ty == LLT::fixed_vector(2, S64) ||

             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)

      Opc = AArch64::LD4i64;

    else

      llvm_unreachable("Unexpected type for st4lane!");

    if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))

      return false;

    break;

  }

  case Intrinsic::aarch64_neon_ld4r: {

    LLT Ty = MRI.getType(I.getOperand(0).getReg());

    unsigned Opc = 0;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::LD4Rv8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::LD4Rv16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::LD4Rv4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::LD4Rv8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::LD4Rv2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::LD4Rv4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::LD4Rv2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::LD4Rv1d;

    else

      llvm_unreachable("Unexpected type for ld4r!");

    selectVectorLoadIntrinsic(Opc, 4, I);

    break;

  }

  case Intrinsic::aarch64_neon_st1x2: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::ST1Twov8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST1Twov16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::ST1Twov4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST1Twov8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::ST1Twov2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST1Twov4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::ST1Twov2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::ST1Twov1d;

    else

      llvm_unreachable("Unexpected type for st1x2!");

    selectVectorStoreIntrinsic(I, 2, Opc);

    break;

  }

  case Intrinsic::aarch64_neon_st1x3: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::ST1Threev8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST1Threev16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::ST1Threev4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST1Threev8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::ST1Threev2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST1Threev4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::ST1Threev2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::ST1Threev1d;

    else

      llvm_unreachable("Unexpected type for st1x3!");

    selectVectorStoreIntrinsic(I, 3, Opc);

    break;

  }

  case Intrinsic::aarch64_neon_st1x4: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::ST1Fourv8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST1Fourv16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::ST1Fourv4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST1Fourv8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::ST1Fourv2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST1Fourv4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::ST1Fourv2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::ST1Fourv1d;

    else

      llvm_unreachable("Unexpected type for st1x4!");

    selectVectorStoreIntrinsic(I, 4, Opc);

    break;

  }

  case Intrinsic::aarch64_neon_st2: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::ST2Twov8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST2Twov16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::ST2Twov4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST2Twov8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::ST2Twov2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST2Twov4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::ST2Twov2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::ST1Twov1d;

    else

      llvm_unreachable("Unexpected type for st2!");

    selectVectorStoreIntrinsic(I, 2, Opc);

    break;

  }

  case Intrinsic::aarch64_neon_st3: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::ST3Threev8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST3Threev16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::ST3Threev4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST3Threev8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::ST3Threev2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST3Threev4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::ST3Threev2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::ST1Threev1d;

    else

      llvm_unreachable("Unexpected type for st3!");

    selectVectorStoreIntrinsic(I, 3, Opc);

    break;

  }

  case Intrinsic::aarch64_neon_st4: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8))

      Opc = AArch64::ST4Fourv8b;

    else if (Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST4Fourv16b;

    else if (Ty == LLT::fixed_vector(4, S16))

      Opc = AArch64::ST4Fourv4h;

    else if (Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST4Fourv8h;

    else if (Ty == LLT::fixed_vector(2, S32))

      Opc = AArch64::ST4Fourv2s;

    else if (Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST4Fourv4s;

    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))

      Opc = AArch64::ST4Fourv2d;

    else if (Ty == S64 || Ty == P0)

      Opc = AArch64::ST1Fourv1d;

    else

      llvm_unreachable("Unexpected type for st4!");

    selectVectorStoreIntrinsic(I, 4, Opc);

    break;

  }

  case Intrinsic::aarch64_neon_st2lane: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST2i8;

    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST2i16;

    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST2i32;

    else if (Ty == LLT::fixed_vector(2, S64) ||

             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)

      Opc = AArch64::ST2i64;

    else

      llvm_unreachable("Unexpected type for st2lane!");

    if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))

      return false;

    break;

  }

  case Intrinsic::aarch64_neon_st3lane: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST3i8;

    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST3i16;

    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST3i32;

    else if (Ty == LLT::fixed_vector(2, S64) ||

             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)

      Opc = AArch64::ST3i64;

    else

      llvm_unreachable("Unexpected type for st3lane!");

    if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))

      return false;

    break;

  }

  case Intrinsic::aarch64_neon_st4lane: {

    LLT Ty = MRI.getType(I.getOperand(1).getReg());

    unsigned Opc;

    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))

      Opc = AArch64::ST4i8;

    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))

      Opc = AArch64::ST4i16;

    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))

      Opc = AArch64::ST4i32;

    else if (Ty == LLT::fixed_vector(2, S64) ||

             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)

      Opc = AArch64::ST4i64;

    else

      llvm_unreachable("Unexpected type for st4lane!");

    if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))

      return false;

    break;

  }

  case Intrinsic::aarch64_mops_memset_tag: {

    // Transform

    //    %dst:gpr(p0) = \

    //      G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),

    //      \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)

    // where %dst is updated, into

    //    %Rd:GPR64common, %Rn:GPR64) = \

    //      MOPSMemorySetTaggingPseudo \

    //      %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64

    // where Rd and Rn are tied.

    // It is expected that %val has been extended to s64 in legalization.

    // Note that the order of the size/value operands are swapped.


    Register DstDef = I.getOperand(0).getReg();

    // I.getOperand(1) is the intrinsic function

    Register DstUse = I.getOperand(2).getReg();

    Register ValUse = I.getOperand(3).getReg();

    Register SizeUse = I.getOperand(4).getReg();


    // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.

    // Therefore an additional virtual register is required for the updated size

    // operand. This value is not accessible via the semantics of the intrinsic.

    Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));


    auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,

                                 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});

    Memset.cloneMemRefs(I);

    constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);

    break;

  }

  }


  I.eraseFromParent();

  return true;

}


bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,

                                                 MachineRegisterInfo &MRI) {

  unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();


  switch (IntrinID) {

  default:

    break;

  case Intrinsic::ptrauth_resign: {

    Register DstReg = I.getOperand(0).getReg();

    Register ValReg = I.getOperand(2).getReg();

    uint64_t AUTKey = I.getOperand(3).getImm();

    Register AUTDisc = I.getOperand(4).getReg();

    uint64_t PACKey = I.getOperand(5).getImm();

    Register PACDisc = I.getOperand(6).getReg();


    Register AUTAddrDisc = AUTDisc;

    uint16_t AUTConstDiscC = 0;

    std::tie(AUTConstDiscC, AUTAddrDisc) =

        extractPtrauthBlendDiscriminators(AUTDisc, MRI);


    Register PACAddrDisc = PACDisc;

    uint16_t PACConstDiscC = 0;

    std::tie(PACConstDiscC, PACAddrDisc) =

        extractPtrauthBlendDiscriminators(PACDisc, MRI);


    MIB.buildCopy({AArch64::X16}, {ValReg});

    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});

    MIB.buildInstr(AArch64::AUTPAC)

        .addImm(AUTKey)

        .addImm(AUTConstDiscC)

        .addUse(AUTAddrDisc)

        .addImm(PACKey)

        .addImm(PACConstDiscC)

        .addUse(PACAddrDisc)

        .constrainAllUses(TII, TRI, RBI);

    MIB.buildCopy({DstReg}, Register(AArch64::X16));


    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    I.eraseFromParent();

    return true;

  }

  case Intrinsic::ptrauth_auth: {

    Register DstReg = I.getOperand(0).getReg();

    Register ValReg = I.getOperand(2).getReg();

    uint64_t AUTKey = I.getOperand(3).getImm();

    Register AUTDisc = I.getOperand(4).getReg();


    Register AUTAddrDisc = AUTDisc;

    uint16_t AUTConstDiscC = 0;

    std::tie(AUTConstDiscC, AUTAddrDisc) =

        extractPtrauthBlendDiscriminators(AUTDisc, MRI);


    if (STI.isX16X17Safer()) {

      MIB.buildCopy({AArch64::X16}, {ValReg});

      MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});

      MIB.buildInstr(AArch64::AUTx16x17)

          .addImm(AUTKey)

          .addImm(AUTConstDiscC)

          .addUse(AUTAddrDisc)

          .constrainAllUses(TII, TRI, RBI);

      MIB.buildCopy({DstReg}, Register(AArch64::X16));

    } else {

      Register ScratchReg =

          MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);

      MIB.buildInstr(AArch64::AUTxMxN)

          .addDef(DstReg)

          .addDef(ScratchReg)

          .addUse(ValReg)

          .addImm(AUTKey)

          .addImm(AUTConstDiscC)

          .addUse(AUTAddrDisc)

          .constrainAllUses(TII, TRI, RBI);

    }


    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    I.eraseFromParent();

    return true;

  }

  case Intrinsic::frameaddress:

  case Intrinsic::returnaddress: {

    MachineFunction &MF = *I.getParent()->getParent();

    MachineFrameInfo &MFI = MF.getFrameInfo();


    unsigned Depth = I.getOperand(2).getImm();

    Register DstReg = I.getOperand(0).getReg();

    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);


    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {

      if (!MFReturnAddr) {

        // Insert the copy from LR/X30 into the entry block, before it can be

        // clobbered by anything.

        MFI.setReturnAddressIsTaken(true);

        MFReturnAddr = getFunctionLiveInPhysReg(

            MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());

      }


      if (STI.hasPAuth()) {

        MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});

      } else {

        MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});

        MIB.buildInstr(AArch64::XPACLRI);

        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});

      }


      I.eraseFromParent();

      return true;

    }


    MFI.setFrameAddressIsTaken(true);

    Register FrameAddr(AArch64::FP);

    while (Depth--) {

      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);

      auto Ldr =

          MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);

      constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);

      FrameAddr = NextFrame;

    }


    if (IntrinID == Intrinsic::frameaddress)

      MIB.buildCopy({DstReg}, {FrameAddr});

    else {

      MFI.setReturnAddressIsTaken(true);


      if (STI.hasPAuth()) {

        Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

        MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);

        MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});

      } else {

        MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})

            .addImm(1);

        MIB.buildInstr(AArch64::XPACLRI);

        MIB.buildCopy({DstReg}, {Register(AArch64::LR)});

      }

    }


    I.eraseFromParent();

    return true;

  }

  case Intrinsic::aarch64_neon_tbl2:

    SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);

    return true;

  case Intrinsic::aarch64_neon_tbl3:

    SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,

                false);

    return true;

  case Intrinsic::aarch64_neon_tbl4:

    SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);

    return true;

  case Intrinsic::aarch64_neon_tbx2:

    SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);

    return true;

  case Intrinsic::aarch64_neon_tbx3:

    SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);

    return true;

  case Intrinsic::aarch64_neon_tbx4:

    SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);

    return true;

  case Intrinsic::swift_async_context_addr:

    auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},

                              {Register(AArch64::FP)})

                   .addImm(8)

                   .addImm(0);

    constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);


    MF->getFrameInfo().setFrameAddressIsTaken(true);

    MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);

    I.eraseFromParent();

    return true;

  }

  return false;

}


// G_PTRAUTH_GLOBAL_VALUE lowering

//

// We have 3 lowering alternatives to choose from:

// - MOVaddrPAC: similar to MOVaddr, with added PAC.

//   If the GV doesn't need a GOT load (i.e., is locally defined)

//   materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.

//

// - LOADgotPAC: similar to LOADgot, with added PAC.

//   If the GV needs a GOT load, materialize the pointer using the usual

//   GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT

//   section is assumed to be read-only (for example, via relro mechanism). See

//   LowerMOVaddrPAC.

//

// - LOADauthptrstatic: similar to LOADgot, but use a

//   special stub slot instead of a GOT slot.

//   Load a signed pointer for symbol 'sym' from a stub slot named

//   'sym$auth_ptr$key$disc' filled by dynamic linker during relocation

//   resolving. This usually lowers to adrp+ldr, but also emits an entry into

//   .data with an

//   @AUTH relocation. See LowerLOADauthptrstatic.

//

// All 3 are pseudos that are expand late to longer sequences: this lets us

// provide integrity guarantees on the to-be-signed intermediate values.

//

// LOADauthptrstatic is undesirable because it requires a large section filled

// with often similarly-signed pointers, making it a good harvesting target.

// Thus, it's only used for ptrauth references to extern_weak to avoid null

// checks.


bool AArch64InstructionSelector::selectPtrAuthGlobalValue(

    MachineInstr &I, MachineRegisterInfo &MRI) const {

  Register DefReg = I.getOperand(0).getReg();

  Register Addr = I.getOperand(1).getReg();

  uint64_t Key = I.getOperand(2).getImm();

  Register AddrDisc = I.getOperand(3).getReg();

  uint64_t Disc = I.getOperand(4).getImm();

  int64_t Offset = 0;


  if (Key > AArch64PACKey::LAST)

    report_fatal_error("key in ptrauth global out of range [0, " +

                       Twine((int)AArch64PACKey::LAST) + "]");


  // Blend only works if the integer discriminator is 16-bit wide.

  if (!isUInt<16>(Disc))

    report_fatal_error(

        "constant discriminator in ptrauth global out of range [0, 0xffff]");


  // Choosing between 3 lowering alternatives is target-specific.

  if (!STI.isTargetELF() && !STI.isTargetMachO())

    report_fatal_error("ptrauth global lowering only supported on MachO/ELF");


  if (!MRI.hasOneDef(Addr))

    return false;


  // First match any offset we take from the real global.

  const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);

  if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {

    Register OffsetReg = DefMI->getOperand(2).getReg();

    if (!MRI.hasOneDef(OffsetReg))

      return false;

    const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);

    if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)

      return false;


    Addr = DefMI->getOperand(1).getReg();

    if (!MRI.hasOneDef(Addr))

      return false;


    DefMI = &*MRI.def_instr_begin(Addr);

    Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();

  }


  // We should be left with a genuine unauthenticated GlobalValue.

  const GlobalValue *GV;

  if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {

    GV = DefMI->getOperand(1).getGlobal();

    Offset += DefMI->getOperand(1).getOffset();

  } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {

    GV = DefMI->getOperand(2).getGlobal();

    Offset += DefMI->getOperand(2).getOffset();

  } else {

    return false;

  }


  MachineIRBuilder MIB(I);


  // Classify the reference to determine whether it needs a GOT load.

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);

  const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);

  assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&

         "unsupported non-GOT op flags on ptrauth global reference");

  assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&

         "unsupported non-GOT reference to weak ptrauth global");


  std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);

  bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;


  // Non-extern_weak:

  // - No GOT load needed -> MOVaddrPAC

  // - GOT load for non-extern_weak -> LOADgotPAC

  //   Note that we disallow extern_weak refs to avoid null checks later.

  if (!GV->hasExternalWeakLinkage()) {

    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});

    MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});

    MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)

        .addGlobalAddress(GV, Offset)

        .addImm(Key)

        .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)

        .addImm(Disc)

        .constrainAllUses(TII, TRI, RBI);

    MIB.buildCopy(DefReg, Register(AArch64::X16));

    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);

    I.eraseFromParent();

    return true;

  }


  // extern_weak -> LOADauthptrstatic


  // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the

  // offset alone as a pointer if the symbol wasn't available, which would

  // probably break null checks in users. Ptrauth complicates things further:

  // error out.

  if (Offset != 0)

    report_fatal_error(

        "unsupported non-zero offset in weak ptrauth global reference");


  if (HasAddrDisc)

    report_fatal_error("unsupported weak addr-div ptrauth global");


  MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})

      .addGlobalAddress(GV, Offset)

      .addImm(Key)

      .addImm(Disc);

  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);


  I.eraseFromParent();

  return true;

}


void AArch64InstructionSelector::SelectTable(MachineInstr &I,

                                             MachineRegisterInfo &MRI,

                                             unsigned NumVec, unsigned Opc1,

                                             unsigned Opc2, bool isExt) {

  Register DstReg = I.getOperand(0).getReg();

  unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;


  // Create the REG_SEQUENCE

  SmallVector<Register, 4> Regs;

  for (unsigned i = 0; i < NumVec; i++)

    Regs.push_back(I.getOperand(i + 2 + isExt).getReg());

  Register RegSeq = createQTuple(Regs, MIB);


  Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();

  MachineInstrBuilder Instr;

  if (isExt) {

    Register Reg = I.getOperand(2).getReg();

    Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});

  } else

    Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});

  constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);

  I.eraseFromParent();

}


InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {

  auto MaybeImmed = getImmedFromMO(Root);

  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)

    return std::nullopt;

  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};

}


InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {

  auto MaybeImmed = getImmedFromMO(Root);

  if (MaybeImmed == std::nullopt || *MaybeImmed > 31)

    return std::nullopt;

  uint64_t Enc = 31 - *MaybeImmed;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};

}


InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {

  auto MaybeImmed = getImmedFromMO(Root);

  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)

    return std::nullopt;

  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};

}


InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {

  auto MaybeImmed = getImmedFromMO(Root);

  if (MaybeImmed == std::nullopt || *MaybeImmed > 63)

    return std::nullopt;

  uint64_t Enc = 63 - *MaybeImmed;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};

}


/// Helper to select an immediate value that can be represented as a 12-bit

/// value shifted left by either 0 or 12. If it is possible to do so, return

/// the immediate and shift value. If not, return std::nullopt.

///

/// Used by selectArithImmed and selectNegArithImmed.

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::select12BitValueWithLeftShift(

    uint64_t Immed) const {

  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {

    ShiftAmt = 0;

  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {

    ShiftAmt = 12;

    Immed = Immed >> 12;

  } else

    return std::nullopt;


  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);

  return {{

      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },

      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },

  }};

}


/// SelectArithImmed - Select an immediate value that can be represented as

/// a 12-bit value shifted left by either 0 or 12.  If so, return true with

/// Val set to the 12-bit value and Shift set to the shifter operand.

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {

  // This function is called from the addsub_shifted_imm ComplexPattern,

  // which lists [imm] as the list of opcode it's interested in, however

  // we still need to check whether the operand is actually an immediate

  // here because the ComplexPattern opcode list is only used in

  // root-level opcode matching.

  auto MaybeImmed = getImmedFromMO(Root);

  if (MaybeImmed == std::nullopt)

    return std::nullopt;

  return select12BitValueWithLeftShift(*MaybeImmed);

}


/// SelectNegArithImmed - As above, but negates the value before trying to

/// select it.

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {

  // We need a register here, because we need to know if we have a 64 or 32

  // bit immediate.

  if (!Root.isReg())

    return std::nullopt;

  auto MaybeImmed = getImmedFromMO(Root);

  if (MaybeImmed == std::nullopt)

    return std::nullopt;

  uint64_t Immed = *MaybeImmed;


  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"

  // have the opposite effect on the C flag, so this pattern mustn't match under

  // those circumstances.

  if (Immed == 0)

    return std::nullopt;


  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on

  // the root.

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)

    Immed = ~((uint32_t)Immed) + 1;

  else

    Immed = ~Immed + 1ULL;


  if (Immed & 0xFFFFFFFFFF000000ULL)

    return std::nullopt;


  Immed &= 0xFFFFFFULL;

  return select12BitValueWithLeftShift(Immed);

}


/// Checks if we are sure that folding MI into load/store addressing mode is

/// beneficial or not.

///

/// Returns:

/// - true if folding MI would be beneficial.

/// - false if folding MI would be bad.

/// - std::nullopt if it is not sure whether folding MI is beneficial.

///

/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:

///

/// %13:gpr(s64) = G_CONSTANT i64 1

/// %8:gpr(s64) = G_SHL %6, %13(s64)

/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)

/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))

std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(

    MachineInstr &MI, const MachineRegisterInfo &MRI) const {

  if (MI.getOpcode() == AArch64::G_SHL) {

    // Address operands with shifts are free, except for running on subtargets

    // with AddrLSLSlow14.

    if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(

            MI.getOperand(2).getReg(), MRI)) {

      const APInt ShiftVal = ValAndVeg->Value;


      // Don't fold if we know this will be slow.

      return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));

    }

  }

  return std::nullopt;

}


/// Return true if it is worth folding MI into an extended register. That is,

/// if it's safe to pull it into the addressing mode of a load or store as a

/// shift.

/// \p IsAddrOperand whether the def of MI is used as an address operand

/// (e.g. feeding into an LDR/STR).

bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(

    MachineInstr &MI, const MachineRegisterInfo &MRI,

    bool IsAddrOperand) const {


  // Always fold if there is one use, or if we're optimizing for size.

  Register DefReg = MI.getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(DefReg) ||

      MI.getParent()->getParent()->getFunction().hasOptSize())

    return true;


  if (IsAddrOperand) {

    // If we are already sure that folding MI is good or bad, return the result.

    if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))

      return *Worth;


    // Fold G_PTR_ADD if its offset operand can be folded

    if (MI.getOpcode() == AArch64::G_PTR_ADD) {

      MachineInstr *OffsetInst =

          getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);


      // Note, we already know G_PTR_ADD is used by at least two instructions.

      // If we are also sure about whether folding is beneficial or not,

      // return the result.

      if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))

        return *Worth;

    }

  }


  // FIXME: Consider checking HasALULSLFast as appropriate.


  // We have a fastpath, so folding a shift in and potentially computing it

  // many times may be beneficial. Check if this is only used in memory ops.

  // If it is, then we should fold.

  return all_of(MRI.use_nodbg_instructions(DefReg),

                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });

}


static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {

  switch (Type) {

  case AArch64_AM::SXTB:

  case AArch64_AM::SXTH:

  case AArch64_AM::SXTW:

    return true;

  default:

    return false;

  }

}


InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectExtendedSHL(

    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,

    unsigned SizeInBytes, bool WantsExt) const {

  assert(Base.isReg() && "Expected base to be a register operand");

  assert(Offset.isReg() && "Expected offset to be a register operand");


  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());


  unsigned OffsetOpc = OffsetInst->getOpcode();

  bool LookedThroughZExt = false;

  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {

    // Try to look through a ZEXT.

    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)

      return std::nullopt;


    OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());

    OffsetOpc = OffsetInst->getOpcode();

    LookedThroughZExt = true;


    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)

      return std::nullopt;

  }

  // Make sure that the memory op is a valid size.

  int64_t LegalShiftVal = Log2_32(SizeInBytes);

  if (LegalShiftVal == 0)

    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))

    return std::nullopt;


  // Now, try to find the specific G_CONSTANT. Start by assuming that the

  // register we will offset is the LHS, and the register containing the

  // constant is the RHS.

  Register OffsetReg = OffsetInst->getOperand(1).getReg();

  Register ConstantReg = OffsetInst->getOperand(2).getReg();

  auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);

  if (!ValAndVReg) {

    // We didn't get a constant on the RHS. If the opcode is a shift, then

    // we're done.

    if (OffsetOpc == TargetOpcode::G_SHL)

      return std::nullopt;


    // If we have a G_MUL, we can use either register. Try looking at the RHS.

    std::swap(OffsetReg, ConstantReg);

    ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);

    if (!ValAndVReg)

      return std::nullopt;

  }


  // The value must fit into 3 bits, and must be positive. Make sure that is

  // true.

  int64_t ImmVal = ValAndVReg->Value.getSExtValue();


  // Since we're going to pull this into a shift, the constant value must be

  // a power of 2. If we got a multiply, then we need to check this.

  if (OffsetOpc == TargetOpcode::G_MUL) {

    if (!llvm::has_single_bit<uint32_t>(ImmVal))

      return std::nullopt;


    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.

    ImmVal = Log2_32(ImmVal);

  }


  if ((ImmVal & 0x7) != ImmVal)

    return std::nullopt;


  // We are only allowed to shift by LegalShiftVal. This shift value is built

  // into the instruction, so we can't just use whatever we want.

  if (ImmVal != LegalShiftVal)

    return std::nullopt;


  unsigned SignExtend = 0;

  if (WantsExt) {

    // Check if the offset is defined by an extend, unless we looked through a

    // G_ZEXT earlier.

    if (!LookedThroughZExt) {

      MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);

      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);

      if (Ext == AArch64_AM::InvalidShiftExtend)

        return std::nullopt;


      SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;

      // We only support SXTW for signed extension here.

      if (SignExtend && Ext != AArch64_AM::SXTW)

        return std::nullopt;

      OffsetReg = ExtInst->getOperand(1).getReg();

    }


    // Need a 32-bit wide register here.

    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));

    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);

  }


  // We can use the LHS of the GEP as the base, and the LHS of the shift as an

  // offset. Signify that we are shifting by setting the shift flag to 1.

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },

           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },

           [=](MachineInstrBuilder &MIB) {

             // Need to add both immediates here to make sure that they are both

             // added to the instruction.

             MIB.addImm(SignExtend);

             MIB.addImm(1);

           }}};

}


/// This is used for computing addresses like this:

///

/// ldr x1, [x2, x3, lsl #3]

///

/// Where x2 is the base register, and x3 is an offset register. The shift-left

/// is a constant value specific to this load instruction. That is, we'll never

/// see anything other than a 3 here (which corresponds to the size of the

/// element being loaded.)

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(

    MachineOperand &Root, unsigned SizeInBytes) const {

  if (!Root.isReg())

    return std::nullopt;

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();


  // We want to find something like this:

  //

  // val = G_CONSTANT LegalShiftVal

  // shift = G_SHL off_reg val

  // ptr = G_PTR_ADD base_reg shift

  // x = G_LOAD ptr

  //

  // And fold it into this addressing mode:

  //

  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]


  // Check if we can find the G_PTR_ADD.

  MachineInstr *PtrAdd =

      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);

  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))

    return std::nullopt;


  // Now, try to match an opcode which will match our specific offset.

  // We want a G_SHL or a G_MUL.

  MachineInstr *OffsetInst =

      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);

  return selectExtendedSHL(Root, PtrAdd->getOperand(1),

                           OffsetInst->getOperand(0), SizeInBytes,

                           /*WantsExt=*/false);

}


/// This is used for computing addresses like this:

///

/// ldr x1, [x2, x3]

///

/// Where x2 is the base register, and x3 is an offset register.

///

/// When possible (or profitable) to fold a G_PTR_ADD into the address

/// calculation, this will do so. Otherwise, it will return std::nullopt.

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectAddrModeRegisterOffset(

    MachineOperand &Root) const {

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();


  // We need a GEP.

  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());

  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)

    return std::nullopt;


  // If this is used more than once, let's not bother folding.

  // TODO: Check if they are memory ops. If they are, then we can still fold

  // without having to recompute anything.

  if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))

    return std::nullopt;


  // Base is the GEP's LHS, offset is its RHS.

  return {{[=](MachineInstrBuilder &MIB) {

             MIB.addUse(Gep->getOperand(1).getReg());

           },

           [=](MachineInstrBuilder &MIB) {

             MIB.addUse(Gep->getOperand(2).getReg());

           },

           [=](MachineInstrBuilder &MIB) {

             // Need to add both immediates here to make sure that they are both

             // added to the instruction.

             MIB.addImm(0);

             MIB.addImm(0);

           }}};

}


/// This is intended to be equivalent to selectAddrModeXRO in

/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,

                                              unsigned SizeInBytes) const {

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  if (!Root.isReg())

    return std::nullopt;

  MachineInstr *PtrAdd =

      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);

  if (!PtrAdd)

    return std::nullopt;


  // Check for an immediates which cannot be encoded in the [base + imm]

  // addressing mode, and can't be encoded in an add/sub. If this happens, we'll

  // end up with code like:

  //

  // mov x0, wide

  // add x1 base, x0

  // ldr x2, [x1, x0]

  //

  // In this situation, we can use the [base, xreg] addressing mode to save an

  // add/sub:

  //

  // mov x0, wide

  // ldr x2, [base, x0]

  auto ValAndVReg =

      getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);

  if (ValAndVReg) {

    unsigned Scale = Log2_32(SizeInBytes);

    int64_t ImmOff = ValAndVReg->Value.getSExtValue();


    // Skip immediates that can be selected in the load/store addressing

    // mode.

    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&

        ImmOff < (0x1000 << Scale))

      return std::nullopt;


    // Helper lambda to decide whether or not it is preferable to emit an add.

    auto isPreferredADD = [](int64_t ImmOff) {

      // Constants in [0x0, 0xfff] can be encoded in an add.

      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)

        return true;


      // Can it be encoded in an add lsl #12?

      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)

        return false;


      // It can be encoded in an add lsl #12, but we may not want to. If it is

      // possible to select this as a single movz, then prefer that. A single

      // movz is faster than an add with a shift.

      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&

             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;

    };


    // If the immediate can be encoded in a single add/sub, then bail out.

    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))

      return std::nullopt;

  }


  // Try to fold shifts into the addressing mode.

  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);

  if (AddrModeFns)

    return AddrModeFns;


  // If that doesn't work, see if it's possible to fold in registers from

  // a GEP.

  return selectAddrModeRegisterOffset(Root);

}


/// This is used for computing addresses like this:

///

/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]

///

/// Where we have a 64-bit base register, a 32-bit offset register, and an

/// extend (which may or may not be signed).

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,

                                              unsigned SizeInBytes) const {

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();


  MachineInstr *PtrAdd =

      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);

  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))

    return std::nullopt;


  MachineOperand &LHS = PtrAdd->getOperand(1);

  MachineOperand &RHS = PtrAdd->getOperand(2);

  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);


  // The first case is the same as selectAddrModeXRO, except we need an extend.

  // In this case, we try to find a shift and extend, and fold them into the

  // addressing mode.

  //

  // E.g.

  //

  // off_reg = G_Z/S/ANYEXT ext_reg

  // val = G_CONSTANT LegalShiftVal

  // shift = G_SHL off_reg val

  // ptr = G_PTR_ADD base_reg shift

  // x = G_LOAD ptr

  //

  // In this case we can get a load like this:

  //

  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]

  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),

                                       SizeInBytes, /*WantsExt=*/true);

  if (ExtendedShl)

    return ExtendedShl;


  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.

  //

  // e.g.

  // ldr something, [base_reg, ext_reg, sxtw]

  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))

    return std::nullopt;


  // Check if this is an extend. We'll get an extend type if it is.

  AArch64_AM::ShiftExtendType Ext =

      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);

  if (Ext == AArch64_AM::InvalidShiftExtend)

    return std::nullopt;


  // Need a 32-bit wide register.

  MachineIRBuilder MIB(*PtrAdd);

  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),

                                       AArch64::GPR32RegClass, MIB);

  unsigned SignExtend = Ext == AArch64_AM::SXTW;


  // Base is LHS, offset is ExtReg.

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },

           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },

           [=](MachineInstrBuilder &MIB) {

             MIB.addImm(SignExtend);

             MIB.addImm(0);

           }}};

}


/// Select a "register plus unscaled signed 9-bit immediate" address.  This

/// should only match when there is an offset that is not valid for a scaled

/// immediate addressing mode.  The "Size" argument is the size in bytes of the

/// memory reference, which is needed here to know what is valid for a scaled

/// immediate.

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,

                                                   unsigned Size) const {

  MachineRegisterInfo &MRI =

      Root.getParent()->getParent()->getParent()->getRegInfo();


  if (!Root.isReg())

    return std::nullopt;


  if (!isBaseWithConstantOffset(Root, MRI))

    return std::nullopt;


  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());


  MachineOperand &OffImm = RootDef->getOperand(2);

  if (!OffImm.isReg())

    return std::nullopt;

  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());

  if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)

    return std::nullopt;

  int64_t RHSC;

  MachineOperand &RHSOp1 = RHS->getOperand(1);

  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)

    return std::nullopt;

  RHSC = RHSOp1.getCImm()->getSExtValue();


  if (RHSC >= -256 && RHSC < 256) {

    MachineOperand &Base = RootDef->getOperand(1);

    return {{

        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },

        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },

    }};

  }

  return std::nullopt;

}


InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,

                                                 unsigned Size,

                                                 MachineRegisterInfo &MRI) const {

  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)

    return std::nullopt;

  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());

  if (Adrp.getOpcode() != AArch64::ADRP)

    return std::nullopt;


  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.

  auto Offset = Adrp.getOperand(1).getOffset();

  if (Offset % Size != 0)

    return std::nullopt;


  auto GV = Adrp.getOperand(1).getGlobal();

  if (GV->isThreadLocal())

    return std::nullopt;


  auto &MF = *RootDef.getParent()->getParent();

  if (GV->getPointerAlignment(MF.getDataLayout()) < Size)

    return std::nullopt;


  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());

  MachineIRBuilder MIRBuilder(RootDef);

  Register AdrpReg = Adrp.getOperand(0).getReg();

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },

           [=](MachineInstrBuilder &MIB) {

             MIB.addGlobalAddress(GV, Offset,

                                  OpFlags | AArch64II::MO_PAGEOFF |

                                      AArch64II::MO_NC);

           }}};

}


/// Select a "register plus scaled unsigned 12-bit immediate" address.  The

/// "Size" argument is the size in bytes of the memory reference, which

/// determines the scale.

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,

                                                  unsigned Size) const {

  MachineFunction &MF = *Root.getParent()->getParent()->getParent();

  MachineRegisterInfo &MRI = MF.getRegInfo();


  if (!Root.isReg())

    return std::nullopt;


  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());

  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {

    return {{

        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },

        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },

    }};

  }


  CodeModel::Model CM = MF.getTarget().getCodeModel();

  // Check if we can fold in the ADD of small code model ADRP + ADD address.

  // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold

  // globals into the offset.

  MachineInstr *RootParent = Root.getParent();

  if (CM == CodeModel::Small &&

      !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&

        STI.isTargetDarwin())) {

    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);

    if (OpFns)

      return OpFns;

  }


  if (isBaseWithConstantOffset(Root, MRI)) {

    MachineOperand &LHS = RootDef->getOperand(1);

    MachineOperand &RHS = RootDef->getOperand(2);

    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());

    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());


    int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();

    unsigned Scale = Log2_32(Size);

    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {

      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)

        return {{

            [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },

            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },

        }};


      return {{

          [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },

          [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },

      }};

    }

  }


  // Before falling back to our general case, check if the unscaled

  // instructions can handle this. If so, that's preferable.

  if (selectAddrModeUnscaled(Root, Size))

    return std::nullopt;


  return {{

      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },

      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },

  }};

}


/// Given a shift instruction, return the correct shift type for that

/// instruction.


static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {

  switch (MI.getOpcode()) {

  default:

    return AArch64_AM::InvalidShiftExtend;

  case TargetOpcode::G_SHL:

    return AArch64_AM::LSL;

  case TargetOpcode::G_LSHR:

    return AArch64_AM::LSR;

  case TargetOpcode::G_ASHR:

    return AArch64_AM::ASR;

  case TargetOpcode::G_ROTR:

    return AArch64_AM::ROR;

  }

}


/// Select a "shifted register" operand. If the value is not shifted, set the

/// shift operand to a default value of "lsl 0".

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,

                                                  bool AllowROR) const {

  if (!Root.isReg())

    return std::nullopt;

  MachineRegisterInfo &MRI =

      Root.getParent()->getParent()->getParent()->getRegInfo();


  // Check if the operand is defined by an instruction which corresponds to

  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.

  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());

  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);

  if (ShType == AArch64_AM::InvalidShiftExtend)

    return std::nullopt;

  if (ShType == AArch64_AM::ROR && !AllowROR)

    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))

    return std::nullopt;


  // Need an immediate on the RHS.

  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);

  auto Immed = getImmedFromMO(ShiftRHS);

  if (!Immed)

    return std::nullopt;


  // We have something that we can fold. Fold in the shift's LHS and RHS into

  // the instruction.

  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);

  Register ShiftReg = ShiftLHS.getReg();


  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();

  unsigned Val = *Immed & (NumBits - 1);

  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);


  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },

           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};

}


AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(

    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {

  unsigned Opc = MI.getOpcode();


  // Handle explicit extend instructions first.

  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {

    unsigned Size;

    if (Opc == TargetOpcode::G_SEXT)

      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    else

      Size = MI.getOperand(2).getImm();

    assert(Size != 64 && "Extend from 64 bits?");

    switch (Size) {

    case 8:

      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;

    case 16:

      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;

    case 32:

      return AArch64_AM::SXTW;

    default:

      return AArch64_AM::InvalidShiftExtend;

    }

  }


  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {

    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    assert(Size != 64 && "Extend from 64 bits?");

    switch (Size) {

    case 8:

      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;

    case 16:

      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;

    case 32:

      return AArch64_AM::UXTW;

    default:

      return AArch64_AM::InvalidShiftExtend;

    }

  }


  // Don't have an explicit extend. Try to handle a G_AND with a constant mask

  // on the RHS.

  if (Opc != TargetOpcode::G_AND)

    return AArch64_AM::InvalidShiftExtend;


  std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));

  if (!MaybeAndMask)

    return AArch64_AM::InvalidShiftExtend;

  uint64_t AndMask = *MaybeAndMask;

  switch (AndMask) {

  default:

    return AArch64_AM::InvalidShiftExtend;

  case 0xFF:

    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;

  case 0xFFFF:

    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;

  case 0xFFFFFFFF:

    return AArch64_AM::UXTW;

  }

}


Register AArch64InstructionSelector::moveScalarRegClass(

    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {

  MachineRegisterInfo &MRI = *MIB.getMRI();

  auto Ty = MRI.getType(Reg);

  assert(!Ty.isVector() && "Expected scalars only!");

  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))

    return Reg;


  // Create a copy and immediately select it.

  // FIXME: We should have an emitCopy function?

  auto Copy = MIB.buildCopy({&RC}, {Reg});

  selectCopy(*Copy, TII, MRI, TRI, RBI);

  return Copy.getReg(0);

}


/// Select an "extended register" operand. This operand folds in an extend

/// followed by an optional left shift.

InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectArithExtendedRegister(

    MachineOperand &Root) const {

  if (!Root.isReg())

    return std::nullopt;

  MachineRegisterInfo &MRI =

      Root.getParent()->getParent()->getParent()->getRegInfo();


  uint64_t ShiftVal = 0;

  Register ExtReg;

  AArch64_AM::ShiftExtendType Ext;

  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);

  if (!RootDef)

    return std::nullopt;


  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))

    return std::nullopt;


  // Check if we can fold a shift and an extend.

  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {

    // Look for a constant on the RHS of the shift.

    MachineOperand &RHS = RootDef->getOperand(2);

    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);

    if (!MaybeShiftVal)

      return std::nullopt;

    ShiftVal = *MaybeShiftVal;

    if (ShiftVal > 4)

      return std::nullopt;

    // Look for a valid extend instruction on the LHS of the shift.

    MachineOperand &LHS = RootDef->getOperand(1);

    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);

    if (!ExtDef)

      return std::nullopt;

    Ext = getExtendTypeForInst(*ExtDef, MRI);

    if (Ext == AArch64_AM::InvalidShiftExtend)

      return std::nullopt;

    ExtReg = ExtDef->getOperand(1).getReg();

  } else {

    // Didn't get a shift. Try just folding an extend.

    Ext = getExtendTypeForInst(*RootDef, MRI);

    if (Ext == AArch64_AM::InvalidShiftExtend)

      return std::nullopt;

    ExtReg = RootDef->getOperand(1).getReg();


    // If we have a 32 bit instruction which zeroes out the high half of a

    // register, we get an implicit zero extend for free. Check if we have one.

    // FIXME: We actually emit the extend right now even though we don't have

    // to.

    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {

      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);

      if (isDef32(*ExtInst))

        return std::nullopt;

    }

  }


  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister

  // copy.

  MachineIRBuilder MIB(*RootDef);

  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);


  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },

           [=](MachineInstrBuilder &MIB) {

             MIB.addImm(getArithExtendImm(Ext, ShiftVal));

           }}};

}


InstructionSelector::ComplexRendererFns

AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {

  if (!Root.isReg())

    return std::nullopt;

  MachineRegisterInfo &MRI =

      Root.getParent()->getParent()->getParent()->getRegInfo();


  auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);

  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&

         STI.isLittleEndian())

    Extract =

        getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);

  if (!Extract)

    return std::nullopt;


  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {

    if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {

      Register ExtReg = Extract->MI->getOperand(2).getReg();

      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};

    }

  }

  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {

    LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());

    auto LaneIdx = getIConstantVRegValWithLookThrough(

        Extract->MI->getOperand(2).getReg(), MRI);

    if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&

        LaneIdx->Value.getSExtValue() == 1) {

      Register ExtReg = Extract->MI->getOperand(1).getReg();

      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};

    }

  }


  return std::nullopt;

}


void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,

                                                const MachineInstr &MI,

                                                int OpIdx) const {

  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

         "Expected G_CONSTANT");

  std::optional<int64_t> CstVal =

      getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);

  assert(CstVal && "Expected constant value");

  MIB.addImm(*CstVal);

}


void AArch64InstructionSelector::renderLogicalImm32(

  MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {

  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

         "Expected G_CONSTANT");

  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();

  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);

  MIB.addImm(Enc);

}


void AArch64InstructionSelector::renderLogicalImm64(

  MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {

  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&

         "Expected G_CONSTANT");

  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();

  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);

  MIB.addImm(Enc);

}


void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,

                                                 const MachineInstr &MI,

                                                 int OpIdx) const {

  assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&

         "Expected G_UBSANTRAP");

  MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));

}


void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,

                                               const MachineInstr &MI,

                                               int OpIdx) const {

  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&

         "Expected G_FCONSTANT");

  MIB.addImm(

      AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));

}


void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,

                                               const MachineInstr &MI,

                                               int OpIdx) const {

  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&

         "Expected G_FCONSTANT");

  MIB.addImm(

      AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));

}


void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,

                                               const MachineInstr &MI,

                                               int OpIdx) const {

  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&

         "Expected G_FCONSTANT");

  MIB.addImm(

      AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));

}


void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(

    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {

  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&

         "Expected G_FCONSTANT");

  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)

                                                      .getFPImm()

                                                      ->getValueAPF()

                                                      .bitcastToAPInt()

                                                      .getZExtValue()));

}


bool AArch64InstructionSelector::isLoadStoreOfNumBytes(

    const MachineInstr &MI, unsigned NumBytes) const {

  if (!MI.mayLoadOrStore())

    return false;

  assert(MI.hasOneMemOperand() &&

         "Expected load/store to have only one mem op!");

  return (*MI.memoperands_begin())->getSize() == NumBytes;

}


bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {

  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();

  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)

    return false;


  // Only return true if we know the operation will zero-out the high half of

  // the 64-bit register. Truncates can be subregister copies, which don't

  // zero out the high bits. Copies and other copy-like instructions can be

  // fed by truncates, or could be lowered as subregister copies.

  switch (MI.getOpcode()) {

  default:

    return true;

  case TargetOpcode::COPY:

  case TargetOpcode::G_BITCAST:

  case TargetOpcode::G_TRUNC:

  case TargetOpcode::G_PHI:

    return false;

  }

}


// Perform fixups on the given PHI instruction's operands to force them all

// to be the same as the destination regbank.


static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,

                            const AArch64RegisterBankInfo &RBI) {

  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");

  Register DstReg = MI.getOperand(0).getReg();

  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);

  assert(DstRB && "Expected PHI dst to have regbank assigned");

  MachineIRBuilder MIB(MI);


  // Go through each operand and ensure it has the same regbank.

  for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {

    if (!MO.isReg())

      continue;

    Register OpReg = MO.getReg();

    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);

    if (RB != DstRB) {

      // Insert a cross-bank copy.

      auto *OpDef = MRI.getVRegDef(OpReg);

      const LLT &Ty = MRI.getType(OpReg);

      MachineBasicBlock &OpDefBB = *OpDef->getParent();


      // Any instruction we insert must appear after all PHIs in the block

      // for the block to be valid MIR.

      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());

      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())

        InsertPt = OpDefBB.getFirstNonPHI();

      MIB.setInsertPt(*OpDef->getParent(), InsertPt);

      auto Copy = MIB.buildCopy(Ty, OpReg);

      MRI.setRegBank(Copy.getReg(0), *DstRB);

      MO.setReg(Copy.getReg(0));

    }

  }

}


void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {

  // We're looking for PHIs, build a list so we don't invalidate iterators.

  MachineRegisterInfo &MRI = MF.getRegInfo();

  SmallVector<MachineInstr *, 32> Phis;

  for (auto &BB : MF) {

    for (auto &MI : BB) {

      if (MI.getOpcode() == TargetOpcode::G_PHI)

        Phis.emplace_back(&MI);

    }

  }


  for (auto *MI : Phis) {

    // We need to do some work here if the operand types are < 16 bit and they

    // are split across fpr/gpr banks. Since all types <32b on gpr

    // end up being assigned gpr32 regclasses, we can end up with PHIs here

    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't

    // be selecting heterogenous regbanks for operands if possible, but we

    // still need to be able to deal with it here.

    //

    // To fix this, if we have a gpr-bank operand < 32b in size and at least

    // one other operand is on the fpr bank, then we add cross-bank copies

    // to homogenize the operand banks. For simplicity the bank that we choose

    // to settle on is whatever bank the def operand has. For example:

    //

    // %endbb:

    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2

    //  =>

    // %bb2:

    //   ...

    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)

    //   ...

    // %endbb:

    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2

    bool HasGPROp = false, HasFPROp = false;

    for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {

      if (!MO.isReg())

        continue;

      const LLT &Ty = MRI.getType(MO.getReg());

      if (!Ty.isValid() || !Ty.isScalar())

        break;

      if (Ty.getSizeInBits() >= 32)

        break;

      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());

      // If for some reason we don't have a regbank yet. Don't try anything.

      if (!RB)

        break;


      if (RB->getID() == AArch64::GPRRegBankID)

        HasGPROp = true;

      else

        HasFPROp = true;

    }

    // We have heterogenous regbanks, need to fixup.

    if (HasGPROp && HasFPROp)

      fixupPHIOpBanks(*MI, MRI, RBI);

  }

}


namespace llvm {

InstructionSelector *


createAArch64InstructionSelector(const AArch64TargetMachine &TM,

                                 const AArch64Subtarget &Subtarget,

                                 const AArch64RegisterBankInfo &RBI) {

  return new AArch64InstructionSelector(TM, Subtarget, RBI);

}


}

AArch64AddressingModes.h

SubReg
unsigned SubReg
Definition AArch64AdvSIMDScalarPass.cpp:102

MRI
unsigned const MachineRegisterInfo * MRI
Definition AArch64AdvSIMDScalarPass.cpp:103

Success
#define Success
Definition AArch64Disassembler.cpp:42

DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition AArch64ExpandPseudoInsts.cpp:112

AArch64GlobalISelUtils.h

extractPtrauthBlendDiscriminators
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
Definition AArch64ISelDAGToDAG.cpp:1492

isPreferredADD
static bool isPreferredADD(int64_t ImmOff)
Definition AArch64ISelDAGToDAG.cpp:1315

emitConditionalComparison
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
Definition AArch64ISelLowering.cpp:3759

tryAdvSIMDModImm16
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
Definition AArch64ISelLowering.cpp:14730

tryAdvSIMDModImmFP
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition AArch64ISelLowering.cpp:14827

tryAdvSIMDModImm64
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition AArch64ISelLowering.cpp:14656

isCMN
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
Definition AArch64ISelLowering.cpp:3628

tryAdvSIMDModImm8
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition AArch64ISelLowering.cpp:14806

emitConjunctionRec
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
Definition AArch64ISelLowering.cpp:3877

canEmitConjunction
static bool canEmitConjunction(const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
Definition AArch64ISelLowering.cpp:3815

tryAdvSIMDModImm321s
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition AArch64ISelLowering.cpp:14775

changeFPCCToANDAArch64CC
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
Definition AArch64ISelLowering.cpp:3514

tryAdvSIMDModImm32
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
Definition AArch64ISelLowering.cpp:14677

emitConjunction
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
Definition AArch64ISelLowering.cpp:3989

AArch64InstrInfo.h

GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_PREDICATES_INIT

getRegClassesForCopy
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
Definition AArch64InstructionSelector.cpp:955

GET_GLOBALISEL_TEMPORARIES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT

getTestBitReg
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
Definition AArch64InstructionSelector.cpp:1400

getMinSizeForRegBank
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
Definition AArch64InstructionSelector.cpp:684

getVectorShiftImm
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
Definition AArch64InstructionSelector.cpp:1794

selectLoadStoreUIOp
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
Definition AArch64InstructionSelector.cpp:891

getMinClassForRegBank
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
Definition AArch64InstructionSelector.cpp:607

selectBinaryOp
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
Definition AArch64InstructionSelector.cpp:820

getSubRegForClass
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
Definition AArch64InstructionSelector.cpp:656

selectCopy
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Definition AArch64InstructionSelector.cpp:1014

copySubReg
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
Definition AArch64InstructionSelector.cpp:929

changeICMPPredToAArch64CC
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P, Register RHS={}, MachineRegisterInfo *MRI=nullptr)
Definition AArch64InstructionSelector.cpp:1277

createDTuple
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
Definition AArch64InstructionSelector.cpp:730

fixupPHIOpBanks
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
Definition AArch64InstructionSelector.cpp:7940

selectDebugInstr
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
Definition AArch64InstructionSelector.cpp:985

isSignExtendShiftType
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
Definition AArch64InstructionSelector.cpp:7121

getShiftTypeForInst
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
Definition AArch64InstructionSelector.cpp:7599

unsupportedBinOp
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
Definition AArch64InstructionSelector.cpp:774

getLaneCopyOpcode
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
Definition AArch64InstructionSelector.cpp:3910

createQTuple
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
Definition AArch64InstructionSelector.cpp:739

getImmedFromMO
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
Definition AArch64InstructionSelector.cpp:747

getInsertVecEltOpInfo
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
Definition AArch64InstructionSelector.cpp:4268

createTuple
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
Definition AArch64InstructionSelector.cpp:710

getVectorSHLImm
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
Definition AArch64InstructionSelector.cpp:1803

changeFPCCToORAArch64CC
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
Definition AArch64InstructionSelector.cpp:1316

AArch64MCTargetDesc.h

RegSize
unsigned RegSize
Definition AArch64MIPeepholeOpt.cpp:165

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AArch64MachineFunctionInfo.h

AArch64RegisterBankInfo.h
This file declares the targeting of the RegisterBankInfo class for AArch64.

AArch64RegisterInfo.h

AArch64Subtarget.h

AArch64TargetMachine.h

S16
constexpr LLT S16
Definition AMDGPULegalizerInfo.cpp:296

S32
constexpr LLT S32
Definition AMDGPULegalizerInfo.cpp:297

S64
constexpr LLT S64
Definition AMDGPULegalizerInfo.cpp:299

S8
constexpr LLT S8
Definition AMDGPULegalizerInfo.cpp:295

isStore
static bool isStore(int Opcode)
Definition ARCInstrInfo.cpp:58

selectMergeValues
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Definition ARMInstructionSelector.cpp:233

selectUnmergeValues
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Definition ARMInstructionSelector.cpp:264

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

Utils.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

DerivedTypes.h

Dwarf.h
This file contains constants used for implementing Dwarf debug support.

GIMatchTableExecutorImpl.h

GISelValueTracking.h
Provides analysis for querying information about KnownBits during GISel passes.

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

GenericMachineInstrs.h
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

Type.h

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

InstructionSelector.h

Instructions.h

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

emitLoadFromConstantPool
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Definition LegalizerHelper.cpp:3503

F
#define F(x, y, z)
Definition MD5.cpp:55

I
#define I(x, y, z)
Definition MD5.cpp:58

MIPatternMatch.h
Contains matchers for matching SSA Machine Instructions.

MachineBasicBlock.h

MachineConstantPool.h
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...

MachineFrameInfo.h

MachineFunction.h

MachineIRBuilder.h
This file declares the MachineIRBuilder class.

MachineInstrBuilder.h

MachineInstr.h

MachineMemOperand.h

MachineOperand.h

MachineRegisterInfo.h

Reg
Register Reg
Definition MachineSink.cpp:2117

TRI
Register const TargetRegisterInfo * TRI
Definition MachineSink.cpp:2118

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

T
#define T
Definition Mips16ISelLowering.cpp:353

getReg
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Definition MipsDisassembler.cpp:106

OpIdx
MachineInstr unsigned OpIdx
Definition NVPTXPrologEpilogPass.cpp:56

P
#define P(N)

if
if(PassOpts->AAPipeline)
Definition PassBuilderBindings.cpp:64

Pass.h

getName
static StringRef getName(Value *V)
Definition ProvenanceAnalysisEvaluator.cpp:20

Opc
auto Opc
Definition RISCVRedundantCopyElimination.cpp:75

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:119

Ptr
@ Ptr
Definition TargetLibraryInfo.cpp:77

TargetOpcodes.h

TargetRegisterInfo.h

Concat
static constexpr int Concat[]
Definition X86InterleavedAccess.cpp:232

RHS
Value * RHS
Definition X86PartialReduction.cpp:74

LHS
Value * LHS
Definition X86PartialReduction.cpp:73

Predicate
Definition AMDGPURegBankLegalizeRules.cpp:376

llvm::AArch64FunctionInfo::getVarArgsFPRSize
unsigned getVarArgsFPRSize() const
Definition AArch64MachineFunctionInfo.h:446

llvm::AArch64FunctionInfo::getVarArgsFPRIndex
int getVarArgsFPRIndex() const
Definition AArch64MachineFunctionInfo.h:443

llvm::AArch64FunctionInfo::getVarArgsStackIndex
int getVarArgsStackIndex() const
Definition AArch64MachineFunctionInfo.h:431

llvm::AArch64FunctionInfo::getVarArgsGPRIndex
int getVarArgsGPRIndex() const
Definition AArch64MachineFunctionInfo.h:437

llvm::AArch64FunctionInfo::getVarArgsGPRSize
unsigned getVarArgsGPRSize() const
Definition AArch64MachineFunctionInfo.h:440

llvm::AArch64InstrInfo
Definition AArch64InstrInfo.h:180

llvm::AArch64RegisterBankInfo
This class provides the information for the target register banks.
Definition AArch64RegisterBankInfo.h:106

llvm::AArch64RegisterInfo
Definition AArch64RegisterInfo.h:26

llvm::AArch64Subtarget
Definition AArch64Subtarget.h:38

llvm::AArch64Subtarget::isTargetELF
bool isTargetELF() const
Definition AArch64Subtarget.h:300

llvm::AArch64Subtarget::isTargetDarwin
bool isTargetDarwin() const
Definition AArch64Subtarget.h:291

llvm::AArch64Subtarget::isTargetILP32
bool isTargetILP32() const
Definition AArch64Subtarget.h:303

llvm::AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled
std::optional< uint16_t > getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const
Compute the integer discriminator for a given BlockAddress constant, if blockaddress signing is enabl...
Definition AArch64Subtarget.cpp:664

llvm::AArch64Subtarget::getTargetLowering
const AArch64TargetLowering * getTargetLowering() const override
Definition AArch64Subtarget.h:146

llvm::AArch64Subtarget::isTargetMachO
bool isTargetMachO() const
Definition AArch64Subtarget.h:301

llvm::AArch64Subtarget::ClassifyGlobalReference
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
Definition AArch64Subtarget.cpp:466

llvm::AArch64Subtarget::isLittleEndian
bool isLittleEndian() const
Definition AArch64Subtarget.h:289

llvm::AArch64Subtarget::isX16X17Safer
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
Definition AArch64Subtarget.cpp:674

llvm::AArch64Subtarget::isCallingConvWin64
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
Definition AArch64Subtarget.h:363

llvm::AArch64TargetMachine
Definition AArch64TargetMachine.h:24

llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition APFloat.h:1353

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::zext
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012

llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540

llvm::APInt::trunc
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936

llvm::APInt::getSplat
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651

llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147

llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition BlockFrequencyInfo.h:38

llvm::CmpInst::isEquality
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:917

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678

llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:681

llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707

llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708

llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684

llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:693

llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682

llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683

llvm::CmpInst::ICMP_UGE
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:702

llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701

llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705

llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:692

llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:686

llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:689

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703

llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:690

llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685

llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:687

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:699

llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition InstrTypes.h:700

llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706

llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694

llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:704

llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:691

llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688

llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:829

llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791

llvm::CmpInst::isIntPredicate
bool isIntPredicate() const
Definition InstrTypes.h:785

llvm::CmpInst::isUnsigned
bool isUnsigned() const
Definition InstrTypes.h:938

llvm::CodeGenCoverage
Definition CodeGenCoverage.h:19

llvm::ConstantDataVector::getSplat
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition Constants.cpp:3066

llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition Constants.h:320

llvm::ConstantFP::isNegative
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:327

llvm::ConstantFP::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:324

llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169

llvm::ConstantInt::getBitWidth
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157

llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163

llvm::ConstantVector::get
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
Definition Constants.cpp:1423

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Constant::getSplatValue
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Definition Constants.cpp:1713

llvm::Constant::isNullValue
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90

llvm::DataLayout::getTypeStoreSize
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition DataLayout.h:468

llvm::DataLayout::getPrefTypeAlign
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition DataLayout.cpp:888

llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270

llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359

llvm::Function::isVarArg
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227

llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727

llvm::GIMatchTableExecutor::setupMF
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
Definition GIMatchTableExecutor.h:608

llvm::GISelValueTracking
Definition GISelValueTracking.h:34

llvm::GIndexedStore
Represents indexed stores.
Definition GenericMachineInstrs.h:165

llvm::GLoadStore::getPointerReg
Register getPointerReg() const
Get the source register of the pointer value.
Definition GenericMachineInstrs.h:87

llvm::GMemOperation::getMMO
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
Definition GenericMachineInstrs.h:56

llvm::GMemOperation::getMemSize
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
Definition GenericMachineInstrs.h:73

llvm::GMemOperation::getMemSizeInBits
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Definition GenericMachineInstrs.h:75

llvm::GSelect
Represents a G_SELECT.
Definition GenericMachineInstrs.h:350

llvm::GSelect::getCondReg
Register getCondReg() const
Definition GenericMachineInstrs.h:352

llvm::GSelect::getFalseReg
Register getFalseReg() const
Definition GenericMachineInstrs.h:354

llvm::GSelect::getTrueReg
Register getTrueReg() const
Definition GenericMachineInstrs.h:353

llvm::GenericMachineInstr::getReg
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
Definition GenericMachineInstrs.h:38

llvm::GlobalValue::isThreadLocal
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition GlobalValue.h:265

llvm::GlobalValue::hasExternalWeakLinkage
bool hasExternalWeakLinkage() const
Definition GlobalValue.h:531

llvm::ICmpInst::isEquality
bool isEquality() const
Return true if this predicate is either EQ or NE.
Definition Instructions.h:1322

llvm::InstructionSelector
Definition InstructionSelector.h:22

llvm::LLT
Definition LowLevelType.h:40

llvm::LLT::isScalableVector
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition LowLevelType.h:182

llvm::LLT::getScalarSizeInBits
constexpr unsigned getScalarSizeInBits() const
Definition LowLevelType.h:265

llvm::LLT::isScalar
constexpr bool isScalar() const
Definition LowLevelType.h:147

llvm::LLT::changeElementType
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition LowLevelType.h:212

llvm::LLT::multiplyElements
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
Definition LowLevelType.h:252

llvm::LLT::isPointerVector
constexpr bool isPointerVector() const
Definition LowLevelType.h:153

llvm::LLT::scalar
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition LowLevelType.h:43

llvm::LLT::isValid
constexpr bool isValid() const
Definition LowLevelType.h:146

llvm::LLT::isVector
constexpr bool isVector() const
Definition LowLevelType.h:149

llvm::LLT::pointer
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition LowLevelType.h:58

llvm::LLT::getSizeInBits
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition LowLevelType.h:191

llvm::LLT::isPointer
constexpr bool isPointer() const
Definition LowLevelType.h:150

llvm::LLT::getElementType
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition LowLevelType.h:278

llvm::LLT::getAddressSpace
constexpr unsigned getAddressSpace() const
Definition LowLevelType.h:271

llvm::LLT::fixed_vector
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition LowLevelType.h:101

llvm::LLT::getSizeInBytes
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition LowLevelType.h:201

llvm::LocationSize::getValue
TypeSize getValue() const
Definition MemoryLocation.h:157

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::getFirstNonPHI
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition MachineBasicBlock.cpp:200

llvm::MachineBasicBlock::end
iterator end()
Definition MachineBasicBlock.h:379

llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition MachineBasicBlock.h:323

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:341

llvm::MachineConstantPool::getConstantPoolIndex
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
Definition MachineFunction.cpp:1568

llvm::MachineFrameInfo::setAdjustsStack
void setAdjustsStack(bool V)
Definition MachineFrameInfo.h:620

llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition MachineFrameInfo.h:376

llvm::MachineFrameInfo::setReturnAddressIsTaken
void setReturnAddressIsTaken(bool s)
Definition MachineFrameInfo.h:382

llvm::MachineFunction
Definition MachineFunction.h:286

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition MachineFunction.h:762

llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition MachineFunction.cpp:536

llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition MachineFunction.h:778

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition MachineFunction.h:772

llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition MachineFunction.cpp:309

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:733

llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition MachineFunction.h:860

llvm::MachineFunction::getConstantPool
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Definition MachineFunction.h:794

llvm::MachineFunction::getTarget
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition MachineFunction.h:758

llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition MachineIRBuilder.h:236

llvm::MachineIRBuilder::setInsertPt
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
Definition MachineIRBuilder.h:339

llvm::MachineIRBuilder::setInstr
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
Definition MachineIRBuilder.h:366

llvm::MachineIRBuilder::buildInstr
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Definition MachineIRBuilder.h:418

llvm::MachineIRBuilder::getMF
MachineFunction & getMF()
Getter for the function we currently build.
Definition MachineIRBuilder.h:288

llvm::MachineIRBuilder::setInstrAndDebugLoc
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
Definition MachineIRBuilder.h:377

llvm::MachineIRBuilder::getMBB
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
Definition MachineIRBuilder.h:320

llvm::MachineIRBuilder::getMRI
MachineRegisterInfo * getMRI()
Getter for MRI.
Definition MachineIRBuilder.h:310

llvm::MachineIRBuilder::getState
MachineIRBuilderState & getState()
Getter for the State.
Definition MachineIRBuilder.h:314

llvm::MachineIRBuilder::buildCopy
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
Definition MachineIRBuilder.cpp:328

llvm::MachineIRBuilder::getDataLayout
const DataLayout & getDataLayout() const
Definition MachineIRBuilder.h:298

llvm::MachineIRBuilder::setState
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
Definition MachineIRBuilder.h:317

llvm::MachineIRBuilder::buildPtrToInt
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Definition MachineIRBuilder.h:748

llvm::MachineInstrBuilder
Definition MachineInstrBuilder.h:98

llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition MachineInstrBuilder.h:123

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition MachineInstrBuilder.h:160

llvm::MachineInstrBuilder::addBlockAddress
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
Definition MachineInstrBuilder.h:219

llvm::MachineInstrBuilder::addFrameIndex
const MachineInstrBuilder & addFrameIndex(int Idx) const
Definition MachineInstrBuilder.h:181

llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition MachineInstrBuilder.h:226

llvm::MachineInstrBuilder::addGlobalAddress
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
Definition MachineInstrBuilder.h:206

llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition MachineInstrBuilder.h:126

llvm::MachineInstrBuilder::constrainAllUses
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
Definition MachineInstrBuilder.h:361

llvm::MachineInstrBuilder::addJumpTableIndex
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
Definition MachineInstrBuilder.h:200

llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition MachineInstrBuilder.h:175

llvm::MachineInstrBuilder::cloneMemRefs
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Definition MachineInstrBuilder.h:242

llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition MachineInstrBuilder.h:152

llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition MachineInstrBuilder.h:301

llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition MachineInstrBuilder.h:231

llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition MachineInstrBuilder.h:145

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:72

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition MachineInstr.h:587

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition MachineInstr.h:359

llvm::MachineInstr::addOperand
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
Definition MachineInstr.cpp:206

llvm::MachineInstr::NoFPExcept
@ NoFPExcept
Definition MachineInstr.h:114

llvm::MachineInstr::getMF
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Definition MachineInstr.cpp:756

llvm::MachineInstr::eraseFromParent
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition MachineInstr.cpp:770

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:595

llvm::MachineInstr::addMemOperand
LLVM_ABI void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
Definition MachineInstr.cpp:383

llvm::MachineMemOperand::getMemoryType
LLT getMemoryType() const
Return the memory type of the memory reference.
Definition MachineMemOperand.h:240

llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition MachineMemOperand.h:137

llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition MachineMemOperand.h:139

llvm::MachineMemOperand::getSuccessOrdering
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
Definition MachineMemOperand.h:282

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:48

llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition MachineOperand.h:582

llvm::MachineOperand::getCImm
const ConstantInt * getCImm() const
Definition MachineOperand.h:561

llvm::MachineOperand::isCImm
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
Definition MachineOperand.h:332

llvm::MachineOperand::getImm
int64_t getImm() const
Definition MachineOperand.h:556

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition MachineOperand.h:328

llvm::MachineOperand::setReg
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Definition MachineOperand.cpp:60

llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition MachineOperand.h:330

llvm::MachineOperand::ChangeToImmediate
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
Definition MachineOperand.cpp:161

llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition MachineOperand.h:243

llvm::MachineOperand::CreatePredicate
static MachineOperand CreatePredicate(unsigned Pred)
Definition MachineOperand.h:980

llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition MachineOperand.h:821

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:368

llvm::MachineOperand::CreateGA
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
Definition MachineOperand.h:898

llvm::MachineOperand::CreateBA
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
Definition MachineOperand.h:914

llvm::MachineOperand::getFPImm
const ConstantFP * getFPImm() const
Definition MachineOperand.h:566

llvm::MachineOperand::getPredicate
unsigned getPredicate() const
Definition MachineOperand.h:617

llvm::MachineOperand::getOffset
int64_t getOffset() const
Return the offset from the symbol in this operand.
Definition MachineOperand.h:629

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition MachineRegisterInfo.h:53

llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition ProfileSummaryInfo.h:42

llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition RegisterBankInfo.h:40

llvm::RegisterBankInfo::constrainGenericRegister
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
Definition RegisterBankInfo.cpp:131

llvm::RegisterBankInfo::getRegBank
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
Definition RegisterBankInfo.h:440

llvm::RegisterBankInfo::getSizeInBits
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
Definition RegisterBankInfo.cpp:497

llvm::RegisterBank
This class implements the register bank concept.
Definition RegisterBank.h:29

llvm::RegisterBank::getID
unsigned getID() const
Get the identifier of this register bank.
Definition RegisterBank.h:46

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:19

llvm::Register::isValid
constexpr bool isValid() const
Definition Register.h:107

llvm::Register::isVirtual
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74

llvm::Register::isPhysical
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78

llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition SmallVector.h:938

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:414

llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition SmallVector.h:268

llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition TargetInstrInfo.h:114

llvm::TargetMachine::isPositionIndependent
bool isPositionIndependent() const
Definition TargetMachine.cpp:143

llvm::TargetMachine::useEmulatedTLS
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Definition TargetMachine.cpp:260

llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition TargetMachine.h:264

llvm::TargetRegisterClass
Definition TargetRegisterInfo.h:45

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition TargetRegisterInfo.h:237

llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.

llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition TargetSubtargetInfo.h:103

llvm::TypeSize
Definition TypeSize.h:332

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343

llvm::TypeSize::getScalable
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:232

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::getPointerAlignment
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:134

uint64_t

Changed
Changed
Definition ObjCARCOpts.cpp:2370

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AArch64CC::CondCode
CondCode
Definition AArch64BaseInfo.h:254

llvm::AArch64CC::VC
@ VC
Definition AArch64BaseInfo.h:262

llvm::AArch64CC::NE
@ NE
Definition AArch64BaseInfo.h:256

llvm::AArch64CC::GE
@ GE
Definition AArch64BaseInfo.h:265

llvm::AArch64CC::PL
@ PL
Definition AArch64BaseInfo.h:260

llvm::AArch64CC::EQ
@ EQ
Definition AArch64BaseInfo.h:255

llvm::AArch64CC::HS
@ HS
Definition AArch64BaseInfo.h:257

llvm::AArch64CC::MI
@ MI
Definition AArch64BaseInfo.h:259

llvm::AArch64CC::GT
@ GT
Definition AArch64BaseInfo.h:267

llvm::AArch64CC::LT
@ LT
Definition AArch64BaseInfo.h:266

llvm::AArch64CC::VS
@ VS
Definition AArch64BaseInfo.h:261

llvm::AArch64CC::HI
@ HI
Definition AArch64BaseInfo.h:263

llvm::AArch64CC::LO
@ LO
Definition AArch64BaseInfo.h:258

llvm::AArch64CC::AL
@ AL
Definition AArch64BaseInfo.h:269

llvm::AArch64CC::LE
@ LE
Definition AArch64BaseInfo.h:268

llvm::AArch64CC::LS
@ LS
Definition AArch64BaseInfo.h:264

llvm::AArch64CC::getInvertedCondCode
static CondCode getInvertedCondCode(CondCode Code)
Definition AArch64BaseInfo.h:303

llvm::AArch64CC::getNZCVToSatisfyCondCode
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
Definition AArch64BaseInfo.h:343

llvm::AArch64GISelUtils
Definition AArch64GlobalISelUtils.h:24

llvm::AArch64GISelUtils::changeFCMPPredToAArch64CC
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
Definition AArch64GlobalISelUtils.cpp:128

llvm::AArch64GISelUtils::getAArch64VectorSplatScalar
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Definition AArch64GlobalISelUtils.cpp:33

llvm::AArch64II::MO_NC
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
Definition AArch64BaseInfo.h:852

llvm::AArch64II::MO_G1
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
Definition AArch64BaseInfo.h:828

llvm::AArch64II::MO_PAGEOFF
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
Definition AArch64BaseInfo.h:816

llvm::AArch64II::MO_GOT
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
Definition AArch64BaseInfo.h:847

llvm::AArch64II::MO_G0
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
Definition AArch64BaseInfo.h:832

llvm::AArch64II::MO_PAGE
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
Definition AArch64BaseInfo.h:811

llvm::AArch64II::MO_TLS
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
Definition AArch64BaseInfo.h:858

llvm::AArch64II::MO_G2
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
Definition AArch64BaseInfo.h:824

llvm::AArch64II::MO_G3
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
Definition AArch64BaseInfo.h:820

llvm::AArch64PACKey::LAST
@ LAST
Definition AArch64BaseInfo.h:899

llvm::AArch64PACKey::IA
@ IA
Definition AArch64BaseInfo.h:895

llvm::AArch64_AM::isLogicalImmediate
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
Definition AArch64AddressingModes.h:275

llvm::AArch64_AM::encodeAdvSIMDModImmType2
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
Definition AArch64AddressingModes.h:471

llvm::AArch64_AM::isAdvSIMDModImmType9
static bool isAdvSIMDModImmType9(uint64_t Imm)
Definition AArch64AddressingModes.h:573

llvm::AArch64_AM::isAdvSIMDModImmType4
static bool isAdvSIMDModImmType4(uint64_t Imm)
Definition AArch64AddressingModes.h:496

llvm::AArch64_AM::isAdvSIMDModImmType5
static bool isAdvSIMDModImmType5(uint64_t Imm)
Definition AArch64AddressingModes.h:511

llvm::AArch64_AM::getFP32Imm
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
Definition AArch64AddressingModes.h:393

llvm::AArch64_AM::encodeAdvSIMDModImmType7
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
Definition AArch64AddressingModes.h:548

llvm::AArch64_AM::encodeAdvSIMDModImmType12
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
Definition AArch64AddressingModes.h:733

llvm::AArch64_AM::encodeAdvSIMDModImmType10
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
Definition AArch64AddressingModes.h:635

llvm::AArch64_AM::encodeAdvSIMDModImmType9
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
Definition AArch64AddressingModes.h:579

llvm::AArch64_AM::ShiftExtendType
ShiftExtendType
Definition AArch64AddressingModes.h:32

llvm::AArch64_AM::SXTW
@ SXTW
Definition AArch64AddressingModes.h:47

llvm::AArch64_AM::LSL
@ LSL
Definition AArch64AddressingModes.h:34

llvm::AArch64_AM::UXTW
@ UXTW
Definition AArch64AddressingModes.h:42

llvm::AArch64_AM::ASR
@ ASR
Definition AArch64AddressingModes.h:36

llvm::AArch64_AM::InvalidShiftExtend
@ InvalidShiftExtend
Definition AArch64AddressingModes.h:33

llvm::AArch64_AM::UXTB
@ UXTB
Definition AArch64AddressingModes.h:40

llvm::AArch64_AM::LSR
@ LSR
Definition AArch64AddressingModes.h:35

llvm::AArch64_AM::UXTH
@ UXTH
Definition AArch64AddressingModes.h:41

llvm::AArch64_AM::ROR
@ ROR
Definition AArch64AddressingModes.h:37

llvm::AArch64_AM::SXTB
@ SXTB
Definition AArch64AddressingModes.h:45

llvm::AArch64_AM::SXTH
@ SXTH
Definition AArch64AddressingModes.h:46

llvm::AArch64_AM::encodeLogicalImmediate
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
Definition AArch64AddressingModes.h:282

llvm::AArch64_AM::isAdvSIMDModImmType7
static bool isAdvSIMDModImmType7(uint64_t Imm)
Definition AArch64AddressingModes.h:543

llvm::AArch64_AM::encodeAdvSIMDModImmType5
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
Definition AArch64AddressingModes.h:517

llvm::AArch64_AM::getFP64Imm
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
Definition AArch64AddressingModes.h:421

llvm::AArch64_AM::isAdvSIMDModImmType10
static bool isAdvSIMDModImmType10(uint64_t Imm)
Definition AArch64AddressingModes.h:593

llvm::AArch64_AM::getFP16Imm
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
Definition AArch64AddressingModes.h:367

llvm::AArch64_AM::encodeAdvSIMDModImmType8
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
Definition AArch64AddressingModes.h:568

llvm::AArch64_AM::isAdvSIMDModImmType12
static bool isAdvSIMDModImmType12(uint64_t Imm)
Definition AArch64AddressingModes.h:727

llvm::AArch64_AM::encodeAdvSIMDModImmType11
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
Definition AArch64AddressingModes.h:684

llvm::AArch64_AM::isAdvSIMDModImmType11
static bool isAdvSIMDModImmType11(uint64_t Imm)
Definition AArch64AddressingModes.h:677

llvm::AArch64_AM::encodeAdvSIMDModImmType6
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
Definition AArch64AddressingModes.h:533

llvm::AArch64_AM::isAdvSIMDModImmType8
static bool isAdvSIMDModImmType8(uint64_t Imm)
Definition AArch64AddressingModes.h:558

llvm::AArch64_AM::encodeAdvSIMDModImmType4
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
Definition AArch64AddressingModes.h:501

llvm::AArch64_AM::getShifterImm
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
Definition AArch64AddressingModes.h:98

llvm::AArch64_AM::isAdvSIMDModImmType6
static bool isAdvSIMDModImmType6(uint64_t Imm)
Definition AArch64AddressingModes.h:527

llvm::AArch64_AM::encodeAdvSIMDModImmType1
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
Definition AArch64AddressingModes.h:456

llvm::AArch64_AM::encodeAdvSIMDModImmType3
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
Definition AArch64AddressingModes.h:486

llvm::AArch64_AM::isAdvSIMDModImmType2
static bool isAdvSIMDModImmType2(uint64_t Imm)
Definition AArch64AddressingModes.h:466

llvm::AArch64_AM::isAdvSIMDModImmType3
static bool isAdvSIMDModImmType3(uint64_t Imm)
Definition AArch64AddressingModes.h:481

llvm::AArch64_AM::isAdvSIMDModImmType1
static bool isAdvSIMDModImmType1(uint64_t Imm)
Definition AArch64AddressingModes.h:451

llvm::AMDGPUISD::BFM
@ BFM
Definition AMDGPUISelLowering.h:516

llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Definition AMDGPUMetadata.h:183

llvm::AMDGPU::Imm
@ Imm
Definition AMDGPURegBankLegalizeRules.h:129

llvm::AMDGPU::P0
@ P0
Definition AMDGPURegBankLegalizeRules.h:61

llvm::ARMISD::CSINC
@ CSINC
Definition ARMISelLowering.h:322

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:126

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::CodeModel::Model
Model
Definition CodeGen.h:31

llvm::CodeModel::Large
@ Large
Definition CodeGen.h:31

llvm::CodeModel::Tiny
@ Tiny
Definition CodeGen.h:31

llvm::CodeModel::Small
@ Small
Definition CodeGen.h:31

llvm::ISD::ConstantFP
@ ConstantFP
Definition ISDOpcodes.h:87

llvm::ISD::FrameIndex
@ FrameIndex
Definition ISDOpcodes.h:90

llvm::ISD::Constant
@ Constant
Definition ISDOpcodes.h:86

llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition ISDOpcodes.h:1691

llvm::MIPatternMatch
Definition MIPatternMatch.h:25

llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition MIPatternMatch.h:310

llvm::MIPatternMatch::m_GZExt
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
Definition MIPatternMatch.h:705

llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition MIPatternMatch.h:102

llvm::MIPatternMatch::m_GAdd
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:550

llvm::MIPatternMatch::m_GOr
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:616

llvm::MIPatternMatch::m_Neg
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
Definition MIPatternMatch.h:928

llvm::MIPatternMatch::m_OneNonDBGUse
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
Definition MIPatternMatch.h:70

llvm::MIPatternMatch::m_Not
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
Definition MIPatternMatch.h:936

llvm::MIPatternMatch::m_SpecificICst
SpecificConstantMatch m_SpecificICst(APInt RequestedValue)
Matches a constant equal to RequestedValue.
Definition MIPatternMatch.h:213

llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition MIPatternMatch.h:28

llvm::MIPatternMatch::m_GPtrAdd
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:569

llvm::MIPatternMatch::m_GShl
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:636

llvm::MIPatternMatch::m_any_of
Or< Preds... > m_any_of(Preds &&... preds)
Definition MIPatternMatch.h:354

llvm::MIPatternMatch::m_GAnd
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Definition MIPatternMatch.h:605

llvm::MipsISD::Ext
@ Ext
Definition MipsISelLowering.h:157

llvm::MipsISD::Ins
@ Ins
Definition MipsISelLowering.h:158

llvm::NVPTX::LoadStore
LoadStore
Definition NVPTX.h:145

llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition PPCPredicates.h:26

llvm::PPC::getPredicate
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition PPCPredicates.h:87

llvm::RISCVFenceField::R
@ R
Definition RISCVBaseInfo.h:400

llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition MachineInstrBuilder.h:49

llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition MachineInstrBuilder.h:55

llvm::SPII::Store
@ Store
Definition SparcInstrInfo.h:33

llvm::SPII::Load
@ Load
Definition SparcInstrInfo.h:32

llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
Definition X86BaseInfo.h:109

llvm::codeview::SimpleTypeKind::Byte
@ Byte
Definition TypeIndex.h:42

llvm::codeview::FrameCookieKind::Copy
@ Copy
Definition CodeView.h:495

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:409

llvm::dwarf::Index
Index
Definition Dwarf.h:889

llvm::hlsl::rootsig::RegisterType::TReg
@ TReg
Definition HLSLRootSignature.h:31

llvm::logicalview::LVElementKind::Optimized
@ Optimized
Definition LVElement.h:66

llvm::numbers::e
constexpr double e
Definition MathExtras.h:47

llvm::rdf::Instr
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm::sframe::Flags
Flags
Definition SFrame.h:39

llvm::tgtok::Bit
@ Bit
Definition TGLexer.h:78

llvm::tgtok::Bits
@ Bits
Definition TGLexer.h:79

llvm::yaml::NodeKind::Scalar
@ Scalar
Definition YAMLTraits.h:45

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::getFunctionLiveInPhysReg
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition Utils.cpp:916

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:330

llvm::Offset
@ Offset
Definition DWP.cpp:477

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727

llvm::constrainOperandRegClass
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56

llvm::getOpcodeDef
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:651

llvm::RegClassOrRegBank
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
Definition MachineRegisterInfo.h:47

llvm::getConstantFPVRegVal
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:459

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition MachineInstrBuilder.h:369

llvm::getIConstantVRegVal
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294

llvm::Depth
@ Depth
Definition SIMachineScheduler.h:36

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649

llvm::constrainSelectedInstRegOperands
LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:155

llvm::isPreISelGenericOpcode
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
Definition TargetOpcodes.h:30

llvm::getBLRCallOpcode
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
Definition AArch64InstrInfo.cpp:10765

llvm::getDefIgnoringCopies
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:492

llvm::getIConstantVRegSExtVal
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
Definition Utils.cpp:314

llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282

llvm::createAArch64InstructionSelector
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
Definition AArch64InstructionSelector.cpp:8033

llvm::transform
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1970

llvm::has_single_bit
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734

llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:336

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::report_fatal_error
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167

llvm::getAnyConstantVRegValWithLookThrough
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition Utils.cpp:439

llvm::isUInt
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1123

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548

llvm::Key
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
Definition PassManager.h:668

llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition AtomicOrdering.h:56

llvm::RecurKind::Sub
@ Sub
Subtraction of integers.
Definition IVDescriptors.h:38

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:22

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565

llvm::getIConstantVRegValWithLookThrough
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433

llvm::getDefSrcRegIgnoringCopies
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:467

llvm::getSrcRegIgnoringCopies
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:499

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853

raw_ostream.h

llvm::EVT::getFloatingPointVT
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59

llvm::MachinePointerInfo::getConstantPool
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition MachineOperand.cpp:1058