1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Custom DAG lowering for R600
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600ISelLowering.h"
15#include "AMDGPU.h"
17#include "R600Defines.h"
19#include "R600Subtarget.h"
20#include "R600TargetMachine.h"
22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/IntrinsicsR600.h"
25
26using namespace llvm;
27
28#include "R600GenCallingConv.inc"
29
30 R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
31 const R600Subtarget &STI)
32 : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
33 addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
34 addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
35 addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
36 addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
37 addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
38 addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
39
40 setBooleanContents(ZeroOrNegativeOneBooleanContent);
41 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
42
43 computeRegisterProperties(Subtarget->getRegisterInfo());
44
45 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
47
48 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
49 // spaces, so it is custom lowered to handle those where it isn't.
50 for (unsigned Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
51 for (MVT VT : MVT::integer_valuetypes()) {
52 setLoadExtAction(Op, VT, MVT::i1, Promote);
53 setLoadExtAction(Op, VT, MVT::i8, Custom);
54 setLoadExtAction(Op, VT, MVT::i16, Custom);
55 }
56
57 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
58 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,
59 MVT::v2i1, Expand);
60
61 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32,
62 MVT::v4i1, Expand);
63
64 setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
65 Custom);
66
67 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
68 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
69 // We need to include these since trunc STORES to PRIVATE need
70 // special handling to accommodate RMW
71 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
72 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
73 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
74 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
75 setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
76 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
77 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
78 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
79 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
80 setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
81
82 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
83 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
84 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
85
86 // Set condition code actions
87 setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT,
88 ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE,
89 ISD::SETUGT, ISD::SETULT, ISD::SETULE},
90 MVT::f32, Expand);
91
92 setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT},
93 MVT::i32, Expand);
94
96
97 setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
98
99 setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
101
103
105 {MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
106 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
107 Expand);
108
110 MVT::f64, Custom);
111
112 setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
113
114 setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
115 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
116 Custom);
117
118 setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
119 Expand);
120
121 // ADD, SUB overflow.
122 // TODO: turn these into Legal?
123 if (Subtarget->hasCARRY())
124 setOperationAction(ISD::UADDO, MVT::i32, Custom);
125
126 if (Subtarget->hasBORROW())
127 setOperationAction(ISD::USUBO, MVT::i32, Custom);
128
129 // Expand sign extension of vectors
130 if (!Subtarget->hasBFE())
131 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
132
133 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
134
135 if (!Subtarget->hasBFE())
136 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
137 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
138
139 if (!Subtarget->hasBFE())
140 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
141 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
142
144 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
145
147
149
150 setOperationAction(ISD::EXTRACT_VECTOR_ELT,
151 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
152
153 setOperationAction(ISD::INSERT_VECTOR_ELT,
154 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
155
156 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
157 // to be Legal/Custom in order to avoid library calls.
158 setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i32,
159 Custom);
160
161 if (!Subtarget->hasFMA())
162 setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
163
164 // FIXME: May need no denormals check
165 setOperationAction(ISD::FMAD, MVT::f32, Legal);
166
167 if (!Subtarget->hasBFI())
168 // fcopysign can be done in a single instruction with BFI.
169 setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
170
171 if (!Subtarget->hasBCNT(32))
172 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
173
174 if (!Subtarget->hasBCNT(64))
175 setOperationAction(ISD::CTPOP, MVT::i64, Expand);
176
177 if (Subtarget->hasFFBH())
178 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
179
180 if (Subtarget->hasFFBL())
181 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
182
183 // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
184 // need it for R600.
185 if (Subtarget->hasBFE())
186 setHasExtractBitsInsn(true);
187
190
191 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
192 for (MVT VT : ScalarIntVTs)
193 setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT,
194 Expand);
195
196 // LLVM will expand these to atomic_cmp_swap(0)
197 // and atomic_swap, respectively.
198 setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand);
199
200 // We need to custom lower some of the intrinsics
201 setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other,
202 Custom);
203
205
208}
209
210 static bool isEOP(MachineBasicBlock::iterator I) {
211 if (std::next(I) == I->getParent()->end())
212 return false;
213 return std::next(I)->getOpcode() == R600::RETURN;
214}
215
216 MachineBasicBlock *
217 R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
218 MachineBasicBlock *BB) const {
219 MachineFunction *MF = BB->getParent();
220 MachineRegisterInfo &MRI = MF->getRegInfo();
221 MachineBasicBlock::iterator I = MI;
222 const R600InstrInfo *TII = Subtarget->getInstrInfo();
223
224 switch (MI.getOpcode()) {
225 default:
226 // Replace LDS_*_RET instructions that don't have any uses with the
227 // equivalent LDS_*_NORET instruction.
228 if (TII->isLDSRetInstr(MI.getOpcode())) {
229 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
230 assert(DstIdx != -1);
231 MachineInstrBuilder NewMI;
232 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
233 // LDS_1A2D support and remove this special case.
234 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
235 MI.getOpcode() == R600::LDS_CMPST_RET)
236 return BB;
237
238 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
239 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
240 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
241 NewMI.add(MO);
242 } else {
243 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
244 }
245 break;
246
247 case R600::FABS_R600: {
248 MachineInstr *NewMI = TII->buildDefaultInstruction(
249 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
250 MI.getOperand(1).getReg());
251 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
252 break;
253 }
254
255 case R600::FNEG_R600: {
256 MachineInstr *NewMI = TII->buildDefaultInstruction(
257 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
258 MI.getOperand(1).getReg());
259 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
260 break;
261 }
262
263 case R600::MASK_WRITE: {
264 Register maskedRegister = MI.getOperand(0).getReg();
265 assert(maskedRegister.isVirtual());
266 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
267 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
268 break;
269 }
270
271 case R600::MOV_IMM_F32:
272 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
273 .getFPImm()
274 ->getValueAPF()
275 .bitcastToAPInt()
276 .getZExtValue());
277 break;
278
279 case R600::MOV_IMM_I32:
280 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
281 MI.getOperand(1).getImm());
282 break;
283
284 case R600::MOV_IMM_GLOBAL_ADDR: {
285 //TODO: Perhaps combine this instruction with the next if possible
286 auto MIB = TII->buildDefaultInstruction(
287 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
288 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
289 //TODO: Ugh this is rather ugly
290 const MachineOperand &MO = MI.getOperand(1);
291 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
292 MO.getTargetFlags());
293 break;
294 }
295
296 case R600::CONST_COPY: {
297 MachineInstr *NewMI = TII->buildDefaultInstruction(
298 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
299 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
300 MI.getOperand(1).getImm());
301 break;
302 }
303
304 case R600::RAT_WRITE_CACHELESS_32_eg:
305 case R600::RAT_WRITE_CACHELESS_64_eg:
306 case R600::RAT_WRITE_CACHELESS_128_eg:
307 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
308 .add(MI.getOperand(0))
309 .add(MI.getOperand(1))
310 .addImm(isEOP(I)); // Set End of program bit
311 break;
312
313 case R600::RAT_STORE_TYPED_eg:
314 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
315 .add(MI.getOperand(0))
316 .add(MI.getOperand(1))
317 .add(MI.getOperand(2))
318 .addImm(isEOP(I)); // Set End of program bit
319 break;
320
321 case R600::BRANCH:
322 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
323 .add(MI.getOperand(0));
324 break;
325
326 case R600::BRANCH_COND_f32: {
327 MachineInstr *NewMI =
328 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
329 R600::PREDICATE_BIT)
330 .add(MI.getOperand(1))
331 .addImm(R600::PRED_SETNE)
332 .addImm(0); // Flags
333 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
334 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
335 .add(MI.getOperand(0))
336 .addReg(R600::PREDICATE_BIT, RegState::Kill);
337 break;
338 }
339
340 case R600::BRANCH_COND_i32: {
341 MachineInstr *NewMI =
342 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
343 R600::PREDICATE_BIT)
344 .add(MI.getOperand(1))
345 .addImm(R600::PRED_SETNE_INT)
346 .addImm(0); // Flags
347 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
348 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
349 .add(MI.getOperand(0))
350 .addReg(R600::PREDICATE_BIT, RegState::Kill);
351 break;
352 }
353
354 case R600::EG_ExportSwz:
355 case R600::R600_ExportSwz: {
356 // Instruction is left unmodified if it's not the last one of its type
357 bool isLastInstructionOfItsType = true;
358 unsigned InstExportType = MI.getOperand(1).getImm();
359 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
360 EndBlock = BB->end(); NextExportInst != EndBlock;
361 NextExportInst = std::next(NextExportInst)) {
362 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
363 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
364 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
365 .getImm();
366 if (CurrentInstExportType == InstExportType) {
367 isLastInstructionOfItsType = false;
368 break;
369 }
370 }
371 }
372 bool EOP = isEOP(I);
373 if (!EOP && !isLastInstructionOfItsType)
374 return BB;
375 unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
376 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
377 .add(MI.getOperand(0))
378 .add(MI.getOperand(1))
379 .add(MI.getOperand(2))
380 .add(MI.getOperand(3))
381 .add(MI.getOperand(4))
382 .add(MI.getOperand(5))
383 .add(MI.getOperand(6))
384 .addImm(CfInst)
385 .addImm(EOP);
386 break;
387 }
388 case R600::RETURN: {
389 return BB;
390 }
391 }
392
393 MI.eraseFromParent();
394 return BB;
395}
396
397//===----------------------------------------------------------------------===//
398// Custom DAG Lowering Operations
399//===----------------------------------------------------------------------===//
400
401 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
402 MachineFunction &MF = DAG.getMachineFunction();
403 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
404 switch (Op.getOpcode()) {
405 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
406 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
407 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
408 case ISD::SHL_PARTS:
409 case ISD::SRA_PARTS:
410 case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
411 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
412 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
413 case ISD::FCOS:
414 case ISD::FSIN: return LowerTrig(Op, DAG);
415 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
416 case ISD::STORE: return LowerSTORE(Op, DAG);
417 case ISD::LOAD: {
418 SDValue Result = LowerLOAD(Op, DAG);
419 assert((!Result.getNode() ||
420 Result.getNode()->getNumValues() == 2) &&
421 "Load should return a value and a chain");
422 return Result;
423 }
424
425 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
426 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
427 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
428 case ISD::ADDRSPACECAST:
429 return lowerADDRSPACECAST(Op, DAG);
430 case ISD::INTRINSIC_VOID: {
431 SDValue Chain = Op.getOperand(0);
432 unsigned IntrinsicID = Op.getConstantOperandVal(1);
433 switch (IntrinsicID) {
434 case Intrinsic::r600_store_swizzle: {
435 SDLoc DL(Op);
436 const SDValue Args[8] = {
437 Chain,
438 Op.getOperand(2), // Export Value
439 Op.getOperand(3), // ArrayBase
440 Op.getOperand(4), // Type
441 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
442 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
443 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
444 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
445 };
446 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
447 }
448
449 // default for switch(IntrinsicID)
450 default: break;
451 }
452 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
453 break;
454 }
455 case ISD::INTRINSIC_WO_CHAIN: {
456 unsigned IntrinsicID = Op.getConstantOperandVal(0);
457 EVT VT = Op.getValueType();
458 SDLoc DL(Op);
459 switch (IntrinsicID) {
460 case Intrinsic::r600_tex:
461 case Intrinsic::r600_texc: {
462 unsigned TextureOp;
463 switch (IntrinsicID) {
464 case Intrinsic::r600_tex:
465 TextureOp = 0;
466 break;
467 case Intrinsic::r600_texc:
468 TextureOp = 1;
469 break;
470 default:
471 llvm_unreachable("unhandled texture operation");
472 }
473
474 SDValue TexArgs[19] = {
475 DAG.getConstant(TextureOp, DL, MVT::i32),
476 Op.getOperand(1),
477 DAG.getConstant(0, DL, MVT::i32),
478 DAG.getConstant(1, DL, MVT::i32),
479 DAG.getConstant(2, DL, MVT::i32),
480 DAG.getConstant(3, DL, MVT::i32),
481 Op.getOperand(2),
482 Op.getOperand(3),
483 Op.getOperand(4),
484 DAG.getConstant(0, DL, MVT::i32),
485 DAG.getConstant(1, DL, MVT::i32),
486 DAG.getConstant(2, DL, MVT::i32),
487 DAG.getConstant(3, DL, MVT::i32),
488 Op.getOperand(5),
489 Op.getOperand(6),
490 Op.getOperand(7),
491 Op.getOperand(8),
492 Op.getOperand(9),
493 Op.getOperand(10)
494 };
495 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
496 }
497 case Intrinsic::r600_dot4: {
498 SDValue Args[8] = {
499 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
500 DAG.getConstant(0, DL, MVT::i32)),
501 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
502 DAG.getConstant(0, DL, MVT::i32)),
503 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
504 DAG.getConstant(1, DL, MVT::i32)),
505 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
506 DAG.getConstant(1, DL, MVT::i32)),
507 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
508 DAG.getConstant(2, DL, MVT::i32)),
509 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
510 DAG.getConstant(2, DL, MVT::i32)),
511 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
512 DAG.getConstant(3, DL, MVT::i32)),
513 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
514 DAG.getConstant(3, DL, MVT::i32))
515 };
516 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
517 }
518
519 case Intrinsic::r600_implicitarg_ptr: {
520 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
521 uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
522 return DAG.getConstant(ByteOffset, DL, PtrVT);
523 }
524 case Intrinsic::r600_read_ngroups_x:
525 return LowerImplicitParameter(DAG, VT, DL, 0);
526 case Intrinsic::r600_read_ngroups_y:
527 return LowerImplicitParameter(DAG, VT, DL, 1);
528 case Intrinsic::r600_read_ngroups_z:
529 return LowerImplicitParameter(DAG, VT, DL, 2);
530 case Intrinsic::r600_read_global_size_x:
531 return LowerImplicitParameter(DAG, VT, DL, 3);
532 case Intrinsic::r600_read_global_size_y:
533 return LowerImplicitParameter(DAG, VT, DL, 4);
534 case Intrinsic::r600_read_global_size_z:
535 return LowerImplicitParameter(DAG, VT, DL, 5);
536 case Intrinsic::r600_read_local_size_x:
537 return LowerImplicitParameter(DAG, VT, DL, 6);
538 case Intrinsic::r600_read_local_size_y:
539 return LowerImplicitParameter(DAG, VT, DL, 7);
540 case Intrinsic::r600_read_local_size_z:
541 return LowerImplicitParameter(DAG, VT, DL, 8);
542
543 case Intrinsic::r600_read_tgid_x:
544 case Intrinsic::amdgcn_workgroup_id_x:
545 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
546 R600::T1_X, VT);
547 case Intrinsic::r600_read_tgid_y:
548 case Intrinsic::amdgcn_workgroup_id_y:
549 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
550 R600::T1_Y, VT);
551 case Intrinsic::r600_read_tgid_z:
552 case Intrinsic::amdgcn_workgroup_id_z:
553 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
554 R600::T1_Z, VT);
555 case Intrinsic::r600_read_tidig_x:
556 case Intrinsic::amdgcn_workitem_id_x:
557 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
558 R600::T0_X, VT);
559 case Intrinsic::r600_read_tidig_y:
560 case Intrinsic::amdgcn_workitem_id_y:
561 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
562 R600::T0_Y, VT);
563 case Intrinsic::r600_read_tidig_z:
564 case Intrinsic::amdgcn_workitem_id_z:
565 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
566 R600::T0_Z, VT);
567
568 case Intrinsic::r600_recipsqrt_ieee:
569 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
570
571 case Intrinsic::r600_recipsqrt_clamped:
572 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
573 default:
574 return Op;
575 }
576
577 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
578 break;
579 }
580 } // end switch(Op.getOpcode())
581 return SDValue();
582}
583
584 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
585 SmallVectorImpl<SDValue> &Results,
586 SelectionDAG &DAG) const {
587 switch (N->getOpcode()) {
588 default:
589 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
590 return;
591 case ISD::FP_TO_UINT:
592 if (N->getValueType(0) == MVT::i1) {
593 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
594 return;
595 }
596 // Since we don't care about out of bounds values we can use FP_TO_SINT for
597 // uints too. The DAGLegalizer code for uint considers some extra cases
598 // which are not necessary here.
599 [[fallthrough]];
600 case ISD::FP_TO_SINT: {
601 if (N->getValueType(0) == MVT::i1) {
602 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
603 return;
604 }
605
606 SDValue Result;
607 if (expandFP_TO_SINT(N, Result, DAG))
608 Results.push_back(Result);
609 return;
610 }
611 case ISD::SDIVREM: {
612 SDValue Op = SDValue(N, 1);
613 SDValue RES = LowerSDIVREM(Op, DAG);
614 Results.push_back(RES);
615 Results.push_back(RES.getValue(1));
616 break;
617 }
618 case ISD::UDIVREM: {
619 SDValue Op = SDValue(N, 0);
621 break;
622 }
623 }
624}
625
626SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
627 SDValue Vector) const {
628 SDLoc DL(Vector);
629 EVT VecVT = Vector.getValueType();
630 EVT EltVT = VecVT.getVectorElementType();
631 SmallVector<SDValue, 8> Args;
632
633 for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
634 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
635 DAG.getVectorIdxConstant(i, DL)));
636 }
637
638 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
639}
640
641SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
642 SelectionDAG &DAG) const {
643 SDLoc DL(Op);
644 SDValue Vector = Op.getOperand(0);
645 SDValue Index = Op.getOperand(1);
646
647 if (isa<ConstantSDNode>(Index) ||
648 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
649 return Op;
650
651 Vector = vectorToVerticalVector(DAG, Vector);
652 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
653 Vector, Index);
654}
655
656SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
657 SelectionDAG &DAG) const {
658 SDLoc DL(Op);
659 SDValue Vector = Op.getOperand(0);
660 SDValue Value = Op.getOperand(1);
661 SDValue Index = Op.getOperand(2);
662
663 if (isa<ConstantSDNode>(Index) ||
664 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
665 return Op;
666
667 Vector = vectorToVerticalVector(DAG, Vector);
668 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
669 Vector, Value, Index);
670 return vectorToVerticalVector(DAG, Insert);
671}
672
673SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
674 SDValue Op,
675 SelectionDAG &DAG) const {
676 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
677 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
678 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
679
680 const DataLayout &DL = DAG.getDataLayout();
681 const GlobalValue *GV = GSD->getGlobal();
682 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
683
684 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
685 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
686}
687
688SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
689 // On hw >= R700, COS/SIN input must be between -1. and 1.
690 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
691 EVT VT = Op.getValueType();
692 SDValue Arg = Op.getOperand(0);
693 SDLoc DL(Op);
694
695 // TODO: Should this propagate fast-math-flags?
696 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
697 DAG.getNode(ISD::FADD, DL, VT,
698 DAG.getNode(ISD::FMUL, DL, VT, Arg,
699 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
700 DAG.getConstantFP(0.5, DL, MVT::f32)));
701 unsigned TrigNode;
702 switch (Op.getOpcode()) {
703 case ISD::FCOS:
704 TrigNode = AMDGPUISD::COS_HW;
705 break;
706 case ISD::FSIN:
707 TrigNode = AMDGPUISD::SIN_HW;
708 break;
709 default:
710 llvm_unreachable("Wrong trig opcode");
711 }
712 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
713 DAG.getNode(ISD::FADD, DL, VT, FractPart,
714 DAG.getConstantFP(-0.5, DL, MVT::f32)));
715 if (Gen >= AMDGPUSubtarget::R700)
716 return TrigVal;
717 // On R600 hw, COS/SIN input must be between -Pi and Pi.
718 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
719 DAG.getConstantFP(numbers::pif, DL, MVT::f32));
720}
721
722SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
723 SelectionDAG &DAG) const {
724 SDValue Lo, Hi;
725 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
726 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
727}
728
729SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
730 unsigned mainop, unsigned ovf) const {
731 SDLoc DL(Op);
732 EVT VT = Op.getValueType();
733
734 SDValue Lo = Op.getOperand(0);
735 SDValue Hi = Op.getOperand(1);
736
737 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
738 // Extend sign.
739 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
740 DAG.getValueType(MVT::i1));
741
742 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
743
744 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
745}
746
747SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
748 SDLoc DL(Op);
749 return DAG.getNode(
750 ISD::SETCC,
751 DL,
752 MVT::i1,
753 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
754 DAG.getCondCode(ISD::SETGE));
755}
756
757SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
758 SDLoc DL(Op);
759 return DAG.getNode(
760 ISD::SETCC,
761 DL,
762 MVT::i1,
763 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
764 DAG.getCondCode(ISD::SETLE));
765}
766
767SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
768 const SDLoc &DL,
769 unsigned DwordOffset) const {
770 unsigned ByteOffset = DwordOffset * 4;
771 PointerType *PtrType =
773
774 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
775 assert(isInt<16>(ByteOffset));
776
777 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
778 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
780}
781
782bool R600TargetLowering::isZero(SDValue Op) const {
783 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
784 return Cst->isZero();
785 if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
786 return CstFP->isZero();
787 return false;
788}
789
790bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
791 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
792 return CFP->isExactlyValue(1.0);
793 }
794 return isAllOnesConstant(Op);
795}
796
797bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
798 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
799 return CFP->getValueAPF().isZero();
800 }
801 return isNullConstant(Op);
802}
803
804SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
805 SDLoc DL(Op);
806 EVT VT = Op.getValueType();
807
808 SDValue LHS = Op.getOperand(0);
809 SDValue RHS = Op.getOperand(1);
810 SDValue True = Op.getOperand(2);
811 SDValue False = Op.getOperand(3);
812 SDValue CC = Op.getOperand(4);
813 SDValue Temp;
814
815 if (VT == MVT::f32) {
816 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
817 SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
818 if (MinMax)
819 return MinMax;
820 }
821
822 // LHS and RHS are guaranteed to be the same value type
823 EVT CompareVT = LHS.getValueType();
824
825 // Check if we can lower this to a native operation.
826
827 // Try to lower to a SET* instruction:
828 //
829 // SET* can match the following patterns:
830 //
831 // select_cc f32, f32, -1, 0, cc_supported
832 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
833 // select_cc i32, i32, -1, 0, cc_supported
834 //
835
836 // Move hardware True/False values to the correct operand.
837 if (isHWTrueValue(False) && isHWFalseValue(True)) {
838 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
839 ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
840 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
841 std::swap(False, True);
842 CC = DAG.getCondCode(InverseCC);
843 } else {
844 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
845 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
846 std::swap(False, True);
847 std::swap(LHS, RHS);
848 CC = DAG.getCondCode(SwapInvCC);
849 }
850 }
851 }
852
853 if (isHWTrueValue(True) && isHWFalseValue(False) &&
854 (CompareVT == VT || VT == MVT::i32)) {
855 // This can be matched by a SET* instruction.
856 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
857 }
858
859 // Try to lower to a CND* instruction:
860 //
861 // CND* can match the following patterns:
862 //
863 // select_cc f32, 0.0, f32, f32, cc_supported
864 // select_cc f32, 0.0, i32, i32, cc_supported
865 // select_cc i32, 0, f32, f32, cc_supported
866 // select_cc i32, 0, i32, i32, cc_supported
867 //
868
869 // Try to move the zero value to the RHS
870 if (isZero(LHS)) {
871 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
872 // Try swapping the operands
873 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
874 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
875 std::swap(LHS, RHS);
876 CC = DAG.getCondCode(CCSwapped);
877 } else {
878 // Try inverting the condition and then swapping the operands
879 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
880 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
881 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
882 std::swap(True, False);
883 std::swap(LHS, RHS);
884 CC = DAG.getCondCode(CCSwapped);
885 }
886 }
887 }
888 if (isZero(RHS)) {
889 SDValue Cond = LHS;
890 SDValue Zero = RHS;
891 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
892 if (CompareVT != VT) {
893 // Bitcast True / False to the correct types. This will end up being
894 // a nop, but it allows us to define only a single pattern in the
895 // .TD files for each CND* instruction rather than having to have
896 // one pattern for integer True/False and one for fp True/False
897 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
898 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
899 }
900
901 switch (CCOpcode) {
902 case ISD::SETONE:
903 case ISD::SETUNE:
904 case ISD::SETNE:
905 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
906 Temp = True;
907 True = False;
908 False = Temp;
909 break;
910 default:
911 break;
912 }
913 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
914 Cond, Zero,
915 True, False,
916 DAG.getCondCode(CCOpcode));
917 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
918 }
919
920 // If we make it this far, it means we have no native instructions to handle
921 // this SELECT_CC, so we must lower it.
922 SDValue HWTrue, HWFalse;
923
924 if (CompareVT == MVT::f32) {
925 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
926 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
927 } else if (CompareVT == MVT::i32) {
928 HWTrue = DAG.getAllOnesConstant(DL, CompareVT);
929 HWFalse = DAG.getConstant(0, DL, CompareVT);
930 }
931 else {
932 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
933 }
934
935 // Lower this unsupported SELECT_CC into a combination of two supported
936 // SELECT_CC operations.
937 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
938
939 return DAG.getNode(ISD::SELECT_CC, DL, VT,
940 Cond, HWFalse,
941 True, False,
942 DAG.getCondCode(ISD::SETNE));
943}
944
945SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
946 SelectionDAG &DAG) const {
947 SDLoc SL(Op);
948 EVT VT = Op.getValueType();
949
950 const R600TargetMachine &TM =
951 static_cast<const R600TargetMachine &>(getTargetMachine());
952
953 const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
954 unsigned SrcAS = ASC->getSrcAddressSpace();
955 unsigned DestAS = ASC->getDestAddressSpace();
956
957 if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
958 return DAG.getSignedConstant(TM.getNullPointerValue(DestAS), SL, VT);
959
960 return Op;
961}
962
963/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
964/// convert these pointers to a register index. Each register holds
965/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
966/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
967/// for indirect addressing.
968SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
969 unsigned StackWidth,
970 SelectionDAG &DAG) const {
971 unsigned SRLPad;
972 switch(StackWidth) {
973 case 1:
974 SRLPad = 2;
975 break;
976 case 2:
977 SRLPad = 3;
978 break;
979 case 4:
980 SRLPad = 4;
981 break;
982 default: llvm_unreachable("Invalid stack width");
983 }
984
985 SDLoc DL(Ptr);
986 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
987 DAG.getConstant(SRLPad, DL, MVT::i32));
988}
989
990void R600TargetLowering::getStackAddress(unsigned StackWidth,
991 unsigned ElemIdx,
992 unsigned &Channel,
993 unsigned &PtrIncr) const {
994 switch (StackWidth) {
995 default:
996 case 1:
997 Channel = 0;
998 if (ElemIdx > 0) {
999 PtrIncr = 1;
1000 } else {
1001 PtrIncr = 0;
1002 }
1003 break;
1004 case 2:
1005 Channel = ElemIdx % 2;
1006 if (ElemIdx == 2) {
1007 PtrIncr = 1;
1008 } else {
1009 PtrIncr = 0;
1010 }
1011 break;
1012 case 4:
1013 Channel = ElemIdx;
1014 PtrIncr = 0;
1015 break;
1016 }
1017}
1018
1019SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1020 SelectionDAG &DAG) const {
1021 SDLoc DL(Store);
1022 //TODO: Who creates the i8 stores?
1023 assert(Store->isTruncatingStore()
1024 || Store->getValue().getValueType() == MVT::i8);
1025 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1026
1027 SDValue Mask;
1028 if (Store->getMemoryVT() == MVT::i8) {
1029 assert(Store->getAlign() >= 1);
1030 Mask = DAG.getConstant(0xff, DL, MVT::i32);
1031 } else if (Store->getMemoryVT() == MVT::i16) {
1032 assert(Store->getAlign() >= 2);
1033 Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1034 } else {
1035 llvm_unreachable("Unsupported private trunc store");
1036 }
1037
1038 SDValue OldChain = Store->getChain();
1039 bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1040 // Skip dummy
1041 SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1042 SDValue BasePtr = Store->getBasePtr();
1043 SDValue Offset = Store->getOffset();
1044 EVT MemVT = Store->getMemoryVT();
1045
1046 SDValue LoadPtr = BasePtr;
1047 if (!Offset.isUndef()) {
1048 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1049 }
1050
1051 // Get dword location
1052 // TODO: this should be eliminated by the future SHR ptr, 2
1053 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1054 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1055
1056 // Load dword
1057 // TODO: can we be smarter about machine pointer info?
1058 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1059 SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1060
1061 Chain = Dst.getValue(1);
1062
1063 // Get offset in dword
1064 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1065 DAG.getConstant(0x3, DL, MVT::i32));
1066
1067 // Convert byte offset to bit shift
1068 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1069 DAG.getConstant(3, DL, MVT::i32));
1070
1071 // TODO: Contrary to the name of the function,
1072 // it also handles sub i32 non-truncating stores (like i1)
1073 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1074 Store->getValue());
1075
1076 // Mask the value to the right type
1077 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1078
1079 // Shift the value in place
1080 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1081 MaskedValue, ShiftAmt);
1082
1083 // Shift the mask in place
1084 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1085
1086 // Invert the mask. NOTE: if we had native ROL instructions we could
1087 // use inverted mask
1088 DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1089
1090 // Cleanup the target bits
1091 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1092
1093 // Add the new bits
1094 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1095
1096 // Store dword
1097 // TODO: Can we be smarter about MachinePointerInfo?
1098 SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1099
1100 // If we are part of expanded vector, make our neighbors depend on this store
1101 if (VectorTrunc) {
1102 // Make all other vector elements depend on this store
1103 Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1104 DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1105 }
1106 return NewStore;
1107}
1108
1109SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1110 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1111 unsigned AS = StoreNode->getAddressSpace();
1112
1113 SDValue Chain = StoreNode->getChain();
1114 SDValue Ptr = StoreNode->getBasePtr();
1115 SDValue Value = StoreNode->getValue();
1116
1117 EVT VT = Value.getValueType();
1118 EVT MemVT = StoreNode->getMemoryVT();
1119 EVT PtrVT = Ptr.getValueType();
1120
1121 SDLoc DL(Op);
1122
1123 const bool TruncatingStore = StoreNode->isTruncatingStore();
1124
1125 // Neither LOCAL nor PRIVATE can do vectors at the moment
1126 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1127 TruncatingStore) &&
1128 VT.isVector()) {
1129 if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1130 // Add an extra level of chain to isolate this vector
1131 SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1132 // TODO: can the chain be replaced without creating a new store?
1133 SDValue NewStore = DAG.getTruncStore(
1134 NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
1135 StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
1136 StoreNode->getAAInfo());
1137 StoreNode = cast<StoreSDNode>(NewStore);
1138 }
1139
1140 return scalarizeVectorStore(StoreNode, DAG);
1141 }
1142
1143 Align Alignment = StoreNode->getAlign();
1144 if (Alignment < MemVT.getStoreSize() &&
1145 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1146 StoreNode->getMemOperand()->getFlags(),
1147 nullptr)) {
1148 return expandUnalignedStore(StoreNode, DAG);
1149 }
1150
1151 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1152 DAG.getConstant(2, DL, PtrVT));
1153
1154 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1155 // It is beneficial to create MSKOR here instead of combiner to avoid
1156 // artificial dependencies introduced by RMW
1157 if (TruncatingStore) {
1158 assert(VT.bitsLE(MVT::i32));
1159 SDValue MaskConstant;
1160 if (MemVT == MVT::i8) {
1161 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1162 } else {
1163 assert(MemVT == MVT::i16);
1164 assert(StoreNode->getAlign() >= 2);
1165 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1166 }
1167
1168 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1169 DAG.getConstant(0x00000003, DL, PtrVT));
1170 SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1171 DAG.getConstant(3, DL, VT));
1172
1173 // Put the mask in correct place
1174 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1175
1176 // Put the value bits in correct place
1177 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1178 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1179
1180 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1181 // vector instead.
1182 SDValue Src[4] = {
1183 ShiftedValue,
1184 DAG.getConstant(0, DL, MVT::i32),
1185 DAG.getConstant(0, DL, MVT::i32),
1186 Mask
1187 };
1188 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1189 SDValue Args[3] = { Chain, Input, DWordAddr };
1190 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1191 Op->getVTList(), Args, MemVT,
1192 StoreNode->getMemOperand());
1193 }
1194 if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1195 // Convert pointer from byte address to dword address.
1196 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1197
1198 if (StoreNode->isIndexed()) {
1199 llvm_unreachable("Indexed stores not supported yet");
1200 } else {
1201 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1202 }
1203 return Chain;
1204 }
1205 }
1206
1207 // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1208 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1209 return SDValue();
1210
1211 if (MemVT.bitsLT(MVT::i32))
1212 return lowerPrivateTruncStore(StoreNode, DAG);
1213
1214 // Standard i32+ store, tag it with DWORDADDR to note that the address
1215 // has been shifted
1216 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1217 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1218 return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1219 }
1220
1221 // Tagged i32+ stores will be matched by patterns
1222 return SDValue();
1223}
1224
1225 // return (512 + (kc_bank << 12))
1226 static int
1227 ConstantAddressBlock(unsigned AddressSpace) {
1228 switch (AddressSpace) {
1229 case AMDGPUAS::CONSTANT_BUFFER_0:
1230 return 512;
1231 case AMDGPUAS::CONSTANT_BUFFER_1:
1232 return 512 + 4096;
1233 case AMDGPUAS::CONSTANT_BUFFER_2:
1234 return 512 + 4096 * 2;
1235 case AMDGPUAS::CONSTANT_BUFFER_3:
1236 return 512 + 4096 * 3;
1237 case AMDGPUAS::CONSTANT_BUFFER_4:
1238 return 512 + 4096 * 4;
1239 case AMDGPUAS::CONSTANT_BUFFER_5:
1240 return 512 + 4096 * 5;
1241 case AMDGPUAS::CONSTANT_BUFFER_6:
1242 return 512 + 4096 * 6;
1243 case AMDGPUAS::CONSTANT_BUFFER_7:
1244 return 512 + 4096 * 7;
1245 case AMDGPUAS::CONSTANT_BUFFER_8:
1246 return 512 + 4096 * 8;
1247 case AMDGPUAS::CONSTANT_BUFFER_9:
1248 return 512 + 4096 * 9;
1249 case AMDGPUAS::CONSTANT_BUFFER_10:
1250 return 512 + 4096 * 10;
1251 case AMDGPUAS::CONSTANT_BUFFER_11:
1252 return 512 + 4096 * 11;
1253 case AMDGPUAS::CONSTANT_BUFFER_12:
1254 return 512 + 4096 * 12;
1255 case AMDGPUAS::CONSTANT_BUFFER_13:
1256 return 512 + 4096 * 13;
1257 case AMDGPUAS::CONSTANT_BUFFER_14:
1258 return 512 + 4096 * 14;
1259 case AMDGPUAS::CONSTANT_BUFFER_15:
1260 return 512 + 4096 * 15;
1261 default:
1262 return -1;
1263 }
1264}
1265
1266SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1267 SelectionDAG &DAG) const {
1268 SDLoc DL(Op);
1269 LoadSDNode *Load = cast<LoadSDNode>(Op);
1270 ISD::LoadExtType ExtType = Load->getExtensionType();
1271 EVT MemVT = Load->getMemoryVT();
1272 assert(Load->getAlign() >= MemVT.getStoreSize());
1273
1274 SDValue BasePtr = Load->getBasePtr();
1275 SDValue Chain = Load->getChain();
1276 SDValue Offset = Load->getOffset();
1277
1278 SDValue LoadPtr = BasePtr;
1279 if (!Offset.isUndef()) {
1280 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1281 }
1282
1283 // Get dword location
1284 // NOTE: this should be eliminated by the future SHR ptr, 2
1285 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1286 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1287
1288 // Load dword
1289 // TODO: can we be smarter about machine pointer info?
1290 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1291 SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1292
1293 // Get offset within the register.
1294 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1295 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1296
1297 // Bit offset of target byte (byteIdx * 8).
1298 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1299 DAG.getConstant(3, DL, MVT::i32));
1300
1301 // Shift to the right.
1302 SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1303
1304 // Eliminate the upper bits by setting them to ...
1305 EVT MemEltVT = MemVT.getScalarType();
1306
1307 if (ExtType == ISD::SEXTLOAD) { // ... ones.
1308 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1309 Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1310 } else { // ... or zeros.
1311 Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1312 }
1313
1314 SDValue Ops[] = {
1315 Ret,
1316 Read.getValue(1) // This should be our output chain
1317 };
1318
1319 return DAG.getMergeValues(Ops, DL);
1320}
1321
1322SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1323 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1324 unsigned AS = LoadNode->getAddressSpace();
1325 EVT MemVT = LoadNode->getMemoryVT();
1326 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1327
1328 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1329 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1330 return lowerPrivateExtLoad(Op, DAG);
1331 }
1332
1333 SDLoc DL(Op);
1334 EVT VT = Op.getValueType();
1335 SDValue Chain = LoadNode->getChain();
1336 SDValue Ptr = LoadNode->getBasePtr();
1337
1338 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1339 LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1340 VT.isVector()) {
1341 SDValue Ops[2];
1342 std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1343 return DAG.getMergeValues(Ops, DL);
1344 }
1345
1346 // This is still used for explicit load from addrspace(8)
1347 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1348 if (ConstantBlock > -1 &&
1349 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1350 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1351 SDValue Result;
1352 if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1353 isa<ConstantSDNode>(Ptr)) {
1354 return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1355 }
1356 // TODO: Does this even work?
1357 // non-constant ptr can't be folded, keeps it as a v4f32 load
1358 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1359 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1360 DAG.getConstant(4, DL, MVT::i32)),
1361 DAG.getConstant(LoadNode->getAddressSpace() -
1362 AMDGPUAS::CONSTANT_BUFFER_0,
1363 DL, MVT::i32));
1364
1365 if (!VT.isVector()) {
1366 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1367 DAG.getConstant(0, DL, MVT::i32));
1368 }
1369
1370 SDValue MergedValues[2] = {
1371 Result,
1372 Chain
1373 };
1374 return DAG.getMergeValues(MergedValues, DL);
1375 }
1376
1377 // For most operations returning SDValue() will result in the node being
1378 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1379 // need to manually expand loads that may be legal in some address spaces and
1380 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1381 // compute shaders, since the data is sign extended when it is uploaded to the
1382 // buffer. However SEXT loads from other address spaces are not supported, so
1383 // we need to expand them here.
1384 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1385 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1386 SDValue NewLoad = DAG.getExtLoad(
1387 ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1388 LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1389 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1390 DAG.getValueType(MemVT));
1391
1392 SDValue MergedValues[2] = { Res, Chain };
1393 return DAG.getMergeValues(MergedValues, DL);
1394 }
1395
1396 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1397 return SDValue();
1398 }
1399
1400 // DWORDADDR ISD marks already shifted address
1401 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1402 assert(VT == MVT::i32);
1403 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1404 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1405 return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1406 }
1407 return SDValue();
1408}
1409
1410SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1411 SDValue Chain = Op.getOperand(0);
1412 SDValue Cond = Op.getOperand(1);
1413 SDValue Jump = Op.getOperand(2);
1414
1415 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1416 Chain, Jump, Cond);
1417}
1418
1419SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1420 SelectionDAG &DAG) const {
1421 MachineFunction &MF = DAG.getMachineFunction();
1422 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1423
1424 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1425
1426 unsigned FrameIndex = FIN->getIndex();
1427 Register IgnoredFrameReg;
1428 StackOffset Offset =
1429 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1430 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1431 SDLoc(Op), Op.getValueType());
1432}
1433
1434 CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1435 bool IsVarArg) const {
1436 switch (CC) {
1437 case CallingConv::AMDGPU_KERNEL:
1438 case CallingConv::SPIR_KERNEL:
1439 case CallingConv::C:
1440 case CallingConv::Fast:
1441 case CallingConv::Cold:
1442 llvm_unreachable("kernels should not be handled here");
1443 case CallingConv::AMDGPU_VS:
1444 case CallingConv::AMDGPU_GS:
1445 case CallingConv::AMDGPU_PS:
1446 case CallingConv::AMDGPU_CS:
1447 case CallingConv::AMDGPU_HS:
1448 case CallingConv::AMDGPU_ES:
1449 case CallingConv::AMDGPU_LS:
1450 return CC_R600;
1451 default:
1452 reportFatalUsageError("unsupported calling convention");
1453 }
1454}
1455
1456/// XXX Only kernel functions are supported, so we can assume for now that
1457/// every function is a kernel function, but in the future we should use
1458/// separate calling conventions for kernel and non-kernel functions.
1459 SDValue R600TargetLowering::LowerFormalArguments(
1460 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1461 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1462 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1463 SmallVector<CCValAssign, 16> ArgLocs;
1464 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1465 *DAG.getContext());
1466 MachineFunction &MF = DAG.getMachineFunction();
1467
1468 if (AMDGPU::isShader(CallConv)) {
1469 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1470 } else {
1471 analyzeFormalArgumentsCompute(CCInfo, Ins);
1472 }
1473
1474 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1475 CCValAssign &VA = ArgLocs[i];
1476 const ISD::InputArg &In = Ins[i];
1477 EVT VT = In.VT;
1478 EVT MemVT = VA.getLocVT();
1479 if (!VT.isVector() && MemVT.isVector()) {
1480 // Get load source type if scalarized.
1481 MemVT = MemVT.getVectorElementType();
1482 }
1483
1484 if (AMDGPU::isShader(CallConv)) {
1485 Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1486 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1487 InVals.push_back(Register);
1488 continue;
1489 }
1490
1491 // i64 isn't a legal type, so the register type used ends up as i32, which
1492 // isn't expected here. It attempts to create this sextload, but it ends up
1493 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1494 // for <1 x i64>.
1495
1496 // The first 36 bytes of the input buffer contains information about
1497 // thread group and global sizes.
1498 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1499 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1500 // FIXME: This should really check the extload type, but the handling of
1501 // extload vector parameters seems to be broken.
1502
1503 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1504 Ext = ISD::SEXTLOAD;
1505 }
1506
1507 // Compute the offset from the value.
1508 // XXX - I think PartOffset should give you this, but it seems to give the
1509 // size of the register which isn't useful.
1510
1511 unsigned PartOffset = VA.getLocMemOffset();
1512 Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);
1513
1515 SDValue Arg = DAG.getLoad(
1516 ISD::UNINDEXED, Ext, VT, DL, Chain,
1517 DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1518 PtrInfo,
1519 MemVT, Alignment, MachineMemOperand::MONonTemporal |
1520 MachineMemOperand::MODereferenceable |
1521 MachineMemOperand::MOInvariant);
1522
1523 InVals.push_back(Arg);
1524 }
1525 return Chain;
1526}
1527
1528 EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
1529 EVT VT) const {
1530 if (!VT.isVector())
1531 return MVT::i32;
1532 return VT.changeVectorElementTypeToInteger();
1533}
1534
1535 bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
1536 const MachineFunction &MF) const {
1537 // Local and Private addresses do not handle vectors. Limit to i32
1538 if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) {
1539 return (MemVT.getSizeInBits() <= 32);
1540 }
1541 return true;
1542}
1543
1544 bool R600TargetLowering::allowsMisalignedMemoryAccesses(
1545 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1546 unsigned *IsFast) const {
1547 if (IsFast)
1548 *IsFast = 0;
1549
1550 if (!VT.isSimple() || VT == MVT::Other)
1551 return false;
1552
1553 if (VT.bitsLT(MVT::i32))
1554 return false;
1555
1556 // TODO: This is a rough estimate.
1557 if (IsFast)
1558 *IsFast = 1;
1559
1560 return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1561}
1562
1563 static SDValue CompactSwizzlableVector(
1564 SelectionDAG &DAG, SDValue VectorEntry,
1565 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1566 assert(RemapSwizzle.empty());
1567
1568 SDLoc DL(VectorEntry);
1569 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1570
1571 SDValue NewBldVec[4];
1572 for (unsigned i = 0; i < 4; i++)
1573 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1574 DAG.getIntPtrConstant(i, DL));
1575
1576 for (unsigned i = 0; i < 4; i++) {
1577 if (NewBldVec[i].isUndef())
1578 // We mask write here to teach later passes that the ith element of this
1579 // vector is undef. Thus we can use it to reduce 128-bit register usage,
1580 // break false dependencies and additionally make assembly easier to read.
1581 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1582 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1583 if (C->isZero()) {
1584 RemapSwizzle[i] = 4; // SEL_0
1585 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1586 } else if (C->isExactlyValue(1.0)) {
1587 RemapSwizzle[i] = 5; // SEL_1
1588 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1589 }
1590 }
1591
1592 if (NewBldVec[i].isUndef())
1593 continue;
1594
1595 for (unsigned j = 0; j < i; j++) {
1596 if (NewBldVec[i] == NewBldVec[j]) {
1597 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1598 RemapSwizzle[i] = j;
1599 break;
1600 }
1601 }
1602 }
1603
1604 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1605 NewBldVec);
1606}
1607
1608 static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1609 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1610 assert(RemapSwizzle.empty());
1611
1612 SDLoc DL(VectorEntry);
1613 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1614
1615 SDValue NewBldVec[4];
1616 bool isUnmovable[4] = {false, false, false, false};
1617 for (unsigned i = 0; i < 4; i++)
1618 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1619 DAG.getIntPtrConstant(i, DL));
1620
1621 for (unsigned i = 0; i < 4; i++) {
1622 RemapSwizzle[i] = i;
1623 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1624 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1625 if (i == Idx)
1626 isUnmovable[Idx] = true;
1627 }
1628 }
1629
1630 for (unsigned i = 0; i < 4; i++) {
1631 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1632 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1633 if (isUnmovable[Idx])
1634 continue;
1635 // Swap i and Idx
1636 std::swap(NewBldVec[Idx], NewBldVec[i]);
1637 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1638 break;
1639 }
1640 }
1641
1642 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1643 NewBldVec);
1644}
1645
1646SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1647 SelectionDAG &DAG,
1648 const SDLoc &DL) const {
1649 // Old -> New swizzle values
1650 DenseMap<unsigned, unsigned> SwizzleRemap;
1651
1652 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1653 for (unsigned i = 0; i < 4; i++) {
1654 unsigned Idx = Swz[i]->getAsZExtVal();
1655 auto It = SwizzleRemap.find(Idx);
1656 if (It != SwizzleRemap.end())
1657 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1658 }
1659
1660 SwizzleRemap.clear();
1661 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1662 for (unsigned i = 0; i < 4; i++) {
1663 unsigned Idx = Swz[i]->getAsZExtVal();
1664 auto It = SwizzleRemap.find(Idx);
1665 if (It != SwizzleRemap.end())
1666 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1667 }
1668
1669 return BuildVector;
1670}
1671
1672SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1673 SelectionDAG &DAG) const {
1674 SDLoc DL(LoadNode);
1675 EVT VT = LoadNode->getValueType(0);
1676 SDValue Chain = LoadNode->getChain();
1677 SDValue Ptr = LoadNode->getBasePtr();
1678 assert (isa<ConstantSDNode>(Ptr));
1679
1680 //TODO: Support smaller loads
1681 if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1682 return SDValue();
1683
1684 if (LoadNode->getAlign() < Align(4))
1685 return SDValue();
1686
1687 int ConstantBlock = ConstantAddressBlock(Block);
1688
1689 SDValue Slots[4];
1690 for (unsigned i = 0; i < 4; i++) {
1691 // We want Const position encoded with the following formula :
1692 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1693 // const_index is Ptr computed by llvm using an alignment of 16.
1694 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1695 // then div by 4 at the ISel step
1696 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1697 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1698 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1699 }
1700 EVT NewVT = MVT::v4i32;
1701 unsigned NumElements = 4;
1702 if (VT.isVector()) {
1703 NewVT = VT;
1704 NumElements = VT.getVectorNumElements();
1705 }
1706 SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
1707 if (!VT.isVector()) {
1708 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1709 DAG.getConstant(0, DL, MVT::i32));
1710 }
1711 SDValue MergedValues[2] = {
1712 Result,
1713 Chain
1714 };
1715 return DAG.getMergeValues(MergedValues, DL);
1716}
1717
1718//===----------------------------------------------------------------------===//
1719// Custom DAG Optimizations
1720//===----------------------------------------------------------------------===//
1721
1722 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1723 DAGCombinerInfo &DCI) const {
1724 SelectionDAG &DAG = DCI.DAG;
1725 SDLoc DL(N);
1726
1727 switch (N->getOpcode()) {
1728 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1729 case ISD::FP_ROUND: {
1730 SDValue Arg = N->getOperand(0);
1731 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1732 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1733 Arg.getOperand(0));
1734 }
1735 break;
1736 }
1737
1738 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1739 // (i32 select_cc f32, f32, -1, 0 cc)
1740 //
1741 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1742 // this to one of the SET*_DX10 instructions.
1743 case ISD::FP_TO_SINT: {
1744 SDValue FNeg = N->getOperand(0);
1745 if (FNeg.getOpcode() != ISD::FNEG) {
1746 return SDValue();
1747 }
1748 SDValue SelectCC = FNeg.getOperand(0);
1749 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1750 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1751 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1752 !isHWTrueValue(SelectCC.getOperand(2)) ||
1753 !isHWFalseValue(SelectCC.getOperand(3))) {
1754 return SDValue();
1755 }
1756
1757 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1758 SelectCC.getOperand(0), // LHS
1759 SelectCC.getOperand(1), // RHS
1760 DAG.getAllOnesConstant(DL, MVT::i32), // True
1761 DAG.getConstant(0, DL, MVT::i32), // False
1762 SelectCC.getOperand(4)); // CC
1763 }
1764
1765 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1766 // => build_vector elt0, ... , NewEltIdx, ... , eltN
1767 case ISD::INSERT_VECTOR_ELT: {
1768 SDValue InVec = N->getOperand(0);
1769 SDValue InVal = N->getOperand(1);
1770 SDValue EltNo = N->getOperand(2);
1771
1772 // If the inserted element is an UNDEF, just use the input vector.
1773 if (InVal.isUndef())
1774 return InVec;
1775
1776 EVT VT = InVec.getValueType();
1777
1778 // If we can't generate a legal BUILD_VECTOR, exit
1779 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1780 return SDValue();
1781
1782 // Check that we know which element is being inserted
1783 if (!isa<ConstantSDNode>(EltNo))
1784 return SDValue();
1785 unsigned Elt = EltNo->getAsZExtVal();
1786
1787 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1788 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1789 // vector elements.
1790 SmallVector<SDValue, 8> Ops;
1791 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1792 Ops.append(InVec.getNode()->op_begin(),
1793 InVec.getNode()->op_end());
1794 } else if (InVec.isUndef()) {
1795 unsigned NElts = VT.getVectorNumElements();
1796 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1797 } else {
1798 return SDValue();
1799 }
1800
1801 // Insert the element
1802 if (Elt < Ops.size()) {
1803 // All the operands of BUILD_VECTOR must have the same type;
1804 // we enforce that here.
1805 EVT OpVT = Ops[0].getValueType();
1806 if (InVal.getValueType() != OpVT)
1807 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1808 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1809 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1810 Ops[Elt] = InVal;
1811 }
1812
1813 // Return the new vector
1814 return DAG.getBuildVector(VT, DL, Ops);
1815 }
1816
1817 // Extract_vec (Build_vector) generated by custom lowering
1818 // also needs a custom combine here.
1819 case ISD::EXTRACT_VECTOR_ELT: {
1820 SDValue Arg = N->getOperand(0);
1821 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1822 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1823 unsigned Element = Const->getZExtValue();
1824 return Arg->getOperand(Element);
1825 }
1826 }
1827 if (Arg.getOpcode() == ISD::BITCAST &&
1828 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1829 (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1830 Arg.getValueType().getVectorNumElements())) {
1831 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1832 unsigned Element = Const->getZExtValue();
1833 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1834 Arg->getOperand(0).getOperand(Element));
1835 }
1836 }
1837 break;
1838 }
1839
1840 case ISD::SELECT_CC: {
1841 // Try common optimizations
1842 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1843 return Ret;
1844
1845 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1846 // selectcc x, y, a, b, inv(cc)
1847 //
1848 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1849 // selectcc x, y, a, b, cc
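// The outer compare tests the inner select's result against b, i.e. whether
// the inner condition was false; so an outer seteq selects on the inverted
// inner condition, while an outer setne simply reproduces the inner select.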
1850 SDValue LHS = N->getOperand(0);
1851 if (LHS.getOpcode() != ISD::SELECT_CC) {
1852 return SDValue();
1853 }
1854
1855 SDValue RHS = N->getOperand(1);
1856 SDValue True = N->getOperand(2);
1857 SDValue False = N->getOperand(3);
1858 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1859
1860 if (LHS.getOperand(2).getNode() != True.getNode() ||
1861 LHS.getOperand(3).getNode() != False.getNode() ||
1862 RHS.getNode() != False.getNode()) {
1863 return SDValue();
1864 }
1865
1866 switch (NCC) {
1867 default: return SDValue();
1868 case ISD::SETNE: return LHS;
1869 case ISD::SETEQ: {
1870 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1871 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1872 if (DCI.isBeforeLegalizeOps() ||
1873 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1874 return DAG.getSelectCC(DL,
1875 LHS.getOperand(0),
1876 LHS.getOperand(1),
1877 LHS.getOperand(2),
1878 LHS.getOperand(3),
1879 LHSCC);
1880 break;
1881 }
1882 }
1883 return SDValue();
1884 }
1885
1886 case AMDGPUISD::R600_EXPORT: {
1887 SDValue Arg = N->getOperand(1);
1888 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1889 break;
1890
1891 SDValue NewArgs[8] = {
1892 N->getOperand(0), // Chain
1893 SDValue(),
1894 N->getOperand(2), // ArrayBase
1895 N->getOperand(3), // Type
1896 N->getOperand(4), // SWZ_X
1897 N->getOperand(5), // SWZ_Y
1898 N->getOperand(6), // SWZ_Z
1899 N->getOperand(7) // SWZ_W
1900 };
1901 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1902 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1903 }
1904 case AMDGPUISD::TEXTURE_FETCH: {
1905 SDValue Arg = N->getOperand(1);
1906 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1907 break;
1908
1909 SDValue NewArgs[19] = {
1910 N->getOperand(0),
1911 N->getOperand(1),
1912 N->getOperand(2),
1913 N->getOperand(3),
1914 N->getOperand(4),
1915 N->getOperand(5),
1916 N->getOperand(6),
1917 N->getOperand(7),
1918 N->getOperand(8),
1919 N->getOperand(9),
1920 N->getOperand(10),
1921 N->getOperand(11),
1922 N->getOperand(12),
1923 N->getOperand(13),
1924 N->getOperand(14),
1925 N->getOperand(15),
1926 N->getOperand(16),
1927 N->getOperand(17),
1928 N->getOperand(18),
1929 };
1930 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1931 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1932 }
1933
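// A load from the indirect parameter address space with a constant pointer
// can be satisfied directly from constant buffer 0 (see constBufferLoad).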
1934 case ISD::LOAD: {
1935 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1936 SDValue Ptr = LoadNode->getBasePtr();
1937 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1938 isa<ConstantSDNode>(Ptr))
1939 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1940 break;
1941 }
1942
1943 default: break;
1944 }
1945
1946 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1947}
1948
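// Try to fold the node feeding source operand SrcIdx of ParentNode into the
// parent's operand fields: FNEG/FABS become the neg/abs modifier bits,
// CONST_COPY becomes an ALU_CONST read with its select index (subject to the
// const-read limitations check), and MOV_IMM_* becomes either an inline
// constant register or the ALU_LITERAL_X literal slot. Returns true and
// updates the passed SDValue references when a fold is made.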
1949bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1950 SDValue &Src, SDValue &Neg, SDValue &Abs,
1951 SDValue &Sel, SDValue &Imm,
1952 SelectionDAG &DAG) const {
1953 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1954 if (!Src.isMachineOpcode())
1955 return false;
1956
1957 switch (Src.getMachineOpcode()) {
1958 case R600::FNEG_R600:
1959 if (!Neg.getNode())
1960 return false;
1961 Src = Src.getOperand(0);
1962 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1963 return true;
1964 case R600::FABS_R600:
1965 if (!Abs.getNode())
1966 return false;
1967 Src = Src.getOperand(0);
1968 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1969 return true;
1970 case R600::CONST_COPY: {
1971 unsigned Opcode = ParentNode->getMachineOpcode();
1972 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1973
1974 if (!Sel.getNode())
1975 return false;
1976
1977 SDValue CstOffset = Src.getOperand(0);
1978 if (ParentNode->getValueType(0).isVector())
1979 return false;
1980
1981 // Gather the constant values already used by the parent instruction
1982 int SrcIndices[] = {
1983 TII->getOperandIdx(Opcode, R600::OpName::src0),
1984 TII->getOperandIdx(Opcode, R600::OpName::src1),
1985 TII->getOperandIdx(Opcode, R600::OpName::src2),
1986 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1987 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1988 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1989 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1990 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1991 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1992 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1993 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1994 };
1995 std::vector<unsigned> Consts;
1996 for (int OtherSrcIdx : SrcIndices) {
1997 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1998 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1999 continue;
2000 if (HasDst) {
2001 OtherSrcIdx--;
2002 OtherSelIdx--;
2003 }
2004 if (RegisterSDNode *Reg =
2005 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2006 if (Reg->getReg() == R600::ALU_CONST) {
2007 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2008 }
2009 }
2010 }
2011
2012 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2013 Consts.push_back(Cst->getZExtValue());
2014 if (!TII->fitsConstReadLimitations(Consts)) {
2015 return false;
2016 }
2017
2018 Sel = CstOffset;
2019 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2020 return true;
2021 }
2022 case R600::MOV_IMM_GLOBAL_ADDR:
2023 // Check if the Imm slot is already in use (same check as the MOV_IMM cases below).
2024 if (Imm->getAsZExtVal())
2025 return false;
2026 Imm = Src.getOperand(0);
2027 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2028 return true;
2029 case R600::MOV_IMM_I32:
2030 case R600::MOV_IMM_F32: {
2031 unsigned ImmReg = R600::ALU_LITERAL_X;
2032 uint64_t ImmValue = 0;
2033
2034 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2035 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2036 float FloatValue = FPC->getValueAPF().convertToFloat();
2037 if (FloatValue == 0.0) {
2038 ImmReg = R600::ZERO;
2039 } else if (FloatValue == 0.5) {
2040 ImmReg = R600::HALF;
2041 } else if (FloatValue == 1.0) {
2042 ImmReg = R600::ONE;
2043 } else {
2044 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2045 }
2046 } else {
2047 uint64_t Value = Src.getConstantOperandVal(0);
2048 if (Value == 0) {
2049 ImmReg = R600::ZERO;
2050 } else if (Value == 1) {
2051 ImmReg = R600::ONE_INT;
2052 } else {
2053 ImmValue = Value;
2054 }
2055 }
2056
2057 // Check that we aren't already using an immediate.
2058 // XXX: It's possible for an instruction to have more than one
2059 // immediate operand, but this is not supported yet.
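// 0.0, 0.5 and 1.0 were mapped to dedicated inline constant registers above;
// only a remaining ALU_LITERAL_X value needs the instruction's literal slot.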
2060 if (ImmReg == R600::ALU_LITERAL_X) {
2061 if (!Imm.getNode())
2062 return false;
2063 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2064 if (C->getZExtValue())
2065 return false;
2066 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2067 }
2068 Src = DAG.getRegister(ImmReg, MVT::i32);
2069 return true;
2070 }
2071 default:
2072 return false;
2073 }
2074}
2075
2076/// Fold the instructions after selecting them
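/// For each selected machine node this walks the source operands and uses
/// FoldOperand to fold neg/abs modifiers, constant-buffer reads and literal
/// immediates directly into the instruction's operand list.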
2077SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2078 SelectionDAG &DAG) const {
2079 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2080 if (!Node->isMachineOpcode())
2081 return Node;
2082
2083 unsigned Opcode = Node->getMachineOpcode();
2084 SDValue FakeOp;
2085
2086 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2087
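// DOT_4 is special-cased: it has eight per-channel sources (src0_X..src1_W),
// each with its own neg/abs/sel operands, rather than the usual src0..src2.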
2088 if (Opcode == R600::DOT_4) {
2089 int OperandIdx[] = {
2090 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2091 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2092 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2093 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2094 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2095 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2096 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2097 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2098 };
2099 int NegIdx[] = {
2100 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2101 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2102 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2103 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2104 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2105 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2106 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2107 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2108 };
2109 int AbsIdx[] = {
2110 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2111 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2112 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2113 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2114 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2115 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2116 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2117 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2118 };
2119 for (unsigned i = 0; i < 8; i++) {
2120 if (OperandIdx[i] < 0)
2121 return Node;
2122 SDValue &Src = Ops[OperandIdx[i] - 1];
2123 SDValue &Neg = Ops[NegIdx[i] - 1];
2124 SDValue &Abs = Ops[AbsIdx[i] - 1];
2125 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2126 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2127 if (HasDst)
2128 SelIdx--;
2129 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2130 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2131 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2132 }
2133 } else if (Opcode == R600::REG_SEQUENCE) {
2134 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2135 SDValue &Src = Ops[i];
2136 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2137 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2138 }
2139 } else {
2140 if (!TII->hasInstrModifiers(Opcode))
2141 return Node;
2142 int OperandIdx[] = {
2143 TII->getOperandIdx(Opcode, R600::OpName::src0),
2144 TII->getOperandIdx(Opcode, R600::OpName::src1),
2145 TII->getOperandIdx(Opcode, R600::OpName::src2)
2146 };
2147 int NegIdx[] = {
2148 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2149 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2150 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2151 };
2152 int AbsIdx[] = {
2153 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2154 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2155 -1
2156 };
2157 for (unsigned i = 0; i < 3; i++) {
2158 if (OperandIdx[i] < 0)
2159 return Node;
2160 SDValue &Src = Ops[OperandIdx[i] - 1];
2161 SDValue &Neg = Ops[NegIdx[i] - 1];
2162 SDValue FakeAbs;
2163 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2164 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2165 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2166 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2167 if (HasDst) {
2168 SelIdx--;
2169 ImmIdx--;
2170 }
2171 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2172 SDValue &Imm = Ops[ImmIdx];
2173 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2174 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2175 }
2176 }
2177
2178 return Node;
2179}
2180
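// Atomicrmw operations without native support are expanded to cmpxchg loops
// (AtomicExpansionKind::CmpXChg); 32- and 64-bit xchg and plain integer
// operations are handled without expansion (AtomicExpansionKind::None).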
2181TargetLowering::AtomicExpansionKind
2182R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2183 switch (RMW->getOperation()) {
2184 case AtomicRMWInst::Nand:
2185 case AtomicRMWInst::FAdd:
2186 case AtomicRMWInst::FSub:
2187 case AtomicRMWInst::FMax:
2188 case AtomicRMWInst::FMin:
2189 return AtomicExpansionKind::CmpXChg;
2190 case AtomicRMWInst::UIncWrap:
2191 case AtomicRMWInst::UDecWrap:
2192 // FIXME: Cayman at least appears to have instructions for this, but the
2193 // instruction definitions appear to be missing.
2194 return AtomicExpansionKind::CmpXChg;
2195 case AtomicRMWInst::Xchg: {
2196 const DataLayout &DL = RMW->getFunction()->getDataLayout();
2197 unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
2198 if (ValSize == 32 || ValSize == 64)
2199 return AtomicExpansionKind::None;
2200 return AtomicExpansionKind::CmpXChg;
2201 }
2202 default:
2203 if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
2204 unsigned Size = IntTy->getBitWidth();
2205 if (Size == 32 || Size == 64)
2206 return AtomicExpansionKind::None;
2207 }
2208
2209 return AtomicExpansionKind::CmpXChg;
2210 }
2211
2212 llvm_unreachable("covered atomicrmw op switch");
2213}