LLVM: lib/Target/AMDGPU/GCNDPPCombine.cpp Source File

//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0

// operand. If any of the use instructions cannot be combined with the mov, the

// whole sequence is reverted.

//

// $old = ...

// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,

//                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl

// $res = VALU $dpp_value [, src1]

//

// to

//

// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]

//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl

//

// Combining rules :

//

// if $row_mask and $bank_mask are fully enabled (0xF) and

//    $bound_ctrl==DPP_BOUND_ZERO or $old==0

// -> $combined_old = undef,

//    $combined_bound_ctrl = DPP_BOUND_ZERO

//

// if the VALU op is binary and

//    $bound_ctrl==DPP_BOUND_OFF and

//    $old==identity value (immediate) for the VALU op

// -> $combined_old = src1,

//    $combined_bound_ctrl = DPP_BOUND_OFF

//

// Otherwise cancel.

//

// The mov_dpp instruction should reside in the same BB as all its uses

//===----------------------------------------------------------------------===//


#include "GCNDPPCombine.h"

#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/CodeGen/MachineFunctionPass.h"


using namespace llvm;


#define DEBUG_TYPE "gcn-dpp-combine"


STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");


namespace {


// Implementation of the DPP combine. Both the legacy wrapper and the new pass
// manager wrapper construct a temporary object of this class and call run(),
// which fills in the per-function state below before any helper is used.
class GCNDPPCombine {
  using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

  // Per-function state, assigned at the start of run(). Initialized to null
  // so the pointers are never indeterminate, whatever form of initialization
  // is used to create the object.
  MachineRegisterInfo *MRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  const GCNSubtarget *ST = nullptr;

  // Resolves the value feeding the 'old' operand of a DPP mov: an immediate
  // operand, nullptr for undef, or OldOpnd itself otherwise.
  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;

  // Wrapper that first applies the "old is an identity immediate" rule (when
  // bound_ctrl is not being combined to zero) and then builds the instruction.
  // OldOpndValue is the result of getOldOpndValue() for the mov's old operand.
  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              MachineOperand *OldOpndValue, bool CombBCZ,
                              bool IsShrinkable) const;

  // Builds the DPP form of OrigMI in front of it; returns nullptr on failure.
  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR, bool CombBCZ,
                              bool IsShrinkable) const;

  // True if MI has no operand named OpndName, or if that immediate operand,
  // after masking with Mask, equals Value.
  bool hasNoImmOrEqual(MachineInstr &MI, AMDGPU::OpName OpndName, int64_t Value,
                       int64_t Mask = -1) const;

  // Tries to fold the DPP mov MI into all of its uses; true on success.
  bool combineDPPMov(MachineInstr &MI) const;

  // Returns the DPP opcode to use for Op, or -1 if there is none.
  int getDPPOp(unsigned Op, bool IsShrinkable) const;

  // True if the VOP3 instruction MI may be replaced by its e32 form.
  bool isShrinkable(MachineInstr &MI) const;

public:
  // Runs the combine over MF; returns true if the function was changed.
  bool run(MachineFunction &MF);
};


// Legacy pass manager wrapper around GCNDPPCombine.
class GCNDPPCombineLegacy : public MachineFunctionPass {
public:
  // Pass identifier handed to the MachineFunctionPass base class.
  static char ID;

  GCNDPPCombineLegacy() : MachineFunctionPass(ID) {}

  // Human readable pass name.
  StringRef getPassName() const override { return "GCN DPP Combine"; }

  // The pass requires the machine function to be in SSA form.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }

  // Declares that the CFG is preserved, then defers to the base class.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Entry point; defined out of line.
  bool runOnMachineFunction(MachineFunction &MF) override;
};


} // end anonymous namespace


// Register the legacy pass under DEBUG_TYPE with the display name
// "GCN DPP Combine"; the two trailing 'false' arguments are the 'cfg' and
// 'analysis' parameters of INITIALIZE_PASS.
INITIALIZE_PASS(GCNDPPCombineLegacy, DEBUG_TYPE, "GCN DPP Combine", false,

                false)


// Definition of the pass identifier declared in GCNDPPCombineLegacy.
char GCNDPPCombineLegacy::ID = 0;


// Reference to the pass identifier, exposed in the llvm namespace.
char &llvm::GCNDPPCombineLegacyID = GCNDPPCombineLegacy::ID;


// Factory for the legacy pass manager: returns a newly allocated
// GCNDPPCombineLegacy instance.
FunctionPass *llvm::createGCNDPPCombinePass() {
  FunctionPass *LegacyPass = new GCNDPPCombineLegacy();
  return LegacyPass;
}


// Returns true if the VOP3 instruction MI can be replaced by its 32-bit (e32)
// encoding: an e32 form must exist, the instruction must not be a True16
// instruction, any sdst result must be unused, and no modifiers other than
// abs/neg may be set.
bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Only VOP3 encoded instructions are candidates.
  if (!TII->isVOP3(Opc))
    return false;

  if (!TII->hasVALU32BitEncoding(Opc)) {
    LLVM_DEBUG(dbgs() << "  Inst hasn't e32 equivalent\n");
    return false;
  }

  // Do not shrink True16 instructions pre-RA to avoid the restriction in
  // register allocation from only being able to use 128 VGPRs
  if (AMDGPU::isTrue16Inst(Opc))
    return false;

  // Give up if there are any uses of the sdst in carry-out or VOPC.
  // The shrunken form of the instruction would write it to vcc instead of to
  // a virtual register. If we rewrote the uses the shrinking would be
  // possible.
  const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
  if (SDst && !MRI->use_nodbg_empty(SDst->getReg()))
    return false;

  // Everything except the abs|neg source modifier bits (opsel for example)
  // has to be clear, and clamp/omod/byte_sel have to be zero if present.
  const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
  const bool OnlyDefaultModifiers =
      hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) &&
      hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) &&
      hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) &&
      hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0) &&
      hasNoImmOrEqual(MI, AMDGPU::OpName::byte_sel, 0);
  if (OnlyDefaultModifiers)
    return true;

  LLVM_DEBUG(dbgs() << "  Inst has non-default modifiers\n");
  return false;
}


int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {

  int DPP32 = AMDGPU::getDPPOp32(Op);

  if (IsShrinkable) {

    assert(DPP32 == -1);

    int E32 = AMDGPU::getVOPe32(Op);

    DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);

  }

  if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1)

    return DPP32;

  int DPP64 = -1;

  if (ST->hasVOP3DPP())

    DPP64 = AMDGPU::getDPPOp64(Op);

  if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1)

    return DPP64;

  return -1;

}


// Looks up the instruction defining the register operand OldOpnd and returns:
//   1. the immediate source operand, if the definition is a COPY or one of
//      the V_MOV forms listed below and its operand 1 is an immediate
//   2. nullptr if no definition is found or the definition is IMPLICIT_DEF
//   3. OldOpnd itself in every other case
MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
  auto *DefMI = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
  if (!DefMI)
    return nullptr;

  const unsigned DefOpc = DefMI->getOpcode();
  if (DefOpc == AMDGPU::IMPLICIT_DEF)
    return nullptr;

  const bool IsMoveLike =
      DefOpc == AMDGPU::COPY || DefOpc == AMDGPU::V_MOV_B32_e32 ||
      DefOpc == AMDGPU::V_MOV_B64_PSEUDO || DefOpc == AMDGPU::V_MOV_B64_e32 ||
      DefOpc == AMDGPU::V_MOV_B64_e64;
  if (IsMoveLike) {
    auto &MovSrc = DefMI->getOperand(1);
    if (MovSrc.isImm())
      return &MovSrc;
  }

  return &OldOpnd;
}


// Returns the size in bits of the register class that MI's instruction
// description assigns to operand Idx, or 0 if that operand has no register
// class (RegClass == -1). Only referenced from an assert, hence maybe_unused.
[[maybe_unused]] static unsigned getOperandSize(MachineInstr &MI, unsigned Idx,
                               MachineRegisterInfo &MRI) {
  const int16_t RCID = MI.getDesc().operands()[Idx].RegClass;
  if (RCID != -1) {
    const TargetRegisterInfo *RegInfo = MRI.getTargetRegisterInfo();
    return RegInfo->getRegSizeInBits(*RegInfo->getRegClass(RCID));
  }
  return 0;
}


// Builds the DPP form of OrigMI and inserts it directly before OrigMI.
//
//   OrigMI       - the VALU instruction whose src0 is the DPP mov result
//   MovMI        - the DPP mov; supplies src0 and dpp_ctrl/row_mask/bank_mask
//   CombOldVGPR  - register (and subregister) used for the 'old' operand
//   CombBCZ      - value written to the bound_ctrl operand (1 if true, else 0)
//   IsShrinkable - forwarded to getDPPOp() to select the DPP opcode
//
// Operands are appended in the order the DPP opcode expects them; NumOperands
// tracks the running operand index so the asserts below can check positions.
// Returns the new instruction, or nullptr if no DPP instruction could be
// formed; in that case any partially built instruction is erased again.
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,

                                           MachineInstr &MovMI,

                                           RegSubRegPair CombOldVGPR,

                                           bool CombBCZ,

                                           bool IsShrinkable) const {

  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||

         MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||

         MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);


  bool HasVOP3DPP = ST->hasVOP3DPP();

  auto OrigOp = OrigMI.getOpcode();

  if (ST->useRealTrue16Insts() && AMDGPU::isTrue16Inst(OrigOp)) {

    LLVM_DEBUG(

        dbgs() << "  failed: Did not expect any 16-bit uses of dpp values\n");

    return nullptr;

  }

  auto DPPOp = getDPPOp(OrigOp, IsShrinkable);

  if (DPPOp == -1) {

    LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");

    return nullptr;

  }

  int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);

  // Prior checks cover Mask with VOPC condition, but not on purpose

  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);

  assert(RowMaskOpnd && RowMaskOpnd->isImm());

  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);

  assert(BankMaskOpnd && BankMaskOpnd->isImm());

  const bool MaskAllLanes =

      RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;

  // MaskAllLanes is only read by the assert below.
  (void)MaskAllLanes;

  assert((MaskAllLanes ||

          !(TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&

                                   TII->isVOPC(OrigOpE32)))) &&

         "VOPC cannot form DPP unless mask is full");


  // Start building the new instruction in front of OrigMI, inheriting its
  // debug location and MI flags.
  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,

                         OrigMI.getDebugLoc(), TII->get(DPPOp))

    .setMIFlags(OrigMI.getFlags());


  bool Fail = false;

  // Single-pass do/while so that 'break' can be used to bail out to the
  // common failure handling after the loop.
  do {

    int NumOperands = 0;

    if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {

      DPPInst.add(*Dst);

      ++NumOperands;

    }

    if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {

      if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::sdst)) {

        DPPInst.add(*SDst);

        ++NumOperands;

      }

      // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst

    }


    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);

    if (OldIdx != -1) {

      assert(OldIdx == NumOperands);

      assert(isOfRegClass(

          CombOldVGPR,

          *MRI->getRegClass(

              TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),

          *MRI));

      // Add 'old'; it is flagged undef when no defining instruction is found.
      auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);

      DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,

                     CombOldVGPR.SubReg);

      ++NumOperands;

    } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&

                                      TII->isVOPC(OrigOpE32))) {

      // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand

      // because they write to SGPRs not VGPRs

    } else {

      // TODO: this discards MAC/FMA instructions for now, let's add it later

      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"

                           " TBD\n");

      Fail = true;

      break;

    }


    // src0 modifiers: copied from OrigMI if present, otherwise 0 if the DPP
    // opcode has such an operand.
    auto *Mod0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers);

    if (Mod0) {

      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,

                                          AMDGPU::OpName::src0_modifiers));

      assert(HasVOP3DPP ||

             (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));

      DPPInst.addImm(Mod0->getImm());

      ++NumOperands;

    } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {

      DPPInst.addImm(0);

      ++NumOperands;

    }

    // src0 of the new instruction is the source of the DPP mov.
    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);

    assert(Src0);

    [[maybe_unused]] int Src0Idx = NumOperands;


    DPPInst.add(*Src0);

    // Clear the kill flag on the operand copied from the mov.
    DPPInst->getOperand(NumOperands).setIsKill(false);

    ++NumOperands;


    // src1 modifiers: same scheme as for src0 modifiers.
    auto *Mod1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1_modifiers);

    if (Mod1) {

      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,

                                          AMDGPU::OpName::src1_modifiers));

      assert(HasVOP3DPP ||

             (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));

      DPPInst.addImm(Mod1->getImm());

      ++NumOperands;

    } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) {

      DPPInst.addImm(0);

      ++NumOperands;

    }

    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);

    if (Src1) {

      assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1) &&

             "dpp version of instruction missing src1");

      // If subtarget does not support SGPRs for src1 operand then the

      // requirements are the same as for src0. We check src0 instead because

      // pseudos are shared between subtargets and allow SGPR for src1 on all.

      if (!ST->hasDPPSrc1SGPR()) {

        assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==

                   getOperandSize(*DPPInst, NumOperands, *MRI) &&

               "Src0 and Src1 operands should have the same size");

      }


      DPPInst.add(*Src1);

      ++NumOperands;

    }


    auto *Mod2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers);

    if (Mod2) {

      assert(NumOperands ==

             AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers));

      assert(HasVOP3DPP ||

             (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));

      DPPInst.addImm(Mod2->getImm());

      ++NumOperands;

    }

    auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);

    if (Src2) {

      if (!AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src2)) {

        LLVM_DEBUG(dbgs() << "  failed: dpp does not have src2\n");

        Fail = true;

        break;

      }

      DPPInst.add(*Src2);

      ++NumOperands;

    }


    // The remaining VOP3-style operands are only transferred when the
    // subtarget has VOP3 DPP; each is copied only if both OrigMI and the DPP
    // opcode have it.
    if (HasVOP3DPP) {

      auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);

      if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) {

        DPPInst.addImm(ClampOpr->getImm());

      }

      auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);

      if (VdstInOpr &&

          AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) {

        DPPInst.add(*VdstInOpr);

      }

      auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);

      if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) {

        DPPInst.addImm(OmodOpr->getImm());

      }

      // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to

      // all 1.

      if (TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {

        // Collect the OP_SEL_0 bit of each source modifier into bits 0..2;
        // for VOP3 (non-VOP3P) the DST_OP_SEL bit of Mod0 goes into bit 3.
        int64_t OpSel = 0;

        OpSel |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_0) << 0) : 0);

        OpSel |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_0) << 1) : 0);

        OpSel |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_0) << 2) : 0);

        if (Mod0 && TII->isVOP3(OrigMI) && !TII->isVOP3P(OrigMI))

          OpSel |= !!(Mod0->getImm() & SISrcMods::DST_OP_SEL) << 3;


        if (OpSel != 0) {

          LLVM_DEBUG(dbgs() << "  failed: op_sel must be zero\n");

          Fail = true;

          break;

        }

        if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel))

          DPPInst.addImm(OpSel);

      }

      if (TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {

        // Collect the OP_SEL_1 bit of each source modifier into bits 0..2.
        int64_t OpSelHi = 0;

        OpSelHi |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_1) << 0) : 0);

        OpSelHi |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_1) << 1) : 0);

        OpSelHi |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_1) << 2) : 0);


        // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check

        // the bitmask for 3 op_sel_hi bits set

        assert(Src2 && "Expected vop3p with 3 operands");

        if (OpSelHi != 7) {

          LLVM_DEBUG(dbgs() << "  failed: op_sel_hi must be all set to one\n");

          Fail = true;

          break;

        }

        if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi))

          DPPInst.addImm(OpSelHi);

      }

      auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);

      if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) {

        DPPInst.addImm(NegOpr->getImm());

      }

      auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);

      if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) {

        DPPInst.addImm(NegHiOpr->getImm());

      }

      auto *ByteSelOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::byte_sel);

      if (ByteSelOpr &&

          AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::byte_sel)) {

        DPPInst.addImm(ByteSelOpr->getImm());

      }

      if (MachineOperand *BitOp3 =

              TII->getNamedOperand(OrigMI, AMDGPU::OpName::bitop3)) {

        assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::bitop3));

        DPPInst.add(*BitOp3);

      }

    }

    // DPP control operands come from the mov; bound_ctrl is the combined
    // value computed by the caller.
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));

    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));

    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));

    DPPInst.addImm(CombBCZ ? 1 : 0);


    constexpr AMDGPU::OpName Srcs[] = {

        AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};


    // FIXME: isOperandLegal expects to operate on a completely built

    // instruction. We should have better legality APIs to check if the

    // candidate operands will be legal without building the instruction first.

    // Checking stops at the first source operand the DPP opcode does not have.
    for (auto [I, OpName] : enumerate(Srcs)) {

      int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, OpName);

      if (OpIdx == -1)

        break;


      if (!TII->isOperandLegal(*DPPInst, OpIdx)) {

        LLVM_DEBUG(dbgs() << "  failed: src" << I << " operand is illegal\n");

        Fail = true;

        break;

      }

    }

  } while (false);


  // On failure remove the partially built instruction again.
  if (Fail) {

    DPPInst.getInstr()->eraseFromParent();

    return nullptr;

  }

  LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());

  return DPPInst.getInstr();

}


// Returns true if the immediate held by OldOpnd is the identity value of the
// binary operation OrigMIOp, i.e. combining it with any x yields x. Opcodes
// that are not listed here never match.
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
  assert(OldOpnd->isImm());
  const int64_t OldImm = OldOpnd->getImm();

  switch (OrigMIOp) {
  // Identity is 0.
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_MAX_U32_e32:
  case AMDGPU::V_MAX_U32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::V_XOR_B32_e64:
    return OldImm == 0;

  // Identity is the largest unsigned 32-bit value (all bits set).
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_MIN_U32_e32:
  case AMDGPU::V_MIN_U32_e64:
    return static_cast<uint32_t>(OldImm) ==
           std::numeric_limits<uint32_t>::max();

  // Identity is the largest signed 32-bit value.
  case AMDGPU::V_MIN_I32_e32:
  case AMDGPU::V_MIN_I32_e64:
    return static_cast<int32_t>(OldImm) ==
           std::numeric_limits<int32_t>::max();

  // Identity is the smallest signed 32-bit value.
  case AMDGPU::V_MAX_I32_e32:
  case AMDGPU::V_MAX_I32_e64:
    return static_cast<int32_t>(OldImm) ==
           std::numeric_limits<int32_t>::min();

  // Identity is 1.
  case AMDGPU::V_MUL_I32_I24_e32:
  case AMDGPU::V_MUL_I32_I24_e64:
  case AMDGPU::V_MUL_U32_U24_e32:
  case AMDGPU::V_MUL_U32_U24_e64:
    return OldImm == 1;

  default:
    return false;
  }
}


// Wrapper around the five-argument createDPPInst. When bound_ctrl is not being
// combined to zero and the resolved 'old' value is an immediate, that
// immediate must be the identity of OrigMI's operation; src1 of OrigMI (which
// must be a register of the mov destination's register class) then takes the
// place of the old register. Returns nullptr if any of these checks fail,
// otherwise whatever the five-argument overload returns.
MachineInstr *GCNDPPCombine::createDPPInst(
    MachineInstr &OrigMI, MachineInstr &MovMI, RegSubRegPair CombOldVGPR,
    MachineOperand *OldOpndValue, bool CombBCZ, bool IsShrinkable) const {
  assert(CombOldVGPR.Reg);

  // Nothing to adjust: build with the old register chosen by the caller.
  if (CombBCZ || !OldOpndValue || !OldOpndValue->isImm())
    return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);

  auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
  const bool HasRegSrc1 = Src1 && Src1->isReg();
  if (!HasRegSrc1) {
    LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
    return nullptr;
  }

  if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
    LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
    return nullptr;
  }

  // src1 becomes the new 'old'; it has to match the mov's destination class.
  const auto Src1Pair = getRegSubRegPair(*Src1);
  auto *MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
  const TargetRegisterClass *MovDstRC = MRI->getRegClass(MovDst->getReg());
  if (!isOfRegClass(Src1Pair, *MovDstRC, *MRI)) {
    LLVM_DEBUG(dbgs() << "  failed: src1 has wrong register class\n");
    return nullptr;
  }

  return createDPPInst(OrigMI, MovMI, Src1Pair, CombBCZ, IsShrinkable);
}


// Returns true if MI has no operand named OpndName, or if that operand's
// immediate, after being ANDed with Mask, equals Value. An operand that is
// present is expected to be an immediate.
bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, AMDGPU::OpName OpndName,
                                    int64_t Value, int64_t Mask) const {
  if (auto *Opnd = TII->getNamedOperand(MI, OpndName)) {
    assert(Opnd->isImm());
    const int64_t Masked = Opnd->getImm() & Mask;
    return Masked == Value;
  }
  return true;
}


// Tries to fold the DPP mov MovMI into every non-debug use of its result,
// following uses forwarded through REG_SEQUENCE. The transformation is
// all-or-nothing: if every use could be rewritten, MovMI and the replaced
// instructions are erased and true is returned; otherwise every instruction
// created here is erased again and false is returned.
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {

  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||

         MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||

         MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);


  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);

  assert(DstOpnd && DstOpnd->isReg());

  auto DPPMovReg = DstOpnd->getReg();

  if (DPPMovReg.isPhysical()) {

    LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");

    return false;

  }

  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {

    LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"

                         " for all uses\n");

    return false;

  }


  auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);

  assert(DppCtrl && DppCtrl->isImm());

  unsigned DppCtrlVal = DppCtrl->getImm();

  // 64-bit movs additionally need the DPALU_DPP feature and a control value
  // that is legal for it.
  if ((MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||

       MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp)) {

    if (!ST->hasFeature(AMDGPU::FeatureDPALU_DPP)) {

      LLVM_DEBUG(dbgs() << "  failed: 64 bit dpp move is unsupported\n");

      // Split it.

      return false;

    }

    if (!AMDGPU::isLegalDPALU_DPPControl(*ST, DppCtrlVal)) {

      LLVM_DEBUG(dbgs() << "  failed: 64 bit dpp move uses unsupported"

                           " control value\n");

      // Let it split, then control may become legal.

      return false;

    }

  }


  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);

  assert(RowMaskOpnd && RowMaskOpnd->isImm());

  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);

  assert(BankMaskOpnd && BankMaskOpnd->isImm());

  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&

                            BankMaskOpnd->getImm() == 0xF;


  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);

  assert(BCZOpnd && BCZOpnd->isImm());

  bool BoundCtrlZero = BCZOpnd->getImm();


  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);

  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);

  assert(OldOpnd && OldOpnd->isReg());

  assert(SrcOpnd && SrcOpnd->isReg());

  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {

    LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");

    return false;

  }


  auto * const OldOpndValue = getOldOpndValue(*OldOpnd);

  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else

  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())

  // but the third option is used to distinguish undef from non-immediate

  // to reuse IMPLICIT_DEF instruction later

  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);


  bool CombBCZ = false;


  // Decide the combined bound_ctrl according to the combining rules in the
  // file header: full masks with bound_ctrl:0 or old==0 give bound_ctrl zero;
  // otherwise old has to be an immediate (checked for identity later).
  if (MaskAllLanes && BoundCtrlZero) { // [1]

    CombBCZ = true;

  } else {

    if (!OldOpndValue || !OldOpndValue->isImm()) {

      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");

      return false;

    }


    if (OldOpndValue->getImm() == 0) {

      if (MaskAllLanes) {

        assert(!BoundCtrlZero); // by check [1]

        CombBCZ = true;

      }

    } else if (BoundCtrlZero) {

      assert(!MaskAllLanes); // by check [1]

      LLVM_DEBUG(dbgs() <<

        "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");

      return false;

    }

  }


  LLVM_DEBUG(dbgs() << "  old=";

    if (!OldOpndValue)

      dbgs() << "undef";

    else

      dbgs() << *OldOpndValue;

    dbgs() << ", bound_ctrl=" << CombBCZ << '\n');


  // OrigMIs: instructions to erase on success (the mov and the replaced uses).
  // DPPMIs: instructions created here, erased again on rollback.
  // RegSeqWithOpNos: REG_SEQUENCEs that forwarded the mov result, with the
  // operand numbers that referenced it.
  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;

  DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;

  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);

  // try to reuse previous old reg if it's undefined (IMPLICIT_DEF)

  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef

    // The old value is not undef here, so create a fresh IMPLICIT_DEF
    // register of the mov result's class to serve as the combined old.
    const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);

    CombOldVGPR = RegSubRegPair(

      MRI->createVirtualRegister(RC));

    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),

                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);

    DPPMIs.push_back(UndefInst.getInstr());

  }


  OrigMIs.push_back(&MovMI);

  bool Rollback = true;

  // Worklist of use operands; REG_SEQUENCE handling below may add more.
  SmallVector<MachineOperand *, 16> Uses(

      llvm::make_pointer_range(MRI->use_nodbg_operands(DPPMovReg)));


  while (!Uses.empty()) {

    MachineOperand *Use = Uses.pop_back_val();

    // Pessimistic default: every 'break' below leaves the loop with
    // Rollback set.
    Rollback = true;


    auto &OrigMI = *Use->getParent();

    LLVM_DEBUG(dbgs() << "  try: " << OrigMI);


    auto OrigOp = OrigMI.getOpcode();

    assert((TII->get(OrigOp).getSize() != 4 || !AMDGPU::isTrue16Inst(OrigOp)) &&

           "There should not be e32 True16 instructions pre-RA");

    if (OrigOp == AMDGPU::REG_SEQUENCE) {

      Register FwdReg = OrigMI.getOperand(0).getReg();

      unsigned FwdSubReg = 0;


      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {

        LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"

                             " for all uses\n");

        break;

      }


      // Operands come in (register, subregister index) pairs starting at
      // operand 1; find the index the mov result is inserted at.
      unsigned OpNo, E = OrigMI.getNumOperands();

      for (OpNo = 1; OpNo < E; OpNo += 2) {

        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {

          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();

          break;

        }

      }


      if (!FwdSubReg)

        break;


      // Queue every use of the REG_SEQUENCE result that reads that
      // subregister.
      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {

        if (Op.getSubReg() == FwdSubReg)

          Uses.push_back(&Op);

      }

      RegSeqWithOpNos[&OrigMI].push_back(OpNo);

      continue;

    }


    bool IsShrinkable = isShrinkable(OrigMI);

    if (!(IsShrinkable ||

          ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) ||

            TII->isVOP3(OrigOp)) &&

           ST->hasVOP3DPP()) ||

          TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {

      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3/3P/C\n");

      break;

    }

    if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) {

      LLVM_DEBUG(dbgs() << "  failed: can't combine v_cmpx\n");

      break;

    }


    auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);

    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);

    if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [2]

      LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");

      break;

    }


    auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);

    assert(Src0 && "Src1 without Src0?");

    if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||

                         (Src2 && Src2->isIdenticalTo(*Src0)))) ||

        (Use == Src1 && (Src1->isIdenticalTo(*Src0) ||

                         (Src2 && Src2->isIdenticalTo(*Src1))))) {

      LLVM_DEBUG(

          dbgs()

          << "  " << OrigMI

          << "  failed: DPP register is used more than once per instruction\n");

      break;

    }


    if (!ST->hasFeature(AMDGPU::FeatureDPALU_DPP) &&

        AMDGPU::isDPALU_DPP32BitOpc(OrigOp)) {

      LLVM_DEBUG(dbgs() << "  " << OrigMI

                        << "  failed: DPP ALU DPP is not supported\n");

      break;

    }


    if (!AMDGPU::isLegalDPALU_DPPControl(*ST, DppCtrlVal) &&

        AMDGPU::isDPALU_DPP(TII->get(OrigOp), *ST)) {

      LLVM_DEBUG(dbgs() << "  " << OrigMI

                        << "  failed: not valid 64-bit DPP control value\n");

      break;

    }


    LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);

    if (Use == Src0) {

      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,

                                        OldOpndValue, CombBCZ, IsShrinkable)) {

        DPPMIs.push_back(DPPInst);

        Rollback = false;

      }

    } else {

      assert(Use == Src1 && OrigMI.isCommutable()); // by check [2]

      // The mov result is src1: commute a temporary clone of OrigMI so that
      // it becomes src0, build the DPP instruction from the clone, then drop
      // the clone again.
      auto *BB = OrigMI.getParent();

      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);

      BB->insert(OrigMI, NewMI);

      if (TII->commuteInstruction(*NewMI)) {

        LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);

        if (auto *DPPInst =

                createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,

                              IsShrinkable)) {

          DPPMIs.push_back(DPPInst);

          Rollback = false;

        }

      } else

        LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");

      NewMI->eraseFromParent();

    }

    if (Rollback)

      break;

    OrigMIs.push_back(&OrigMI);

  }


  // Uses left on the worklist mean the loop was left early.
  Rollback |= !Uses.empty();


  // On rollback erase everything created here; on success erase the mov and
  // the instructions that were replaced.
  for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))

    MI->eraseFromParent();


  if (!Rollback) {

    // REG_SEQUENCEs whose result has no uses left are erased; in the others
    // the operands that referenced the mov result are marked undef.
    for (auto &S : RegSeqWithOpNos) {

      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {

        S.first->eraseFromParent();

        continue;

      }

      while (!S.second.empty())

        S.first->getOperand(S.second.pop_back_val()).setIsUndef();

    }

  }


  return !Rollback;

}


// Legacy pass manager entry point. Does nothing (and reports no change) when
// skipFunction() asks for the function to be skipped; otherwise runs the
// combine on MF and returns whether it changed anything.
bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
  const bool ShouldSkip = skipFunction(MF.getFunction());
  return !ShouldSkip && GCNDPPCombine().run(MF);
}


// Runs the combine over MF and returns true if anything was changed.
// Each basic block is walked in reverse with an early-increment range, so the
// instruction currently visited may be erased. 32-bit DPP movs are combined
// directly. 64-bit DPP movs are combined as a whole when the subtarget has
// DPALU DPP and the combine succeeds; otherwise they are expanded with
// expandMovDPP64 and each resulting mov is tried on its own.
bool GCNDPPCombine::run(MachineFunction &MF) {
  ST = &MF.getSubtarget<GCNSubtarget>();
  if (!ST->hasDPP())
    return false;

  MRI = &MF.getRegInfo();
  TII = ST->getInstrInfo();

  bool MadeChange = false;
  for (auto &Block : MF) {
    for (MachineInstr &Inst :
         llvm::make_early_inc_range(llvm::reverse(Block))) {
      const unsigned Opc = Inst.getOpcode();

      if (Opc == AMDGPU::V_MOV_B32_dpp) {
        if (combineDPPMov(Inst)) {
          MadeChange = true;
          ++NumDPPMovsCombined;
        }
        continue;
      }

      const bool Is64BitMov = Opc == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
                              Opc == AMDGPU::V_MOV_B64_dpp;
      if (!Is64BitMov)
        continue;

      if (ST->hasDPALU_DPP() && combineDPPMov(Inst)) {
        MadeChange = true;
        ++NumDPPMovsCombined;
        continue;
      }

      // Expanding the 64-bit mov is itself a change, whether or not the
      // resulting movs can be combined.
      auto Halves = TII->expandMovDPP64(Inst);
      for (auto *Half : {Halves.first, Halves.second}) {
        if (Half && combineDPPMov(*Half))
          ++NumDPPMovsCombined;
      }
      MadeChange = true;
    }
  }
  return MadeChange;
}


// New pass manager entry point. Functions marked optnone are left alone. If
// the combine changed nothing, all analyses are preserved; otherwise the
// default machine function pass set plus the CFG analyses are preserved.
PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
                                         MachineFunctionAnalysisManager &) {
  MFPropsModifier _(*this, MF);

  const bool NothingChanged =
      MF.getFunction().hasOptNone() || !GCNDPPCombine().run(MF);
  if (NothingChanged)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved = getMachineFunctionPassPreservedAnalyses();
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}

MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:103

Fail
#define Fail
Definition: AArch64Disassembler.cpp:43

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

AMDGPU.h

MBB
MachineBasicBlock & MBB
Definition: ARMSLSHardening.cpp:71

Idx
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Definition: DeadArgumentElimination.cpp:347

isIdentityValue
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd)
Definition: GCNDPPCombine.cpp:453

getOperandSize
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
Definition: GCNDPPCombine.cpp:196

DEBUG_TYPE
#define DEBUG_TYPE
Definition: GCNDPPCombine.cpp:49

GCNDPPCombine.h

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:118

_
#define _
Definition: HexagonMCCodeEmitter.cpp:47

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:110

I
#define I(x, y, z)
Definition: MD5.cpp:58

MachineFunctionPass.h

RegSubRegPair
TargetInstrInfo::RegSubRegPair RegSubRegPair
Definition: MachineSink.cpp:122

TRI
Register const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:2118

OpIdx
MachineInstr unsigned OpIdx
Definition: NVPTXPrologEpilogPass.cpp:56

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56

Uses
Remove Loads Into Fake Uses
Definition: RemoveLoadsIntoFakeUses.cpp:81

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition: Debug.h:119

char

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:48

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:73

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:33

llvm::DenseMap
Definition: DenseMap.h:700

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314

llvm::Function::hasOptNone
bool hasOptNone() const
Do not optimize this function (-O0).
Definition: Function.h:700

llvm::GCNDPPCombinePass::run
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MAM)
Definition: GCNDPPCombine.cpp:829

llvm::GCNSubtarget
Definition: GCNSubtarget.h:34

llvm::HexagonInstrInfo::getSize
unsigned getSize(const MachineInstr &MI) const
Definition: HexagonInstrInfo.cpp:4584

llvm::MFPropsModifier
An RAII based helper class to modify MachineFunctionProperties when running pass.
Definition: MachinePassManager.h:40

llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:323

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:31

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:184

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunctionPass::getRequiredProperties
virtual MachineFunctionProperties getRequiredProperties() const
Definition: MachineFunctionPass.h:57

llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:137

llvm::MachineFunction
Definition: MachineFunction.h:286

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:762

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:772

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:733

llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:439

llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition: MachineInstrBuilder.h:301

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:72

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:359

llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:590

llvm::MachineInstr::modifiesRegister
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
Definition: MachineInstr.h:1523

llvm::MachineInstr::isCommutable
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MachineInstr.h:1188

llvm::MachineInstr::insert
LLVM_ABI void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
Definition: MachineInstr.cpp:2691

llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:511

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595

llvm::MachineInstr::getFlags
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:404

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48

llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:556

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:328

llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:330

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:368

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:53

llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::Register::isPhysical
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:78

llvm::SIInstrInfo
Definition: SIInstrInfo.h:86

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55

llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35

llvm::Value
LLVM Value Representation.
Definition: Value.h:75

uint32_t

unsigned

OpName
Definition: R600Defines.h:62

llvm::AMDGPU::DPP::DppCtrl
DppCtrl
Definition: SIDefines.h:948

llvm::AMDGPU::VOP3PEncoding::OpSel
OpSel
Definition: SIDefines.h:1056

llvm::AMDGPU::getVOPe32
LLVM_READONLY int getVOPe32(uint16_t Opcode)

llvm::AMDGPU::getDPPOp32
LLVM_READONLY int getDPPOp32(uint16_t Opcode)

llvm::AMDGPU::Imm
@ Imm
Definition: AMDGPURegBankLegalizeRules.h:129

llvm::AMDGPU::isLegalDPALU_DPPControl
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
Definition: AMDGPUBaseInfo.h:1765

llvm::AMDGPU::hasNamedOperand
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
Definition: AMDGPUBaseInfo.h:414

llvm::AMDGPU::isDPALU_DPP32BitOpc
bool isDPALU_DPP32BitOpc(unsigned Opc)
Definition: AMDGPUBaseInfo.cpp:3353

llvm::AMDGPU::isTrue16Inst
bool isTrue16Inst(unsigned Opc)
Definition: AMDGPUBaseInfo.cpp:766

llvm::AMDGPU::isDPALU_DPP
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
Definition: AMDGPUBaseInfo.cpp:3373

llvm::AMDGPU::getDPPOp64
LLVM_READONLY int getDPPOp64(uint16_t Opcode)

llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:55

llvm::SISrcMods::ABS
@ ABS
Definition: SIDefines.h:274

llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition: SIDefines.h:277

llvm::SISrcMods::DST_OP_SEL
@ DST_OP_SEL
Definition: SIDefines.h:279

llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition: SIDefines.h:278

llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:273

llvm::logicalview::LVOutputKind::Split
@ Split

llvm::rdf::Def
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::getRegSubRegPair
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
Definition: SIInstrInfo.h:1584

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:369

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:663

llvm::getMachineFunctionPassPreservedAnalyses
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
Definition: MachinePassManager.cpp:162

llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428

llvm::GCNDPPCombineLegacyID
char & GCNDPPCombineLegacyID

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207

llvm::getVRegSubRegDef
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
Definition: SIInstrInfo.cpp:10040

llvm::make_pointer_range
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363

llvm::isOfRegClass
bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, const TargetRegisterClass &TRC, MachineRegisterInfo &MRI)
Returns true if a reg:subreg pair P has a TRC class.
Definition: SIInstrInfo.h:1572

llvm::createGCNDPPCombinePass
FunctionPass * createGCNDPPCombinePass()
Definition: GCNDPPCombine.cpp:114

llvm::execMayBeModifiedBeforeAnyUse
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
Definition: SIInstrInfo.cpp:10108

llvm::TargetInstrInfo::RegSubRegPair
A pair composed of a register and a sub-register index.
Definition: TargetInstrInfo.h:526

llvm::TargetInstrInfo::RegSubRegPair::SubReg
unsigned SubReg
Definition: TargetInstrInfo.h:528

llvm::TargetInstrInfo::RegSubRegPair::Reg
Register Reg
Definition: TargetInstrInfo.h:527