LLVM: lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp Source File

//===- AMDGPUWaitSGPRHazards.cpp - Insert waits for SGPR read hazards -----===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Insert s_wait_alu instructions to mitigate SGPR read hazards on GFX12.

//

//===----------------------------------------------------------------------===//


#include "AMDGPUWaitSGPRHazards.h"

#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "SIInstrInfo.h"

#include "llvm/ADT/SetVector.h"


using namespace llvm;


#define DEBUG_TYPE "amdgpu-wait-sgpr-hazards"


// Command-line master switch for this pass (hidden option, enabled by
// default). run() only falls back to the "amdgpu-sgpr-hazard-wait" function
// attribute when this option was not given on the command line.
static cl::opt<bool> GlobalEnableSGPRHazardWaits(

    "amdgpu-sgpr-hazard-wait", cl::init(true), cl::Hidden,

    cl::desc("Enable required s_wait_alu on SGPR hazards"));


// Command-line switch (hidden, off by default) requesting that tracked SGPR
// state is culled at calls/returns. When not given on the command line, run()
// uses the presence of the "amdgpu-sgpr-hazard-boundary-cull" function
// attribute instead.
static cl::opt<bool> GlobalCullSGPRHazardsOnFunctionBoundary(

    "amdgpu-sgpr-hazard-boundary-cull", cl::init(false), cl::Hidden,

    cl::desc("Cull hazards on function boundaries"));


// Command-line switch (hidden, off by default) requesting that tracked SGPR
// state is culled at zero-count memory counter waits. When not given on the
// command line, run() uses the presence of the
// "amdgpu-sgpr-hazard-mem-wait-cull" function attribute instead.
static cl::opt<bool>

    GlobalCullSGPRHazardsAtMemWait("amdgpu-sgpr-hazard-mem-wait-cull",

                                   cl::init(false), cl::Hidden,

                                   cl::desc("Cull hazards on memory waits"));


// Minimum number of tracked entries (default 8) required before a memory
// wait triggers a cull. When not given on the command line, run() reads the
// "amdgpu-sgpr-hazard-mem-wait-cull-threshold" function attribute, keeping
// this option's value as the default.
static cl::opt<unsigned> GlobalCullSGPRHazardsMemWaitThreshold(

    "amdgpu-sgpr-hazard-mem-wait-cull-threshold", cl::init(8), cl::Hidden,

    cl::desc("Number of tracked SGPRs before initiating hazard cull on memory "

             "wait"));


namespace {


class AMDGPUWaitSGPRHazards {

public:

  const SIInstrInfo *TII;

  const SIRegisterInfo *TRI;

  const MachineRegisterInfo *MRI;

  unsigned DsNopCount;


  bool EnableSGPRHazardWaits;

  bool CullSGPRHazardsOnFunctionBoundary;

  bool CullSGPRHazardsAtMemWait;

  unsigned CullSGPRHazardsMemWaitThreshold;


  AMDGPUWaitSGPRHazards() {}


  // Translate an SGPR into its numeric ID in the range 0-127.
  // M0, EXEC (and its halves) and the null registers have no such ID, and
  // neither does any register whose getHWRegIndex() value exceeds 127; for
  // all of those an empty optional is returned.
  static std::optional<unsigned> sgprNumber(Register Reg,
                                            const SIRegisterInfo &TRI) {
    const bool IsExcluded =
        Reg == AMDGPU::M0 || Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO ||
        Reg == AMDGPU::EXEC_HI || Reg == AMDGPU::SGPR_NULL ||
        Reg == AMDGPU::SGPR_NULL64;
    if (IsExcluded)
      return std::nullopt;

    const unsigned HWIndex = TRI.getHWRegIndex(Reg);
    if (HWIndex <= 127)
      return HWIndex;
    return std::nullopt;
  }


  // True when Reg is VCC or either of its 32-bit halves.
  static inline bool isVCC(Register Reg) {
    switch (Reg) {
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
      return true;
    default:
      return false;
    }
  }


  // Fix up a bundle that begins with S_GETPC_B64 after NewMI has been
  // inserted into it: every global operand of the bundled instructions that
  // follow NewMI has its offset increased by the size of the new instruction.
  // Does nothing when NewMI is not bundled or the bundle is not an S_GETPC
  // bundle.
  static void updateGetPCBundle(MachineInstr *NewMI) {
    if (!NewMI->isBundled())
      return;

    // Walk back to the first instruction of the bundle, then step over the
    // BUNDLE header instruction if there is one.
    auto BundleStart = NewMI->getIterator();
    while (BundleStart->isBundledWithPred())
      --BundleStart;
    if (BundleStart->isBundle())
      ++BundleStart;

    // Only S_GETPC bundles need their offsets adjusted.
    if (BundleStart->getOpcode() != AMDGPU::S_GETPC_B64)
      return;

    assert(NewMI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
           "Unexpected instruction insertion in bundle");

    // The only instruction expected here is S_WAITCNT_DEPCTR (see the
    // assertion above), accounted for as 4 additional bytes.
    const unsigned NewBytes = 4;
    auto BlockEnd = NewMI->getParent()->end();
    for (auto Follower = std::next(NewMI->getIterator());
         Follower != BlockEnd && Follower->isBundledWithPred(); ++Follower) {
      for (auto &Operand : Follower->operands())
        if (Operand.isGlobal())
          Operand.setOffset(Operand.getOffset() + NewBytes);
    }
  }


  // Hazard bookkeeping at a single program point. States form a lattice
  // under bitwise union, which is what merge() and operator|= implement.
  struct HazardState {
    // Flags describing which kind of ALU produced a pending VCC write.
    static constexpr unsigned None = 0;
    static constexpr unsigned SALU = (1 << 0);
    static constexpr unsigned VALU = (1 << 1);

    std::bitset<64> Tracked;      // SGPR banks ever read by VALU
    std::bitset<128> SALUHazards; // SGPRs with uncommitted values from SALU
    std::bitset<128> VALUHazards; // SGPRs with uncommitted values from VALU
    unsigned VCCHazard = None;    // Source of current VCC writes
    bool ActiveFlat = false;      // Has unwaited flat instructions

    // Union RHS into this state. Returns true if doing so changed anything.
    bool merge(const HazardState &RHS) {
      const HazardState Before = *this;
      *this |= RHS;
      return !(*this == Before);
    }

    // Member-wise equality over every field.
    bool operator==(const HazardState &RHS) const {
      if (Tracked != RHS.Tracked)
        return false;
      if (SALUHazards != RHS.SALUHazards)
        return false;
      if (VALUHazards != RHS.VALUHazards)
        return false;
      if (VCCHazard != RHS.VCCHazard)
        return false;
      return ActiveFlat == RHS.ActiveFlat;
    }

    bool operator!=(const HazardState &RHS) const { return !operator==(RHS); }

    // Member-wise union with RHS.
    void operator|=(const HazardState &RHS) {
      Tracked |= RHS.Tracked;
      SALUHazards |= RHS.SALUHazards;
      VALUHazards |= RHS.VALUHazards;
      VCCHazard |= RHS.VCCHazard;
      ActiveFlat = ActiveFlat || RHS.ActiveFlat;
    }
  };


  // Per-block dataflow state: In is the state runOnMachineBasicBlock starts
  // from for the block, Out is the state it records after scanning the block.
  struct BlockHazardState {

    HazardState In;

    HazardState Out;

  };


  // Hazard state for every basic block of the function being processed;
  // cleared again at the end of run().
  DenseMap<const MachineBasicBlock *, BlockHazardState> BlockState;


  // Number of DS_NOPs used for a cull in wave32 and wave64 mode respectively;
  // run() picks one of these as DsNopCount.
  static constexpr unsigned WAVE32_NOPS = 4;

  static constexpr unsigned WAVE64_NOPS = 8;


  // Insert a hazard cull, i.e. DsNopCount DS_NOP instructions, in front of
  // MI using MI's debug location. MI must not be part of a bundle.
  // runOnMachineBasicBlock treats a run of DsNopCount DS_NOPs as clearing the
  // tracked SGPR set.
  void insertHazardCull(MachineBasicBlock &MBB,
                        MachineBasicBlock::instr_iterator &MI) {
    assert(!MI->isBundled());
    for (unsigned Remaining = DsNopCount; Remaining != 0; --Remaining)
      BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
  }


  // Scan MBB once, starting from the block's recorded In state, and track
  // which SGPR / VCC writes are still pending at each instruction.
  //
  // With Emit == false this is the analysis step: nothing is inserted, the
  // block's Out state is updated if it differs from the computed one, and the
  // return value says whether Out changed.
  //
  // With Emit == true the required S_WAITCNT_DEPCTR instructions and DS_NOP
  // culls are inserted. The computed state must then match the recorded Out
  // state, and the return value says whether any instruction was inserted
  // (waits or culls), so the caller can report the function as modified.
  bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
    // Kinds of wait that can be required in front of an instruction.
    enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };

    HazardState State = BlockState[&MBB].In;
    SmallSet<Register, 8> SeenRegs;
    bool Emitted = false;
    unsigned DsNops = 0;

    for (MachineBasicBlock::instr_iterator MI = MBB.instr_begin(),
                                           E = MBB.instr_end();
         MI != E; ++MI) {
      if (MI->isMetaInstruction())
        continue;

      // Clear tracked SGPRs if sufficient DS_NOPs occur
      if (MI->getOpcode() == AMDGPU::DS_NOP) {
        if (++DsNops >= DsNopCount)
          State.Tracked.reset();
        continue;
      }
      DsNops = 0;

      // Snoop FLAT instructions to avoid adding culls before scratch/lds loads.
      // Culls could be disproportionate in cost to load time.
      if (SIInstrInfo::isFLAT(*MI) && !SIInstrInfo::isFLATGlobal(*MI))
        State.ActiveFlat = true;

      // SMEM or VMEM clears hazards
      if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSMRD(*MI)) {
        State.VCCHazard = HazardState::None;
        State.SALUHazards.reset();
        State.VALUHazards.reset();
        continue;
      }

      // Existing S_WAITALU can clear hazards
      if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
        unsigned int Mask = MI->getOperand(0).getImm();
        if (AMDGPU::DepCtr::decodeFieldVaVcc(Mask) == 0)
          State.VCCHazard &= ~HazardState::VALU;
        if (AMDGPU::DepCtr::decodeFieldSaSdst(Mask) == 0) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
        }
        if (AMDGPU::DepCtr::decodeFieldVaSdst(Mask) == 0)
          State.VALUHazards.reset();
        continue;
      }

      // Snoop counter waits to insert culls
      if (CullSGPRHazardsAtMemWait &&
          (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_SAMPLECNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_BVHCNT) &&
          (MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0) &&
          (State.Tracked.count() >= CullSGPRHazardsMemWaitThreshold)) {
        if (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT && State.ActiveFlat) {
          // A flat access is outstanding: consume the flag instead of
          // culling, and fall through to the normal processing below.
          State.ActiveFlat = false;
        } else {
          State.Tracked.reset();
          if (Emit) {
            insertHazardCull(MBB, MI);
            // Culls modify the function even if no wait ends up being
            // emitted in this block.
            Emitted = true;
          }
          continue;
        }
      }

      // Process only VALUs and SALUs
      bool IsVALU = SIInstrInfo::isVALU(*MI);
      bool IsSALU = SIInstrInfo::isSALU(*MI);
      if (!IsVALU && !IsSALU)
        continue;

      unsigned Wait = 0;

      // Handle one operand of MI. For uses (IsUse == true) this accumulates
      // the waits required into Wait; for defs it records new hazards.
      auto processOperand = [&](const MachineOperand &Op, bool IsUse) {
        if (!Op.isReg())
          return;
        Register Reg = Op.getReg();
        assert(!Op.getSubReg());
        if (!TRI->isSGPRReg(*MRI, Reg))
          return;

        // Only visit each register once
        if (!SeenRegs.insert(Reg).second)
          return;

        auto RegNumber = sgprNumber(Reg, *TRI);
        if (!RegNumber)
          return;

        // Track SGPRs by pair -- numeric ID of an 64b SGPR pair.
        // i.e. SGPR0 = SGPR0_SGPR1 = 0, SGPR3 = SGPR2_SGPR3 = 1, etc
        unsigned RegN = *RegNumber;
        unsigned PairN = (RegN >> 1) & 0x3f;

        // Read/write of untracked register is safe; but must record any new
        // reads.
        if (!State.Tracked[PairN]) {
          if (IsVALU && IsUse)
            State.Tracked.set(PairN);
          return;
        }

        // Number of 32-bit SGPRs covered by this register.
        uint8_t SGPRCount =
            AMDGPU::getRegBitWidth(*TRI->getRegClassForReg(*MRI, Reg)) / 32;

        if (IsUse) {
          // SALU reading SGPR clears VALU hazards
          if (IsSALU) {
            if (isVCC(Reg)) {
              if (State.VCCHazard & HazardState::VALU)
                State.VCCHazard = HazardState::None;
            } else {
              State.VALUHazards.reset();
            }
          }
          // Compute required waits
          for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
            Wait |= State.SALUHazards[RegN + RegIdx] ? WA_SALU : 0;
            Wait |= IsVALU && State.VALUHazards[RegN + RegIdx] ? WA_VALU : 0;
          }
          if (isVCC(Reg) && State.VCCHazard) {
            // Note: it's possible for both SALU and VALU to exist if VCC
            // was updated differently by merged predecessors.
            if (State.VCCHazard & HazardState::SALU)
              Wait |= WA_SALU;
            if (State.VCCHazard & HazardState::VALU)
              Wait |= WA_VCC;
          }
        } else {
          // Update hazards
          if (isVCC(Reg)) {
            State.VCCHazard = IsSALU ? HazardState::SALU : HazardState::VALU;
          } else {
            for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
              if (IsSALU)
                State.SALUHazards.set(RegN + RegIdx);
              else
                State.VALUHazards.set(RegN + RegIdx);
            }
          }
        }
      };

      // Calls, returns and indirect branches (but not program end).
      const bool IsSetPC =
          (MI->isCall() || MI->isReturn() || MI->isIndirectBranch()) &&
          MI->getOpcode() != AMDGPU::S_ENDPGM &&
          MI->getOpcode() != AMDGPU::S_ENDPGM_SAVED;

      // Only consider implicit VCC specified by instruction descriptor.
      const bool HasImplicitVCC =
          llvm::any_of(MI->getDesc().implicit_uses(),
                       [](MCPhysReg Reg) { return isVCC(Reg); }) ||
          llvm::any_of(MI->getDesc().implicit_defs(),
                       [](MCPhysReg Reg) { return isVCC(Reg); });

      if (IsSetPC) {
        // All SGPR writes before a call/return must be flushed as the
        // callee/caller will not see the hazard chain.
        if (State.VCCHazard & HazardState::VALU)
          Wait |= WA_VCC;
        if (State.SALUHazards.any() || (State.VCCHazard & HazardState::SALU))
          Wait |= WA_SALU;
        if (State.VALUHazards.any())
          Wait |= WA_VALU;
        if (CullSGPRHazardsOnFunctionBoundary && State.Tracked.any()) {
          State.Tracked.reset();
          if (Emit) {
            insertHazardCull(MBB, MI);
            // Culls modify the function even if no wait is required here.
            Emitted = true;
          }
        }
      } else {
        // Process uses to determine required wait.
        SeenRegs.clear();
        for (const MachineOperand &Op : MI->all_uses()) {
          if (Op.isImplicit() &&
              (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
            continue;
          processOperand(Op, true);
        }
      }

      // Apply wait
      if (Wait) {
        // Start from an all-ones mask and zero the field of every counter
        // that has to be waited on.
        unsigned Mask = 0xffff;
        if (Wait & WA_VCC) {
          State.VCCHazard &= ~HazardState::VALU;
          Mask = AMDGPU::DepCtr::encodeFieldVaVcc(Mask, 0);
        }
        if (Wait & WA_SALU) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
          Mask = AMDGPU::DepCtr::encodeFieldSaSdst(Mask, 0);
        }
        if (Wait & WA_VALU) {
          State.VALUHazards.reset();
          Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
        }
        if (Emit) {
          auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                               TII->get(AMDGPU::S_WAITCNT_DEPCTR))
                           .addImm(Mask);
          updateGetPCBundle(NewMI);
          Emitted = true;
        }
      }

      // On return from a call the SGPR state is unknown, so treat every SGPR
      // as a potential hazard.
      if (MI->isCall() && !CullSGPRHazardsOnFunctionBoundary)
        State.Tracked.set();

      // Update hazards based on defs.
      SeenRegs.clear();
      for (const MachineOperand &Op : MI->all_defs()) {
        if (Op.isImplicit() &&
            (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
          continue;
        processOperand(Op, false);
      }
    }

    bool Changed = State != BlockState[&MBB].Out;
    if (Emit) {
      assert(!Changed && "Hazard state should not change on emit pass");
      return Emitted;
    }
    if (Changed)
      BlockState[&MBB].Out = State;
    return Changed;
  }


  // Process one machine function: resolve the effective settings, compute
  // per-block hazard states to a fixed point, then make a final pass over all
  // blocks that inserts the instructions. Returns the accumulated result of
  // that final pass, or false when the subtarget has no VALU-read-SGPR hazard
  // or the pass is disabled.
  bool run(MachineFunction &MF) {
    const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasVALUReadSGPRHazard())
      return false;

    const auto &Fn = MF.getFunction();

    // Resolve each setting: an option given on the command line wins,
    // otherwise the matching function attribute decides.
    EnableSGPRHazardWaits = GlobalEnableSGPRHazardWaits;
    if (GlobalEnableSGPRHazardWaits.getNumOccurrences() == 0)
      EnableSGPRHazardWaits = Fn.getFnAttributeAsParsedInteger(
          "amdgpu-sgpr-hazard-wait", EnableSGPRHazardWaits);

    CullSGPRHazardsOnFunctionBoundary = GlobalCullSGPRHazardsOnFunctionBoundary;
    if (GlobalCullSGPRHazardsOnFunctionBoundary.getNumOccurrences() == 0)
      CullSGPRHazardsOnFunctionBoundary =
          Fn.hasFnAttribute("amdgpu-sgpr-hazard-boundary-cull");

    CullSGPRHazardsAtMemWait = GlobalCullSGPRHazardsAtMemWait;
    if (GlobalCullSGPRHazardsAtMemWait.getNumOccurrences() == 0)
      CullSGPRHazardsAtMemWait =
          Fn.hasFnAttribute("amdgpu-sgpr-hazard-mem-wait-cull");

    CullSGPRHazardsMemWaitThreshold = GlobalCullSGPRHazardsMemWaitThreshold;
    if (GlobalCullSGPRHazardsMemWaitThreshold.getNumOccurrences() == 0)
      CullSGPRHazardsMemWaitThreshold = Fn.getFnAttributeAsParsedInteger(
          "amdgpu-sgpr-hazard-mem-wait-cull-threshold",
          CullSGPRHazardsMemWaitThreshold);

    // Nothing to do when the pass is switched off.
    if (!EnableSGPRHazardWaits)
      return false;

    TII = ST.getInstrInfo();
    TRI = ST.getRegisterInfo();
    MRI = &MF.getRegInfo();
    DsNopCount = WAVE32_NOPS;
    if (ST.isWave64())
      DsNopCount = WAVE64_NOPS;

    const bool IsEntryFunction =
        AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    if (!IsEntryFunction && !CullSGPRHazardsOnFunctionBoundary) {
      // A callee knows nothing about its caller, so every SGPR starts out
      // tracked.
      LLVM_DEBUG(dbgs() << "Is called function, track all SGPRs.\n");
      BlockState[&MF.front()].In.Tracked.set();
    }

    // Compute the hazard state of every basic block by iterating to a fixed
    // point. Termination is guaranteed because merging only ever grows the
    // hazard set and every backedge leads to a merge.
    //
    // The entry block needs care: it conceptually has an edge from outside
    // the function, so it is always treated as a merge point. Otherwise the
    // fixed point might never be reached.
    SetVector<MachineBasicBlock *> Worklist;
    for (auto &MBB : reverse(MF))
      Worklist.insert(&MBB);

    while (!Worklist.empty()) {
      MachineBasicBlock *Current = Worklist.pop_back_val();
      if (!runOnMachineBasicBlock(*Current, false))
        continue;

      // Copy the out state: looking up successors below may grow the map and
      // invalidate references into it.
      const HazardState OutState = BlockState[Current].Out;

      for (MachineBasicBlock *Succ : Current->successors()) {
        HazardState &SuccIn = BlockState[Succ].In;

        // Hazards only have to be merged at CFG merge points; a block with a
        // single predecessor simply inherits that predecessor's out state.
        const bool IsMergePoint =
            !Succ->getSinglePredecessor() || Succ->isEntryBlock();
        bool Requeue;
        if (IsMergePoint) {
          Requeue = SuccIn.merge(OutState);
        } else {
          Requeue = SuccIn != OutState;
          if (Requeue)
            SuccIn = OutState;
        }
        if (Requeue)
          Worklist.insert(Succ);
      }
    }

    LLVM_DEBUG(dbgs() << "Emit s_wait_alu instructions\n");

    // Final pass over every block, this time inserting instructions.
    bool Modified = false;
    for (MachineBasicBlock &MBB : MF) {
      if (runOnMachineBasicBlock(MBB, true))
        Modified = true;
    }

    BlockState.clear();
    return Modified;
  }

};


class AMDGPUWaitSGPRHazardsLegacy : public MachineFunctionPass {

public:

  static char ID;


  AMDGPUWaitSGPRHazardsLegacy() : MachineFunctionPass(ID) {}


  bool runOnMachineFunction(MachineFunction &MF) override {

    return AMDGPUWaitSGPRHazards().run(MF);

  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.setPreservesCFG();

    MachineFunctionPass::getAnalysisUsage(AU);

  }

};


} // namespace


// Definition of the static pass identifier that the legacy pass hands to the
// MachineFunctionPass constructor.
char AMDGPUWaitSGPRHazardsLegacy::ID = 0;


// Reference to the legacy pass identifier, exported in namespace llvm.
char &llvm::AMDGPUWaitSGPRHazardsLegacyID = AMDGPUWaitSGPRHazardsLegacy::ID;


// Legacy pass registration under the DEBUG_TYPE name; the two trailing
// arguments are the macro's cfg and analysis parameters, both false.
INITIALIZE_PASS(AMDGPUWaitSGPRHazardsLegacy, DEBUG_TYPE,

                "AMDGPU Insert waits for SGPR read hazards", false, false)


// New pass manager entry point: runs the implementation on MF and reports no
// analyses preserved if it returned true, all analyses preserved otherwise.
PreservedAnalyses
AMDGPUWaitSGPRHazardsPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  const bool Modified = AMDGPUWaitSGPRHazards().run(MF);
  return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

GlobalCullSGPRHazardsAtMemWait
static cl::opt< bool > GlobalCullSGPRHazardsAtMemWait("amdgpu-sgpr-hazard-mem-wait-cull", cl::init(false), cl::Hidden, cl::desc("Cull hazards on memory waits"))

GlobalCullSGPRHazardsMemWaitThreshold
static cl::opt< unsigned > GlobalCullSGPRHazardsMemWaitThreshold("amdgpu-sgpr-hazard-mem-wait-cull-threshold", cl::init(8), cl::Hidden, cl::desc("Number of tracked SGPRs before initiating hazard cull on memory " "wait"))

GlobalCullSGPRHazardsOnFunctionBoundary
static cl::opt< bool > GlobalCullSGPRHazardsOnFunctionBoundary("amdgpu-sgpr-hazard-boundary-cull", cl::init(false), cl::Hidden, cl::desc("Cull hazards on function boundaries"))

GlobalEnableSGPRHazardWaits
static cl::opt< bool > GlobalEnableSGPRHazardWaits("amdgpu-sgpr-hazard-wait", cl::init(true), cl::Hidden, cl::desc("Enable required s_wait_alu on SGPR hazards"))

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUWaitSGPRHazards.cpp:23

AMDGPUWaitSGPRHazards.h

AMDGPU.h

MBB
MachineBasicBlock & MBB
Definition: ARMSLSHardening.cpp:71

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition: Debug.h:106

End
bool End
Definition: ELF_riscv.cpp:480

updateGetPCBundle
static void updateGetPCBundle(MachineInstr *NewMI)
Definition: GCNHazardRecognizer.cpp:2945

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:112

merge
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
Definition: LoopDeletion.cpp:51

I
#define I(x, y, z)
Definition: MD5.cpp:58

TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:2029

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38

SIInstrInfo.h
Interface definition for SIInstrInfo.

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SetVector.h
This file implements a set that has insertion order iteration characteristics.

RHS
Value * RHS
Definition: X86PartialReduction.cpp:74

llvm::AMDGPUWaitSGPRHazardsPass
Definition: AMDGPUWaitSGPRHazards.h:17

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:32

llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:110

llvm::DenseMap
Definition: DenseMap.h:727

llvm::Function::getFnAttributeAsParsedInteger
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778

llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277

llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731

llvm::GCNSubtarget
Definition: GCNSubtarget.h:34

llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:125

llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:339

llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition: MachineBasicBlock.h:314

llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:341

llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:357

llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:444

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:169

llvm::MachineFunctionPass::runOnMachineFunction
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

llvm::MachineFunction
Definition: MachineFunction.h:267

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:733

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:743

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:704

llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:959

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:133

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:71

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:577

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:349

llvm::MachineInstr::isBundled
bool isBundled() const
Return true if this instruction part of a bundle.
Definition: MachineInstr.h:474

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::SIInstrInfo
Definition: SIInstrInfo.h:85

llvm::SIInstrInfo::isVMEM
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:441

llvm::SIInstrInfo::isSMRD
static bool isSMRD(const MachineInstr &MI)
Definition: SIInstrInfo.h:553

llvm::SIInstrInfo::isSALU
static bool isSALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:417

llvm::SIInstrInfo::isFLATGlobal
static bool isFLATGlobal(const MachineInstr &MI)
Definition: SIInstrInfo.h:637

llvm::SIInstrInfo::isFLAT
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:621

llvm::SIInstrInfo::isVALU
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:425

llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:32

llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:57

llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162

llvm::SetVector::pop_back_val
value_type pop_back_val()
Definition: SetVector.h:285

llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132

llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:204

llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181

llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:399

llvm::cl::opt
Definition: CommandLine.h:1423

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:132

uint16_t

uint8_t

unsigned

llvm::AMDGPU::DepCtr::decodeFieldVaVcc
unsigned decodeFieldVaVcc(unsigned Encoded)
Definition: AMDGPUBaseInfo.cpp:1738

llvm::AMDGPU::DepCtr::encodeFieldVaVcc
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
Definition: AMDGPUBaseInfo.cpp:1774

llvm::AMDGPU::DepCtr::decodeFieldSaSdst
unsigned decodeFieldSaSdst(unsigned Encoded)
Definition: AMDGPUBaseInfo.cpp:1730

llvm::AMDGPU::DepCtr::decodeFieldVaSdst
unsigned decodeFieldVaSdst(unsigned Encoded)
Definition: AMDGPUBaseInfo.cpp:1734

llvm::AMDGPU::DepCtr::encodeFieldSaSdst
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
Definition: AMDGPUBaseInfo.cpp:1758

llvm::AMDGPU::DepCtr::encodeFieldVaSdst
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
Definition: AMDGPUBaseInfo.cpp:1766

llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:2102

llvm::AMDGPU::getRegBitWidth
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
Definition: SIRegisterInfo.cpp:3201

llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::SIInstrFlags::VALU
@ VALU
Definition: SIDefines.h:56

llvm::SIInstrFlags::SALU
@ SALU
Definition: SIDefines.h:55

llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:621

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:137

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:191

llvm::ms_demangle::CallingConv
CallingConv
Definition: MicrosoftDemangleNodes.h:57

llvm::orc::SymbolState::Emitted
@ Emitted
Assigned address, still materializing.

llvm::tgtok::In
@ In
Definition: TGLexer.h:84

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:373

llvm::Wait
@ Wait
Definition: Threading.h:60

llvm::operator!=
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082

llvm::AMDGPUWaitSGPRHazardsLegacyID
char & AMDGPUWaitSGPRHazardsLegacyID
Definition: AMDGPUWaitSGPRHazards.cpp:506

llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition: AddressRanges.h:153

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746

llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420

llvm::None
@ None
Definition: CodeGenData.h:106

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::Op
DWARFExpression::Operation Op
Definition: DWARFExpression.cpp:22

llvm::operator|=
bool operator|=(SparseBitVector< ElementSize > &LHS, const SparseBitVector< ElementSize > *RHS)
Definition: SparseBitVector.h:823

llvm::cl::desc
Definition: CommandLine.h:409