LLVM: lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp Source File

//===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// \brief Analyzes if a function is potentially memory bound and if a

/// kernel may benefit from limiting number of waves to reduce cache thrashing.

///

//===----------------------------------------------------------------------===//


#include "AMDGPUPerfHintAnalysis.h"

#include "AMDGPU.h"

#include "AMDGPUTargetMachine.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/Analysis/CallGraph.h"

#include "llvm/Analysis/CallGraphSCCPass.h"

#include "llvm/Analysis/LazyCallGraph.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/CodeGen/TargetLowering.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Target/TargetMachine.h"


using namespace llvm;


#define DEBUG_TYPE "amdgpu-perf-hint"


// Threshold, in percent, that the share of memory-instruction cost in a
// function's total instruction cost must exceed for isMemBound() to report
// the function as memory bound.
static cl::opt<unsigned>

    MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden,

                   cl::desc("Function mem bound threshold in %"));


// Threshold, in percent, that the weighted memory cost computed by
// needLimitWave() must exceed for a kernel to get the wave limiter.
static cl::opt<unsigned>

    LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden,

                    cl::desc("Kernel limit wave threshold in %"));


// Multiplier applied to the indirect-access cost (FuncInfo::IAMInstCost) in
// needLimitWave().
static cl::opt<unsigned>

    IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden,

             cl::desc("Indirect access memory instruction weight"));


// Multiplier applied to the large-stride cost (FuncInfo::LSMInstCost) in
// needLimitWave().
static cl::opt<unsigned>

    LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden,

             cl::desc("Large stride memory access weight"));


// Two accesses off the same base count as "large stride" when their constant
// offsets differ by more than this value (see MemAccessInfo::isLargeStride).
// NOTE(review): the unit is presumably bytes, as produced by
// GetPointerBaseWithConstantOffset - confirm.
static cl::opt<unsigned>

    LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden,

                      cl::desc("Large stride memory access threshold"));


// Counters incremented by AMDGPUPerfHint::runOnFunction() each time it
// attaches the corresponding function attribute.
STATISTIC(NumMemBound, "Number of functions marked as memory bound");

STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");


namespace {


/// Per-function worker behind AMDGPUPerfHintAnalysis.
///
/// An instance is bound to the shared FuncInfoMap and to one
/// SITargetLowering. runOnFunction() visits a single function, records its
/// cost counters in the map and attaches the "amdgpu-memory-bound" /
/// "amdgpu-wave-limiter" function attributes when the heuristics fire.
struct AMDGPUPerfHint {

  // AMDGPUPerfHintAnalysis calls the private static predicates isMemBound()
  // and needLimitWave() directly.
  friend AMDGPUPerfHintAnalysis;


public:

  /// \param FIM_ map from function to its accumulated cost counters; kept by
  ///             reference and written by visit().
  /// \param TLI_ target lowering used by visit() to query legal addressing
  ///             modes for GEPs.
  AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,

                 const SITargetLowering *TLI_)

      : FIM(FIM_), TLI(TLI_) {}


  /// Analyze \p F and add the perf-hint attributes that apply.
  /// \returns true if an attribute was added to \p F.
  bool runOnFunction(Function &F);


private:

  /// Description of one memory access as "base pointer + constant offset".
  struct MemAccessInfo {

    // Pointer operand of the access (set by makeMemAccessInfo()).
    const Value *V = nullptr;

    // Base pointer and constant offset as decomposed by
    // GetPointerBaseWithConstantOffset(). Base stays null for accesses that
    // makeMemAccessInfo() chooses not to track (local address space).
    const Value *Base = nullptr;

    int64_t Offset = 0;

    MemAccessInfo() = default;

    /// \returns true if this access and \p Reference share the same non-null
    /// base and their offsets differ by more than LargeStrideThresh.
    bool isLargeStride(MemAccessInfo &Reference) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

    /// Debug printer. Dereferences V and Base unconditionally, so it must
    /// only be used on an entry where both are non-null.
    Printable print() const {

      return Printable([this](raw_ostream &OS) {

        OS << "Value: " << *V << '\n'

           << "Base: " << *Base << " Offset: " << Offset << '\n';

      });

    }

#endif

  };


  /// Build the MemAccessInfo for a memory instruction.
  MemAccessInfo makeMemAccessInfo(Instruction *) const;


  // Most recent tracked access; visit() resets it at the start of every basic
  // block and isLargeStride(const Instruction *) updates it.
  MemAccessInfo LastAccess; // Last memory access info


  // Shared per-function cost table owned by the caller.
  AMDGPUPerfHintAnalysis::FuncInfoMap &FIM;


  // Data layout of the module being processed; assigned in runOnFunction()
  // before any use.
  const DataLayout *DL = nullptr;


  const SITargetLowering *TLI;


  /// Walk \p F, accumulate its cost counters into FIM and return the entry.
  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);

  /// Heuristic predicates evaluated on already-computed counters.
  static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);

  static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);


  /// \returns true if the address of the global access \p Inst is derived,
  /// through the value chain it follows, from a load from a global address.
  bool isIndirectAccess(const Instruction *Inst) const;


  /// Check if the instruction is large stride.
  /// The purpose is to identify memory access pattern like:
  /// x = a[i];
  /// y = a[i+1000];
  /// z = a[i+2000];
  /// In the above example, the second and third memory access will be marked
  /// large stride memory access.
  bool isLargeStride(const Instruction *Inst);


  /// Address-space tests on the type of \p V; both return false for
  /// non-pointer values.
  bool isGlobalAddr(const Value *V) const;

  bool isLocalAddr(const Value *V) const;

  /// \returns true if the instruction is a load from a global address with a
  /// user in the load's own basic block.
  bool isGlobalLoadUsedInBB(const Instruction &) const;

};


static std::pair<const Value *, const Type *> getMemoryInstrPtrAndType(

    const Instruction *Inst) {

  if (const auto *LI = dyn_cast<LoadInst>(Inst))

    return {LI->getPointerOperand(), LI->getType()};

  if (const auto *SI = dyn_cast<StoreInst>(Inst))

    return {SI->getPointerOperand(), SI->getValueOperand()->getType()};

  if (const auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))

    return {AI->getPointerOperand(), AI->getCompareOperand()->getType()};

  if (const auto *AI = dyn_cast<AtomicRMWInst>(Inst))

    return {AI->getPointerOperand(), AI->getValOperand()->getType()};

  if (const auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))

    return {MI->getRawDest(), Type::getInt8Ty(MI->getContext())};


  return {nullptr, nullptr};

}


bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {

  LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');

  SmallPtrSet<const Value *, 32> WorkSet;

  SmallPtrSet<const Value *, 32> Visited;

  if (const Value *MO = getMemoryInstrPtrAndType(Inst).first) {

    if (isGlobalAddr(MO))

      WorkSet.insert(MO);

  }


  while (!WorkSet.empty()) {

    const Value *V = *WorkSet.begin();

    WorkSet.erase(*WorkSet.begin());

    if (!Visited.insert(V).second)

      continue;

    LLVM_DEBUG(dbgs() << "  check: " << *V << '\n');


    if (const auto *LD = dyn_cast<LoadInst>(V)) {

      const auto *M = LD->getPointerOperand();

      if (isGlobalAddr(M)) {

        LLVM_DEBUG(dbgs() << "    is IA\n");

        return true;

      }

      continue;

    }


    if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {

      const auto *P = GEP->getPointerOperand();

      WorkSet.insert(P);

      for (unsigned I = 1, E = GEP->getNumIndices() + 1; I != E; ++I)

        WorkSet.insert(GEP->getOperand(I));

      continue;

    }


    if (const auto *U = dyn_cast<UnaryInstruction>(V)) {

      WorkSet.insert(U->getOperand(0));

      continue;

    }


    if (const auto *BO = dyn_cast<BinaryOperator>(V)) {

      WorkSet.insert(BO->getOperand(0));

      WorkSet.insert(BO->getOperand(1));

      continue;

    }


    if (const auto *S = dyn_cast<SelectInst>(V)) {

      WorkSet.insert(S->getFalseValue());

      WorkSet.insert(S->getTrueValue());

      continue;

    }


    if (const auto *E = dyn_cast<ExtractElementInst>(V)) {

      WorkSet.insert(E->getVectorOperand());

      continue;

    }


    LLVM_DEBUG(dbgs() << "    dropped\n");

  }


  LLVM_DEBUG(dbgs() << "  is not IA\n");

  return false;

}


// Returns true if the global load `I` is used in its own basic block.

bool AMDGPUPerfHint::isGlobalLoadUsedInBB(const Instruction &I) const {

  const auto *Ld = dyn_cast<LoadInst>(&I);

  if (!Ld)

    return false;

  if (!isGlobalAddr(Ld->getPointerOperand()))

    return false;


  for (const User *Usr : Ld->users()) {

    if (const Instruction *UsrInst = dyn_cast<Instruction>(Usr)) {

      if (UsrInst->getParent() == I.getParent())

        return true;

    }

  }


  return false;

}


/// Walk every instruction of \p F and accumulate its cost counters into the
/// FuncInfoMap entry for \p F (created on first use).
///
/// Costs:
///  - memory instructions cost ceil(bits / 32) of the accessed type and are
///    additionally classified as indirect and/or large-stride accesses;
///  - calls to known, already analysed callees add the callee's counters;
///    indirect calls and calls to declarations cost 1; direct self-recursion
///    and callees without an entry in the map add nothing;
///  - a GEP whose addressing mode is legal for the target is free, any other
///    GEP and every remaining instruction cost 1.
///
/// A basic block in which the cost of global loads used inside that block
/// exceeds 50% of the block's instruction count sets HasDenseGlobalMemAcc.
///
/// \returns pointer to the (map-owned) FuncInfo of \p F.
AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
  AMDGPUPerfHintAnalysis::FuncInfo &Costs = FIM[&F];

  LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');

  for (const BasicBlock &BB : F) {
    // Stride detection only relates accesses within a single block.
    LastAccess = MemAccessInfo();
    unsigned DenseLoadCost = 0;

    for (const Instruction &Inst : BB) {
      // --- Memory instructions -------------------------------------------
      if (const Type *AccessTy = getMemoryInstrPtrAndType(&Inst).second) {
        // NOTE(review): getPrimitiveSizeInBits() presumably yields 0 for
        // pointer and aggregate types, giving such accesses zero cost -
        // confirm this is intended.
        const unsigned Cost =
            divideCeil(AccessTy->getPrimitiveSizeInBits(), 32);
        // TODO: Check if the global load and its user are close to each other
        // instead (Or do this analysis in GCNSchedStrategy?).
        if (isGlobalLoadUsedInBB(Inst))
          DenseLoadCost += Cost;
        if (isIndirectAccess(&Inst))
          Costs.IAMInstCost += Cost;
        // isLargeStride() also records this access as the new LastAccess.
        if (isLargeStride(&Inst))
          Costs.LSMInstCost += Cost;
        Costs.MemInstCost += Cost;
        Costs.InstCost += Cost;
        continue;
      }

      // --- Calls -----------------------------------------------------------
      if (const auto *Call = dyn_cast<CallBase>(&Inst)) {
        const Function *Callee = Call->getCalledFunction();
        if (!Callee || Callee->isDeclaration()) {
          ++Costs.InstCost;
          continue;
        }

        // Handle immediate recursion
        if (Callee == &F)
          continue;

        // Callees that have not been analysed contribute nothing.
        auto CalleeIt = FIM.find(Callee);
        if (CalleeIt == FIM.end())
          continue;

        Costs.MemInstCost += CalleeIt->second.MemInstCost;
        Costs.InstCost += CalleeIt->second.InstCost;
        Costs.IAMInstCost += CalleeIt->second.IAMInstCost;
        Costs.LSMInstCost += CalleeIt->second.LSMInstCost;
        continue;
      }

      // --- Address computations --------------------------------------------
      if (const auto *GEP = dyn_cast<GetElementPtrInst>(&Inst)) {
        TargetLoweringBase::AddrMode Mode;
        const auto *BasePtr =
            GetPointerBaseWithConstantOffset(GEP, Mode.BaseOffs, *DL);
        Mode.BaseGV =
            dyn_cast_or_null<GlobalValue>(const_cast<Value *>(BasePtr));
        Mode.HasBaseReg = !Mode.BaseGV;

        // Offset will likely be folded into load or store when the mode is
        // legal, so only charge for GEPs that cannot be folded.
        const bool OffsetFolds =
            TLI->isLegalAddressingMode(*DL, Mode, GEP->getResultElementType(),
                                       GEP->getPointerAddressSpace());
        if (!OffsetFolds)
          ++Costs.InstCost;
        continue;
      }

      // --- Everything else -------------------------------------------------
      ++Costs.InstCost;
    }

    // Once one dense block has been found there is nothing left to decide.
    if (Costs.HasDenseGlobalMemAcc)
      continue;

    const unsigned GlobalMemAccPercentage = DenseLoadCost * 100 / BB.size();
    if (GlobalMemAccPercentage > 50) {
      LLVM_DEBUG(dbgs() << "[HasDenseGlobalMemAcc] Set to true since "
                        << BB.getName() << " has " << GlobalMemAccPercentage
                        << "% global memory access\n");
      Costs.HasDenseGlobalMemAcc = true;
    }
  }

  return &Costs;
}


/// Analyze \p F and attach the perf-hint attributes that apply.
///
/// Functions that already carry both "amdgpu-wave-limiter" and
/// "amdgpu-memory-bound" are skipped without being visited. Otherwise the
/// function is visited, "amdgpu-memory-bound" is added when isMemBound()
/// holds, and "amdgpu-wave-limiter" is added for entry-function calling
/// conventions when needLimitWave() holds.
///
/// \returns true if at least one attribute was added.
bool AMDGPUPerfHint::runOnFunction(Function &F) {
  DL = &F.getParent()->getDataLayout();

  const bool AlreadyAnnotated = F.hasFnAttribute("amdgpu-wave-limiter") &&
                                F.hasFnAttribute("amdgpu-memory-bound");
  if (AlreadyAnnotated)
    return false;

  const AMDGPUPerfHintAnalysis::FuncInfo &Costs = *visit(F);

  LLVM_DEBUG(dbgs() << F.getName() << " MemInst cost: " << Costs.MemInstCost
                    << '\n'
                    << " IAMInst cost: " << Costs.IAMInstCost << '\n'
                    << " LSMInst cost: " << Costs.LSMInstCost << '\n'
                    << " TotalInst cost: " << Costs.InstCost << '\n');

  const bool MemBound = isMemBound(Costs);
  if (MemBound) {
    LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
    ++NumMemBound;
    F.addFnAttr("amdgpu-memory-bound", "true");
  }

  // The wave limiter hint is only applied to entry functions.
  const bool LimitWave =
      AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(Costs);
  if (LimitWave) {
    LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
    ++NumLimitWave;
    F.addFnAttr("amdgpu-wave-limiter", "true");
  }

  return MemBound || LimitWave;
}


/// Decide whether a function with the cost counters \p FI is memory bound.
///
/// \returns true if the function has a basic block with dense global memory
/// access, or if memory instructions account for more than MemBoundThresh
/// percent of the total instruction cost.
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  // Reverting optimal scheduling in favour of occupancy with basic block(s)
  // having dense global memory access can potentially hurt performance.
  if (FI.HasDenseGlobalMemAcc)
    return true;

  // Nothing was counted, so there is no ratio to compute. Guarding here also
  // avoids a division by zero.
  if (FI.InstCost == 0)
    return false;

  // The counters are 32-bit and callee costs are added at every call site, so
  // they can get large; widen before scaling so that "* 100" cannot wrap.
  const uint64_t MemCost = FI.MemInstCost;
  const uint64_t TotalCost = FI.InstCost;
  return MemCost * 100 / TotalCost > MemBoundThresh;
}


/// Decide whether a kernel with the cost counters \p FI should have its
/// number of waves limited.
///
/// The weighted memory cost is
///   MemInstCost + IAMInstCost * IAWeight + LSMInstCost * LSWeight
/// and the result is true when that cost exceeds LimitWaveThresh percent of
/// the total instruction cost.
bool AMDGPUPerfHint::needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
  // Nothing was counted, so there is no ratio to compute. Guarding here also
  // avoids a division by zero.
  if (FI.InstCost == 0)
    return false;

  // Do all arithmetic in 64 bits: with the default weight of 1000 the 32-bit
  // products would wrap for costs of a few million, which would silently turn
  // a very memory-heavy kernel into one that "does not need" the limiter.
  const uint64_t IndirectWeight = IAWeight;
  const uint64_t StrideWeight = LSWeight;
  const uint64_t WeightedMemCost = uint64_t(FI.MemInstCost) +
                                   uint64_t(FI.IAMInstCost) * IndirectWeight +
                                   uint64_t(FI.LSMInstCost) * StrideWeight;
  const uint64_t TotalCost = FI.InstCost;

  return WeightedMemCost * 100 / TotalCost > LimitWaveThresh;
}


/// \returns true if \p V has pointer type in the global or flat address
/// space; false for any other address space and for non-pointer values.
bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
  const auto *PtrTy = dyn_cast<PointerType>(V->getType());
  if (!PtrTy)
    return false;

  switch (PtrTy->getAddressSpace()) {
  case AMDGPUAS::GLOBAL_ADDRESS:
  case AMDGPUAS::FLAT_ADDRESS: // Flat likely points to global too.
    return true;
  default:
    return false;
  }
}


/// \returns true if \p V has pointer type in the local address space; false
/// for any other address space and for non-pointer values.
bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
  const auto *PtrTy = dyn_cast<PointerType>(V->getType());
  return PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}


bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {

  LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n');


  MemAccessInfo MAI = makeMemAccessInfo(const_cast<Instruction *>(Inst));

  bool IsLargeStride = MAI.isLargeStride(LastAccess);

  if (MAI.Base)

    LastAccess = std::move(MAI);


  return IsLargeStride;

}


/// Build the base + constant-offset description of the memory access made by
/// \p Inst.
///
/// \p Inst must be a memory instruction recognised by
/// getMemoryInstrPtrAndType(); its pointer operand is dereferenced without a
/// null check. Accesses through a local address are returned as an empty
/// (untracked) MemAccessInfo.
AMDGPUPerfHint::MemAccessInfo
AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
  const Value *Addr = getMemoryInstrPtrAndType(Inst).first;
  LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *Addr << '\n');

  MemAccessInfo Result;
  // Do not treat local-addr memory access as large stride.
  if (!isLocalAddr(Addr)) {
    Result.V = Addr;
    Result.Base = GetPointerBaseWithConstantOffset(Addr, Result.Offset, *DL);
  }
  return Result;
}


/// Compare this access with \p Reference.
///
/// \returns true if both accesses have the same non-null base pointer and
/// their constant offsets are more than LargeStrideThresh apart; false when
/// either access is untracked or the bases differ.
bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
    MemAccessInfo &Reference) const {
  // A null Base means "untracked". If Base is non-null and equal to
  // Reference.Base, then Reference.Base is non-null as well.
  if (!Base || Base != Reference.Base)
    return false;

  // Take the absolute distance in unsigned arithmetic. Subtracting the signed
  // offsets directly overflows (undefined behaviour) when they lie far apart
  // on opposite sides of zero; the modular unsigned difference of the larger
  // minus the smaller is always the exact distance.
  const uint64_t Mine = static_cast<uint64_t>(Offset);
  const uint64_t Theirs = static_cast<uint64_t>(Reference.Offset);
  const uint64_t Diff =
      Offset > Reference.Offset ? Mine - Theirs : Theirs - Mine;

  bool Result = Diff > LargeStrideThresh;
  LLVM_DEBUG(dbgs() << "[isLargeStride compare]\n"
               << print() << "<=>\n"
               << Reference.print() << "Result:" << Result << '\n');
  return Result;
}


class AMDGPUPerfHintAnalysisLegacy : public CallGraphSCCPass {

private:

  // FIXME: This is relying on maintaining state between different SCCs.

  AMDGPUPerfHintAnalysis Impl;


public:

  static char ID;


  AMDGPUPerfHintAnalysisLegacy() : CallGraphSCCPass(ID) {}


  bool runOnSCC(CallGraphSCC &SCC) override;


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.setPreservesAll();

  }

};


} // namespace


bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {

  auto FI = FIM.find(F);

  if (FI == FIM.end())

    return false;


  return AMDGPUPerfHint::isMemBound(FI->second);

}


bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {

  auto FI = FIM.find(F);

  if (FI == FIM.end())

    return false;


  return AMDGPUPerfHint::needLimitWave(FI->second);

}


bool AMDGPUPerfHintAnalysis::runOnSCC(const GCNTargetMachine &TM,

                                      CallGraphSCC &SCC) {

  bool Changed = false;

  for (CallGraphNode *I : SCC) {

    Function *F = I->getFunction();

    if (!F || F->isDeclaration())

      continue;


    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);

    AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());


    if (Analyzer.runOnFunction(*F))

      Changed = true;

  }


  return Changed;

}


bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM,

                                 LazyCallGraph &CG) {

  bool Changed = false;


  CG.buildRefSCCs();


  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {

    for (LazyCallGraph::SCC &SCC : RC) {

      if (SCC.size() != 1)

        continue;

      Function &F = SCC.begin()->getFunction();

      // TODO: Skip without norecurse, or interposable?

      if (F.isDeclaration())

        continue;


      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

      AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());

      if (Analyzer.runOnFunction(F))

        Changed = true;

    }

  }


  return Changed;

}


// Definition of the legacy pass's static ID member, which its constructor
// hands to CallGraphSCCPass.
char AMDGPUPerfHintAnalysisLegacy::ID = 0;

// Reference to that ID in namespace llvm, so the pass can be referred to even
// though the class itself lives in this file's anonymous namespace.
char &llvm::AMDGPUPerfHintAnalysisLegacyID = AMDGPUPerfHintAnalysisLegacy::ID;


// Register the legacy pass under DEBUG_TYPE ("amdgpu-perf-hint"); the two
// trailing `true` arguments are the macro's `cfg` and `analysis` parameters.
INITIALIZE_PASS(AMDGPUPerfHintAnalysisLegacy, DEBUG_TYPE,

                "Analysis if a function is memory bound", true, true)


/// Legacy pass entry point: forward \p SCC to the shared analysis, using the
/// target machine obtained from TargetPassConfig.
///
/// \returns false without doing anything when TargetPassConfig is not
/// available; otherwise whether any function was changed.
bool AMDGPUPerfHintAnalysisLegacy::runOnSCC(CallGraphSCC &SCC) {
  if (auto *PassConfig = getAnalysisIfAvailable<TargetPassConfig>())
    return Impl.runOnSCC(PassConfig->getTM<GCNTargetMachine>(), SCC);

  return false;
}


/// New pass manager entry point: run the analysis over the module's lazy call
/// graph.
///
/// \returns PreservedAnalyses::all() when nothing changed; otherwise a set
/// that preserves only LazyCallGraphAnalysis.
PreservedAnalyses AMDGPUPerfHintAnalysisPass::run(Module &M,
                                                  ModuleAnalysisManager &AM) {
  LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);

  if (!Impl->run(TM, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserve<LazyCallGraphAnalysis>();
  return Preserved;
}

AMDGPUBaseInfo.h

LargeStrideThresh
static cl::opt< unsigned > LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden, cl::desc("Large stride memory access threshold"))

IAWeight
static cl::opt< unsigned > IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden, cl::desc("Indirect access memory instruction weight"))

LimitWaveThresh
static cl::opt< unsigned > LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden, cl::desc("Kernel limit wave threshold in %"))

LSWeight
static cl::opt< unsigned > LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden, cl::desc("Large stride memory access weight"))

MemBoundThresh
static cl::opt< unsigned > MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden, cl::desc("Function mem bound threshold in %"))

AMDGPUPerfHintAnalysis.h
Analyzes if a function potentially memory bound and if a kernel kernel may benefit from limiting numb...

AMDGPUTargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.

AMDGPU.h

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:205

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27

CallGraphSCCPass.h

CallGraph.h
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...

CommandLine.h

Size
uint64_t Size
Definition: ELFObjHandler.cpp:81

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:103

DEBUG_TYPE
#define DEBUG_TYPE
Definition: GenericCycleImpl.h:31

GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:164

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:110

IntrinsicInst.h

Instructions.h

LazyCallGraph.h
Implements a lazy call graph analysis and related passes for the new pass manager.

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

P
#define P(N)

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56

visit
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
Definition: SPIRVPostLegalizer.cpp:125

OS
raw_pwrite_stream & OS
Definition: SampleProfWriter.cpp:51

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition: Debug.h:119

Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:77

TargetLowering.h
This file describes how to lower LLVM code to machine code.

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

TargetSubtargetInfo.h

ValueTracking.h

bool

llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:31

llvm::AMDGPUPerfHintAnalysis::isMemoryBound
bool isMemoryBound(const Function *F) const
Definition: AMDGPUPerfHintAnalysis.cpp:407

llvm::AMDGPUPerfHintAnalysis::needsWaveLimiter
bool needsWaveLimiter(const Function *F) const
Definition: AMDGPUPerfHintAnalysis.cpp:415

llvm::AMDGPUPerfHintAnalysis::run
bool run(const GCNTargetMachine &TM, LazyCallGraph &CG)
Definition: AMDGPUPerfHintAnalysis.cpp:441

llvm::AMDGPUPerfHintAnalysis::runOnSCC
bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC)
Definition: AMDGPUPerfHintAnalysis.cpp:423

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:48

llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:131

llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:162

llvm::CallGraphSCCPass
Definition: CallGraphSCCPass.h:35

llvm::CallGraphSCCPass::runOnSCC
virtual bool runOnSCC(CallGraphSCC &SCC)=0
runOnSCC - This method should be implemented by the subclass to perform whatever action is necessary ...

llvm::CallGraphSCCPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph.
Definition: CallGraphSCCPass.cpp:649

llvm::CallGraphSCC
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Definition: CallGraphSCCPass.h:83

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63

llvm::Function
Definition: Function.h:64

llvm::GCNSubtarget
Definition: GCNSubtarget.h:34

llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:81

llvm::Instruction
Definition: Instruction.h:69

llvm::LazyCallGraphAnalysis
An analysis pass which computes the call graph for a module.
Definition: LazyCallGraph.h:1263

llvm::LazyCallGraph::RefSCC
A RefSCC of the call graph.
Definition: LazyCallGraph.h:542

llvm::LazyCallGraph::SCC
An SCC of the call graph.
Definition: LazyCallGraph.h:417

llvm::LazyCallGraph
A lazily constructed view of the call graph of a module.
Definition: LazyCallGraph.h:109

llvm::LazyCallGraph::buildRefSCCs
LLVM_ABI void buildRefSCCs()
Definition: LazyCallGraph.cpp:1935

llvm::LazyCallGraph::postorder_ref_sccs
iterator_range< postorder_ref_scc_iterator > postorder_ref_sccs()
Definition: LazyCallGraph.h:972

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118

llvm::PreservedAnalyses::preserve
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132

llvm::Printable
Simple wrapper around std::function<void(raw_ostream&)>.
Definition: Printable.h:38

llvm::SITargetLowering
Definition: SIISelLowering.h:31

llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:98

llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:418

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401

llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:494

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

llvm::Type::getInt8Ty
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)

llvm::User
Definition: User.h:44

llvm::ValueMap< const Function *, FuncInfo >

llvm::ValueMap::find
iterator find(const KeyT &Val)
Definition: ValueMap.h:160

llvm::ValueMap::end
iterator end()
Definition: ValueMap.h:139

llvm::Value
LLVM Value Representation.
Definition: Value.h:75

llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:426

llvm::cl::opt
Definition: CommandLine.h:1429

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53

uint64_t

unsigned

TargetMachine.h

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPUAddrSpace.h:34

llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPUAddrSpace.h:30

llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPUAddrSpace.h:31

llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:551

llvm::AMDGPU::isEntryFunctionCC
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:1454

llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72

llvm::ARM::ProfileKind::M
@ M

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::M68k::MemAddrModeKind::U
@ U

llvm::M68k::MemAddrModeKind::V
@ V

llvm::SIEncodingFamily::SI
@ SI
Definition: SIDefines.h:36

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444

llvm::logicalview::LVAttributeKind::Reference
@ Reference

llvm::ms_demangle::QualifierMangleMode::Result
@ Result

llvm::pdb::PDB_SymType::Callee
@ Callee

llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::Offset
@ Offset
Definition: DWP.cpp:477

llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
Definition: ValueTracking.h:344

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207

llvm::divideCeil
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:399

llvm::AMDGPUPerfHintAnalysisLegacyID
char & AMDGPUPerfHintAnalysisLegacyID
Definition: AMDGPUPerfHintAnalysis.cpp:467

llvm::AMDGPUPerfHintAnalysisPass::Impl
std::unique_ptr< AMDGPUPerfHintAnalysis > Impl
Definition: AMDGPUPerfHintAnalysis.h:67

llvm::AMDGPUPerfHintAnalysisPass::TM
const GCNTargetMachine & TM
Definition: AMDGPUPerfHintAnalysis.h:66

llvm::AMDGPUPerfHintAnalysisPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPUPerfHintAnalysis.cpp:481

llvm::AMDGPUPerfHintAnalysis::FuncInfo
Definition: AMDGPUPerfHintAnalysis.h:33

llvm::AMDGPUPerfHintAnalysis::FuncInfo::LSMInstCost
unsigned LSMInstCost
Definition: AMDGPUPerfHintAnalysis.h:37

llvm::AMDGPUPerfHintAnalysis::FuncInfo::InstCost
unsigned InstCost
Definition: AMDGPUPerfHintAnalysis.h:35

llvm::AMDGPUPerfHintAnalysis::FuncInfo::IAMInstCost
unsigned IAMInstCost
Definition: AMDGPUPerfHintAnalysis.h:36

llvm::AMDGPUPerfHintAnalysis::FuncInfo::HasDenseGlobalMemAcc
bool HasDenseGlobalMemAcc
Definition: AMDGPUPerfHintAnalysis.h:38

llvm::AMDGPUPerfHintAnalysis::FuncInfo::MemInstCost
unsigned MemInstCost
Definition: AMDGPUPerfHintAnalysis.h:34

llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Definition: TargetLowering.h:2899

llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2901

llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2900

llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2902

llvm::cl::desc
Definition: CommandLine.h:410