LLVM 22.0.0git
KernelInfo.cpp
Go to the documentation of this file.
1//===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the KernelInfoPrinter class used to emit remarks about
10// function properties from a GPU kernel.
11//
12//===----------------------------------------------------------------------===//
13
19#include "llvm/IR/DebugInfo.h"
20#include "llvm/IR/Dominators.h"
22#include "llvm/IR/Metadata.h"
23#include "llvm/IR/Module.h"
24#include "llvm/IR/PassManager.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "kernel-info"
29
30namespace {
31
32/// Data structure holding function info for kernels.
33class KernelInfo {
 /// Folds the allocas, calls, and flat address space accesses found in \p BB
 /// into the counters below, emitting remarks through \p ORE as it goes.
34 void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE);
35
36public:
 /// Gathers the properties of \p F into a local KernelInfo and emits one
 /// remark per property.
 /// NOTE(review): \p TM is not referenced by the definition visible in this
 /// file -- confirm whether the parameter is still needed.
37 static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
38 TargetMachine *TM);
39
40 /// Whether the function has external linkage and is not a kernel function.
41 bool ExternalNotKernel = false;
42
43 /// Launch bounds.
 /// NOTE(review): the member declaration this comment belongs to is absent
 /// from this listing. emitKernelInfo uses it as KI.LaunchBounds, pushing
 /// {name, value} entries and iterating them as pairs (first/second).
45
46 /// The number of alloca instructions inside the function, the number of those
47 /// with allocation sizes that cannot be determined at compile time, and the
48 /// sum of the sizes that can be.
49 ///
50 /// With the current implementation for at least some GPU archs,
51 /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
52 /// case the implementation changes.
53 int64_t Allocas = 0;
54 int64_t AllocasDyn = 0;
55 int64_t AllocasStaticSizeSum = 0;
56
57 /// Number of direct/indirect calls (anything derived from CallBase).
58 int64_t DirectCalls = 0;
59 int64_t IndirectCalls = 0;
60
61 /// Number of direct calls made from this function to other functions
62 /// defined in this module.
63 int64_t DirectCallsToDefinedFunctions = 0;
64
65 /// Number of direct calls to inline assembly.
66 int64_t InlineAssemblyCalls = 0;
67
68 /// Number of calls of type InvokeInst.
69 int64_t Invokes = 0;
70
71 /// Target-specific flat address space.
 /// Unlike the other members this one has no default initializer;
 /// emitKernelInfo assigns it from TargetTransformInfo::getFlatAddressSpace()
 /// before it scans any basic block.
72 unsigned FlatAddrspace;
73
74 /// Number of flat address space memory accesses (via load, store, etc.).
75 int64_t FlatAddrspaceAccesses = 0;
76};
77
78} // end anonymous namespace
79
80static void identifyCallee(OptimizationRemark &R, const Module *M,
81 const Value *V, StringRef Kind = "") {
82 SmallString<100> Name; // might be function name or asm expression
83 if (const Function *F = dyn_cast<Function>(V)) {
84 if (auto *SubProgram = F->getSubprogram()) {
85 if (SubProgram->isArtificial())
86 R << "artificial ";
87 Name = SubProgram->getName();
88 }
89 }
90 if (Name.empty()) {
91 raw_svector_ostream OS(Name);
92 V->printAsOperand(OS, /*PrintType=*/false, M);
93 }
94 if (!Kind.empty())
95 R << Kind << " ";
96 R << "'" << Name << "'";
97}
98
 // NOTE(review): the signature line of this definition is absent from this
 // listing; the declaration summary further down the page gives it as
 // static void identifyFunction(OptimizationRemark &R, const Function &F).
 // Identifies F in the remark by forwarding to identifyCallee with the
 // "function" kind label and F's parent module.
100 identifyCallee(R, F.getParent(), &F, "function");
101}
102
/// Emit a remark through \p ORE describing \p Alloca inside \p Caller: the
/// enclosing function, the alloca's IR operand name, the name of the source
/// variable it was declared for (when a debug declare record exists), and its
/// size.
///
/// \p StaticSize is the allocation size in bytes; a value of 0 is reported as
/// "dynamic size".
103static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
104 const AllocaInst &Alloca,
105 TypeSize::ScalarTy StaticSize) {
106 ORE.emit([&] {
107 StringRef DbgName;
 // NOTE(review): the declaration of Loc (assigned below) is absent from this
 // listing.
109 bool Artificial = false;
 // findDVRDeclares takes a non-const Value *, hence the const_cast.
110 auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
111 if (!DVRs.empty()) {
 // Only the first declare record is consulted for the name, location, and
 // artificial flag.
112 const DbgVariableRecord &DVR = **DVRs.begin();
113 DbgName = DVR.getVariable()->getName();
114 Loc = DVR.getDebugLoc();
115 Artificial = DVR.Variable->isArtificial();
116 }
 // NOTE(review): the beginning of the statement that constructs remark R is
 // absent from this listing; only its last argument line survives below.
118 Alloca.getParent());
119 R << "in ";
120 identifyFunction(R, Caller);
121 R << ", ";
122 if (Artificial)
123 R << "artificial ";
 // Print the alloca the way it appears as an operand in the IR.
124 SmallString<20> ValName;
125 raw_svector_ostream OS(ValName);
126 Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
127 R << "alloca ('" << ValName << "') ";
128 if (!DbgName.empty())
129 R << "for '" << DbgName << "' ";
130 else
131 R << "without debug info ";
132 R << "with ";
 // A zero StaticSize takes the dynamic-size branch.
133 if (StaticSize)
134 R << "static size of " << itostr(StaticSize) << " bytes";
135 else
136 R << "dynamic size";
137 return R;
138 });
139}
140
141static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
142 const CallBase &Call, StringRef CallKind,
143 StringRef RemarkKind) {
144 ORE.emit([&] {
145 OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
146 R << "in ";
147 identifyFunction(R, Caller);
148 R << ", " << CallKind << ", callee is ";
149 identifyCallee(R, Caller.getParent(), Call.getCalledOperand());
150 return R;
151 });
152}
153
 // NOTE(review): the first line of this signature is absent from this
 // listing; the declaration summary further down the page gives it as
 // static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
 // const Function &Caller, const Instruction &Inst).
155 const Function &Caller,
156 const Instruction &Inst) {
 // Emits a "FlatAddrspaceAccess" remark located at Inst.
157 ORE.emit([&] {
158 OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
159 R << "in ";
160 identifyFunction(R, Caller);
 // Intrinsic calls are named after the called function; every other
 // instruction is named after its opcode.
161 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
162 R << ", '" << II->getCalledFunction()->getName() << "' call";
163 } else {
164 R << ", '" << Inst.getOpcodeName() << "' instruction";
165 }
 // Only instructions with a non-void type have their IR operand name
 // appended.
166 if (!Inst.getType()->isVoidTy()) {
167 SmallString<20> Name;
168 raw_svector_ostream OS(Name);
169 Inst.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
170 R << " ('" << Name << "')";
171 }
172 R << " accesses memory in flat address space";
173 return R;
174 });
175}
176
/// Walk the non-debug instructions of \p BB and update this object's
/// counters: allocas (static vs. dynamic size), calls (direct/indirect,
/// invoke, to defined functions, to inline assembly), and memory accesses
/// whose pointer address space equals FlatAddrspace. Remarks for allocas and
/// calls are emitted as they are encountered.
///
/// NOTE(review): several lines of this definition are absent from this
/// listing (the rest of the parameter list, and a line after most of the
/// ++FlatAddrspaceAccesses statements -- presumably the matching remark
/// emission; confirm against the upstream file).
177void KernelInfo::updateForBB(const BasicBlock &BB,
179 const Function &F = *BB.getParent();
180 const Module &M = *F.getParent();
181 const DataLayout &DL = M.getDataLayout();
182 for (const Instruction &I : BB.instructionsWithoutDebug()) {
183 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) {
184 ++Allocas;
 // StaticSize stays 0 when the allocation size is not available, which
 // remarkAlloca reports as a dynamic size.
185 TypeSize::ScalarTy StaticSize = 0;
186 if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
187 StaticSize = Size->getFixedValue();
 // The running sum is an int64_t, so the size must fit in one.
188 assert(StaticSize <=
189 (TypeSize::ScalarTy)std::numeric_limits<int64_t>::max());
190 AllocasStaticSizeSum += StaticSize;
191 } else {
192 ++AllocasDyn;
193 }
194 remarkAlloca(ORE, F, *Alloca, StaticSize);
195 } else if (const CallBase *Call = dyn_cast<CallBase>(&I)) {
 // CallKind is the human-readable description and RemarkKind the remark
 // name; both are assembled piecewise as the call is classified.
196 SmallString<40> CallKind;
197 SmallString<40> RemarkKind;
198 if (Call->isIndirectCall()) {
199 ++IndirectCalls;
200 CallKind += "indirect";
201 RemarkKind += "Indirect";
202 } else {
203 ++DirectCalls;
204 CallKind += "direct";
205 RemarkKind += "Direct";
206 }
207 if (isa<InvokeInst>(Call)) {
208 ++Invokes;
209 CallKind += " invoke";
210 RemarkKind += "Invoke";
211 } else {
212 CallKind += " call";
213 RemarkKind += "Call";
214 }
 // Direct calls are refined further: a callee that is neither an
 // intrinsic nor a mere declaration counts as a defined function; a call
 // with no called function that is inline asm counts as inline assembly.
215 if (!Call->isIndirectCall()) {
216 if (const Function *Callee = Call->getCalledFunction()) {
217 if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
218 ++DirectCallsToDefinedFunctions;
219 CallKind += " to defined function";
220 RemarkKind += "ToDefinedFunction";
221 }
222 } else if (Call->isInlineAsm()) {
223 ++InlineAssemblyCalls;
224 CallKind += " to inline assembly";
225 RemarkKind += "ToInlineAssembly";
226 }
227 }
228 remarkCall(ORE, F, *Call, CallKind, RemarkKind);
 // Memory intrinsics count as at most one flat access: the destination
 // address space is checked first, and the source of a transfer is only
 // checked when the destination is not flat.
229 if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
230 if (MI->getDestAddressSpace() == FlatAddrspace) {
231 ++FlatAddrspaceAccesses;
233 } else if (const AnyMemTransferInst *MT =
235 if (MT->getSourceAddressSpace() == FlatAddrspace) {
236 ++FlatAddrspaceAccesses;
238 }
239 }
240 }
 // The remaining branches count loads, stores, atomicrmw, and cmpxchg
 // whose pointer operand lives in the flat address space.
241 } else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) {
242 if (Load->getPointerAddressSpace() == FlatAddrspace) {
243 ++FlatAddrspaceAccesses;
245 }
246 } else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) {
247 if (Store->getPointerAddressSpace() == FlatAddrspace) {
248 ++FlatAddrspaceAccesses;
250 }
251 } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) {
252 if (At->getPointerAddressSpace() == FlatAddrspace) {
253 ++FlatAddrspaceAccesses;
255 }
256 } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) {
257 if (At->getPointerAddressSpace() == FlatAddrspace) {
258 ++FlatAddrspaceAccesses;
260 }
261 }
262 }
263}
264
 // NOTE(review): the first line of this signature is absent from this
 // listing; the declaration summary further down the page gives it as
 // static void remarkProperty(OptimizationRemarkEmitter &ORE,
 // const Function &F, StringRef Name, int64_t Value).
266 StringRef Name, int64_t Value) {
267 ORE.emit([&] {
 // NOTE(review): the construction of remark R and the statement between
 // the two stream insertions below are also absent from this listing.
269 R << "in ";
 // The property is rendered as ", <Name> = <Value>" with Value formatted
 // as a decimal integer by itostr.
271 R << ", " << Name << " = " << itostr(Value);
272 return R;
273 });
274}
275
276static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
277 StringRef Name) {
278 if (!F.hasFnAttribute(Name))
279 return std::nullopt;
280 return F.getFnAttributeAsParsedInteger(Name);
281}
282
283void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
284 TargetMachine *TM) {
285 KernelInfo KI;
286 TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(F);
287 KI.FlatAddrspace = TheTTI.getFlatAddressSpace();
288
289 // Record function properties.
290 KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv();
291 for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) {
292 if (auto Val = parseFnAttrAsInteger(F, Name))
293 KI.LaunchBounds.push_back({Name, *Val});
294 }
295 TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds);
296
297 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
298 for (const auto &BB : F)
299 KI.updateForBB(BB, ORE);
300
301#define REMARK_PROPERTY(PROP_NAME) \
302 remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME)
303 REMARK_PROPERTY(ExternalNotKernel);
304 for (auto LB : KI.LaunchBounds)
305 remarkProperty(ORE, F, LB.first, LB.second);
306 REMARK_PROPERTY(Allocas);
307 REMARK_PROPERTY(AllocasStaticSizeSum);
308 REMARK_PROPERTY(AllocasDyn);
309 REMARK_PROPERTY(DirectCalls);
310 REMARK_PROPERTY(IndirectCalls);
311 REMARK_PROPERTY(DirectCallsToDefinedFunctions);
312 REMARK_PROPERTY(InlineAssemblyCalls);
313 REMARK_PROPERTY(Invokes);
314 REMARK_PROPERTY(FlatAddrspaceAccesses);
315#undef REMARK_PROPERTY
316}
317
 // NOTE(review): the signature lines of this definition are absent from this
 // listing; the member summary further down the page shows
 // PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM).
320 // Skip it if remarks are not enabled as it will do nothing useful.
321 if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE))
322 KernelInfo::emitKernelInfo(F, AM, TM);
 // All analyses are reported as preserved regardless of whether remarks were
 // emitted.
323 return PreservedAnalyses::all();
324}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller, const CallBase &Call, StringRef CallKind, StringRef RemarkKind)
static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller, const AllocaInst &Alloca, TypeSize::ScalarTy StaticSize)
static std::optional< int64_t > parseFnAttrAsInteger(Function &F, StringRef Name)
static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F, StringRef Name, int64_t Value)
static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE, const Function &Caller, const Instruction &Inst)
#define REMARK_PROPERTY(PROP_NAME)
static void identifyCallee(OptimizationRemark &R, const Module *M, const Value *V, StringRef Kind="")
static void identifyFunction(OptimizationRemark &R, const Function &F)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file contains the declarations for metadata subclasses.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
This pass exposes codegen information to IR-level passes.
an instruction to allocate memory on the stack
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
StringRef getName() const
DebugLoc getDebugLoc() const
Record of a variable value-assignment, aka a non instruction representation of the dbg....
DbgRecordParamRef< DILocalVariable > Variable
DILocalVariable * getVariable() const
A debug info location.
Definition DebugLoc.h:124
const char * getOpcodeName() const
A wrapper class for inspecting calls to intrinsic functions.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:151
Primary interface to the complete machine description for the target machine.
LLVM_ABI unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
LLVM_ABI void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
Collect kernel launch bounds for F into LB.
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
const ParentTy * getParent() const
Definition ilist_node.h:34
A raw_ostream that writes to an SmallVector or SmallString.
CallInst * Call
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
Definition DebugInfo.cpp:49
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
std::string itostr(int64_t X)