AMDGPUMemoryUtils.cpp
//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm::AMDGPU {

Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

// Returns the target extension type of a global variable,
// which can only be a TargetExtType, an array or single-element struct of it,
// or a nested combination of those.
// TODO: Allow structs of multiple TargetExtType elements of the same type.
// TODO: Disallow other uses of target("amdgcn.named.barrier") including:
//       - Structs containing barriers in different scope/rank.
//       - Structs containing a mixture of barriers and other data.
//       - Globals in other address spaces.
//       - Allocas.
static TargetExtType *getTargetExtType(const GlobalVariable &GV) {
  Type *Ty = GV.getValueType();
  while (true) {
    if (auto *TTy = dyn_cast<TargetExtType>(Ty))
      return TTy;
    if (auto *STy = dyn_cast<StructType>(Ty)) {
      if (STy->getNumElements() != 1)
        return nullptr;
      Ty = STy->getElementType(0);
      continue;
    }
    if (auto *ATy = dyn_cast<ArrayType>(Ty)) {
      Ty = ATy->getElementType();
      continue;
    }
    return nullptr;
  }
}

TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
  if (TargetExtType *Ty = getTargetExtType(GV))
    return Ty->getName() == "amdgcn.named.barrier" ? Ty : nullptr;
  return nullptr;
}
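
// Illustrative note (not part of the original file): a global that
// isNamedBarrier() accepts would look roughly like the following LDS
// definition, possibly wrapped in an array or a single-element struct:
//   @bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
// The type parameters shown are an assumption; the check above only relies on
// the type name "amdgcn.named.barrier".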

bool isDynamicLDS(const GlobalVariable &GV) {
  // An external, zero-size addrspace(3) variable without an initializer is
  // dynamic LDS.
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return false;
  return DL.getTypeAllocSize(GV.getValueType()) == 0;
}
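
// Illustrative note (not from the original file): dynamic LDS as detected
// above is the pattern produced for dynamically sized shared memory
// (e.g. HIP "extern __shared__"), roughly IR of the form
//   @dyn.lds = external addrspace(3) global [0 x i32], align 4
// i.e. an addrspace(3) global whose value type has zero alloc size.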

bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
  // Constants are uniqued within LLVM. A ConstantExpr referring to an LDS
  // global may therefore have uses from multiple different functions.
  // This pass specialises LDS variables with respect to the kernel that
  // allocates them.

  // This is semantically equivalent to (not implemented because it would be
  // slow):
  // for (auto &F : M.functions())
  //   for (auto &BB : F)
  //     for (auto &I : BB)
  //       for (Use &Op : I.operands())
  //         if (constantExprUsesLDS(Op))
  //           replaceConstantExprInFunction(I, Op);

  SmallVector<Constant *> LDSGlobals;
  for (auto &GV : M.globals())
    if (isLDSVariableToLower(GV))
      LDSGlobals.push_back(&GV);
  return convertUsersOfConstantsToInstructions(LDSGlobals);
}
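
// Illustrative note (not from the original file): after the rewrite above, a
// use such as
//   store i32 0, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @lds, i32 0, i32 1)
// becomes a plain getelementptr instruction followed by the store in each
// using function, so every remaining use of the LDS global is an Instruction
// operand that can be attributed to a single function. The global name @lds
// is hypothetical.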

void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
                            FunctionVariableMap &kernels,
                            FunctionVariableMap &Functions) {
  // Get uses from the current function, excluding uses by called functions.
  // Two output variables are used to avoid walking the globals list twice.
  for (auto &GV : M.globals()) {
    if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
      continue;
    for (User *V : GV.users()) {
      if (auto *I = dyn_cast<Instruction>(V)) {
        Function *F = I->getFunction();
        if (isKernelLDS(F))
          kernels[F].insert(&GV);
        else
          Functions[F].insert(&GV);
      }
    }
  }
}

bool isKernelLDS(const Function *F) {
  return AMDGPU::isKernel(F->getCallingConv());
}
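
// Note (added for clarity): AMDGPU::isKernel() keys purely off the calling
// convention, so this returns true for entry points such as
// CallingConv::AMDGPU_KERNEL and false for ordinary device functions.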

LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {

  FunctionVariableMap DirectMapKernel;
  FunctionVariableMap DirectMapFunction;
  getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);

  // Collect functions whose address has escaped
  DenseSet<Function *> AddressTakenFuncs;
  for (Function &F : M.functions()) {
    if (!isKernelLDS(&F))
      if (F.hasAddressTaken(nullptr,
                            /* IgnoreCallbackUses */ false,
                            /* IgnoreAssumeLikeCalls */ false,
                            /* IgnoreLLVMUsed */ true,
                            /* IgnoreArcAttachedCall */ false)) {
        AddressTakenFuncs.insert(&F);
      }
  }

  // Collect variables that are used by functions whose address has escaped
  DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
  for (Function *F : AddressTakenFuncs) {
    set_union(VariablesReachableThroughFunctionPointer, DirectMapFunction[F]);
  }

  auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
    assert(!F->isDeclaration());
    for (const CallGraphNode::CallRecord &R : *CG[F]) {
      if (!R.second->getFunction())
        return true;
    }
    return false;
  };

  // Work out which variables are reachable through function calls
  FunctionVariableMap TransitiveMapFunction = DirectMapFunction;

  // If the function makes any unknown call, assume the worst case that it can
  // access all variables accessed by functions whose address escaped
  for (Function &F : M.functions()) {
    if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
      if (!isKernelLDS(&F)) {
        set_union(TransitiveMapFunction[&F],
                  VariablesReachableThroughFunctionPointer);
      }
    }
  }

  // Direct implementation of collecting all variables reachable from each
  // function
  for (Function &Func : M.functions()) {
    if (Func.isDeclaration() || isKernelLDS(&Func))
      continue;

    DenseSet<Function *> seen; // catches cycles
    SmallVector<Function *, 4> wip = {&Func};

    while (!wip.empty()) {
      Function *F = wip.pop_back_val();

      // Can accelerate this by referring to transitive map for functions that
      // have already been computed, with more care than this
      set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);

      for (const CallGraphNode::CallRecord &R : *CG[F]) {
        Function *Ith = R.second->getFunction();
        if (Ith) {
          if (!seen.contains(Ith)) {
            seen.insert(Ith);
            wip.push_back(Ith);
          }
        }
      }
    }
  }

  // Collect variables that are transitively used by functions whose address
  // has escaped
  for (Function *F : AddressTakenFuncs) {
    set_union(VariablesReachableThroughFunctionPointer,
              TransitiveMapFunction[F]);
  }

  // DirectMapKernel lists which variables are used directly by the kernel;
  // find the variables which are used through a function call
  FunctionVariableMap IndirectMapKernel;

  for (Function &Func : M.functions()) {
    if (Func.isDeclaration() || !isKernelLDS(&Func))
      continue;

    for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
      Function *Ith = R.second->getFunction();
      if (Ith) {
        set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
      }
    }

    // Check if the kernel encounters unknown calls, whether directly or
    // indirectly.
    bool SeesUnknownCalls = [&]() {
      SmallVector<Function *> WorkList = {CG[&Func]->getFunction()};
      SmallPtrSet<Function *, 8> Visited;

      while (!WorkList.empty()) {
        Function *F = WorkList.pop_back_val();

        for (const CallGraphNode::CallRecord &CallRecord : *CG[F]) {
          if (!CallRecord.second)
            continue;

          Function *Callee = CallRecord.second->getFunction();
          if (!Callee)
            return true;

          if (Visited.insert(Callee).second)
            WorkList.push_back(Callee);
        }
      }
      return false;
    }();

    if (SeesUnknownCalls) {
      set_union(IndirectMapKernel[&Func],
                VariablesReachableThroughFunctionPointer);
    }
  }

  // Verify that we fall into one of 2 cases:
  //   - All variables are either absolute or direct-mapped dynamic LDS that is
  //     not lowered. This is a re-run of the pass, so there is nothing to do.
  //   - No variables are absolute.
  std::optional<bool> HasAbsoluteGVs;
  bool HasSpecialGVs = false;
  for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
    for (auto &[Fn, GVs] : Map) {
      for (auto *GV : GVs) {
        bool IsAbsolute = GV->isAbsoluteSymbolRef();
        bool IsDirectMapDynLDSGV =
            AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
        if (IsDirectMapDynLDSGV)
          continue;
        if (isNamedBarrier(*GV)) {
          HasSpecialGVs = true;
          continue;
        }
        if (HasAbsoluteGVs.has_value()) {
          if (*HasAbsoluteGVs != IsAbsolute) {
            reportFatalUsageError(
                "module cannot mix absolute and non-absolute LDS GVs");
          }
        } else
          HasAbsoluteGVs = IsAbsolute;
      }
    }
  }

  // If we only had absolute GVs, we have nothing to do; return an empty
  // result.
  if (HasAbsoluteGVs && *HasAbsoluteGVs)
    return {FunctionVariableMap(), FunctionVariableMap(), false};

  return {std::move(DirectMapKernel), std::move(IndirectMapKernel),
          HasSpecialGVs};
}
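
// Note (added for clarity): the three initializers in the return statements
// above populate the LDSUsesInfoTy result declared in AMDGPUMemoryUtils.h:
// each kernel's direct LDS uses, its indirect (through-call) uses, and a flag
// recording whether any named-barrier globals were encountered.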

void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
                               ArrayRef<StringRef> FnAttrs) {
  for (StringRef Attr : FnAttrs)
    KernelRoot->removeFnAttr(Attr);

  SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
  SmallPtrSet<Function *, 8> Visited;
  bool SeenUnknownCall = false;

  while (!WorkList.empty()) {
    Function *F = WorkList.pop_back_val();

    for (auto &CallRecord : *CG[F]) {
      if (!CallRecord.second)
        continue;

      Function *Callee = CallRecord.second->getFunction();
      if (!Callee) {
        if (!SeenUnknownCall) {
          SeenUnknownCall = true;

          // If we see any indirect calls, assume nothing about potential
          // targets.
          // TODO: This could be refined to possible LDS global users.
          for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
            Function *PotentialCallee =
                ExternalCallRecord.second->getFunction();
            assert(PotentialCallee);
            if (!isKernelLDS(PotentialCallee)) {
              for (StringRef Attr : FnAttrs)
                PotentialCallee->removeFnAttr(Attr);
            }
          }
        }
      } else {
        for (StringRef Attr : FnAttrs)
          Callee->removeFnAttr(Attr);
        if (Visited.insert(Callee).second)
          WorkList.push_back(Callee);
      }
    }
  }
}
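
// Usage sketch (an assumption, not from this file): LDS lowering calls this
// with attributes its rewrites may have invalidated, along the lines of
//   removeFnAttrFromReachable(CG, Kernel, {"amdgpu-no-lds-kernel-id"});
// so that every function reachable from Kernel (and, once an indirect call is
// seen, every externally callable non-kernel function) loses the attribute.
// The attribute name shown is only an example.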

bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
    case Intrinsic::amdgcn_iglp_opt:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias the original load. Any atomic is a
  // universal MemoryDef from MSSA's point of view too, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}
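
// Note (added for clarity): MemorySSA conservatively models fences, barrier
// intrinsics, and atomics as clobbering MemoryDefs. This helper filters those
// back out: a barrier or fence writes nothing, and an atomic that provably
// does not alias Ptr cannot modify the loaded location.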

bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallPtrSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In that
  // case add all Defs to the WorkList and continue going up, checking all
  // the definitions of this memory location until the root. When all the defs
  // are exhausted and we have reached the entry state, there is no clobber.
  // Along the scan, ignore barriers and fences, which MemorySSA considers
  // clobbers but which do not actually write any memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}
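
// Usage sketch (an assumption, not from this file): callers typically pair
// this with MemorySSA and AA results obtained from the pass manager, e.g.
//   if (!isClobberedInFunction(LI, &MSSA, &AA))
//     LI->setMetadata("amdgpu.noclobber", MDNode::get(Ctx, {}));
// as is done when annotating uniform, unclobbered loads for the backend; the
// metadata name reflects its use elsewhere in the AMDGPU backend and is not
// defined in this file.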

} // end namespace llvm::AMDGPU