38 ArgAlign =
DL.getABITypeAlign(Ty);
40 return std::pair(Ty, *ArgAlign);
47 EnqueuedBlock.
getMetadata(LLVMContext::MD_associated);
51 auto *VM = cast<ValueAsMetadata>(Associated->
getOperand(0));
53 dyn_cast<GlobalVariable>(VM->getValue()->stripPointerCasts());
55 RuntimeHandle->getSection() !=
".amdgpu.kernel.runtime.handle")
59 TM.getNameWithPrefix(
Name, RuntimeHandle,
60 TM.getObjFileLowering()->getMangler());
61 return Name.str().str();
67 "amdgpu-dump-hsa-metadata",
68 cl::desc(
"Dump AMDGPU HSA Metadata"));
70 "amdgpu-verify-hsa-metadata",
71 cl::desc(
"Verify AMDGPU HSA Metadata"));
73namespace AMDGPU::HSAMD {
80 errs() <<
"AMDGPU HSA Metadata:\n" << HSAMetadataString <<
'\n';
84 errs() <<
"AMDGPU HSA Metadata Parser Test: ";
88 if (!FromHSAMetadataString.
fromYAML(HSAMetadataString)) {
93 std::string ToHSAMetadataString;
95 FromHSAMetadataString.
toYAML(StrOS);
97 errs() << (HSAMetadataString == StrOS.
str() ?
"PASS" :
"FAIL") <<
'\n';
98 if (HSAMetadataString != ToHSAMetadataString) {
99 errs() <<
"Original input: " << HSAMetadataString <<
'\n'
100 <<
"Produced output: " << StrOS.
str() <<
'\n';
104std::optional<StringRef>
107 .Case(
"read_only",
StringRef(
"read_only"))
108 .Case(
"write_only",
StringRef(
"write_only"))
109 .Case(
"read_write",
StringRef(
"read_write"))
140 .
Case(
"image1d_t",
"image")
141 .
Case(
"image1d_array_t",
"image")
142 .
Case(
"image1d_buffer_t",
"image")
143 .
Case(
"image2d_t",
"image")
144 .
Case(
"image2d_array_t",
"image")
145 .
Case(
"image2d_array_depth_t",
"image")
146 .
Case(
"image2d_array_msaa_t",
"image")
147 .
Case(
"image2d_array_msaa_depth_t",
"image")
148 .
Case(
"image2d_depth_t",
"image")
149 .
Case(
"image2d_msaa_t",
"image")
150 .
Case(
"image2d_msaa_depth_t",
"image")
151 .
Case(
"image3d_t",
"image")
152 .
Case(
"sampler_t",
"sampler")
153 .
Case(
"queue_t",
"queue")
156 ?
"dynamic_shared_pointer"
189 auto *VecTy = cast<FixedVectorType>(Ty);
190 auto *ElTy = VecTy->getElementType();
191 auto NumElements = VecTy->getNumElements();
202 if (Node->getNumOperands() != 3)
205 for (
auto &
Op : Node->operands())
206 Dims.push_back(Dims.getDocument()->getNode(
207 mdconst::extract<ConstantInt>(
Op)->getZExtValue()));
225 auto *Node =
Mod.getNamedMetadata(
"llvm.printf.fmts");
230 for (
auto *
Op : Node->operands())
232 Printf.push_back(Printf.getDocument()->getNode(
233 cast<MDString>(
Op->getOperand(0))->getString(),
true));
240 auto *Node = Func.getParent()->getNamedMetadata(
"opencl.ocl.version");
241 if (!Node || !Node->getNumOperands())
243 auto *Op0 = Node->getOperand(0);
244 if (Op0->getNumOperands() <= 1)
250 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
252 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
253 Kern[
".language_version"] = LanguageVersion;
260 if (
auto *Node = Func.getMetadata(
"reqd_work_group_size"))
262 if (
auto *Node = Func.getMetadata(
"work_group_size_hint"))
264 if (
auto *Node = Func.getMetadata(
"vec_type_hint")) {
267 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
268 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
273 if (!HandleName.empty()) {
274 Kern[
".device_enqueue_symbol"] =
278 if (Func.hasFnAttribute(
"device-init"))
280 else if (Func.hasFnAttribute(
"device-fini"))
289 for (
auto &Arg : Func.args()) {
290 if (Arg.hasAttribute(
"amdgpu-hidden-argument"))
298 Kern[
".args"] = Args;
309 Node = Func->getMetadata(
"kernel_arg_name");
310 if (Node && ArgNo < Node->getNumOperands())
311 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
316 Node = Func->getMetadata(
"kernel_arg_type");
317 if (Node && ArgNo < Node->getNumOperands())
318 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
321 Node = Func->getMetadata(
"kernel_arg_base_type");
322 if (Node && ArgNo < Node->getNumOperands())
323 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
329 ActAccQual =
"read_only";
331 ActAccQual =
"write_only";
335 Node = Func->getMetadata(
"kernel_arg_access_qual");
336 if (Node && ArgNo < Node->getNumOperands())
337 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
340 Node = Func->getMetadata(
"kernel_arg_type_qual");
341 if (Node && ArgNo < Node->getNumOperands())
342 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
350 if (
auto *PtrTy = dyn_cast<PointerType>(Ty)) {
362 PointeeAlign,
Name, TypeName, BaseTypeName, ActAccQual,
371 auto Arg = Args.getDocument()->getMapNode();
374 Arg[
".name"] = Arg.getDocument()->getNode(
Name,
true);
375 if (!TypeName.empty())
376 Arg[
".type_name"] = Arg.getDocument()->getNode(TypeName,
true);
377 auto Size =
DL.getTypeAllocSize(Ty);
378 Arg[
".size"] = Arg.getDocument()->getNode(
Size);
380 Arg[
".offset"] = Arg.getDocument()->getNode(
Offset);
382 Arg[
".value_kind"] = Arg.getDocument()->getNode(
ValueKind,
true);
384 Arg[
".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
386 if (
auto *PtrTy = dyn_cast<PointerType>(Ty))
390 Arg[
".address_space"] = Arg.getDocument()->getNode(*Qualifier,
394 Arg[
".access"] = Arg.getDocument()->getNode(*AQ,
true);
397 Arg[
".actual_access"] = Arg.getDocument()->getNode(*AAQ,
true);
400 TypeQual.
split(SplitTypeQuals,
" ", -1,
false);
403 Arg[
".is_const"] = Arg.getDocument()->getNode(
true);
404 else if (Key ==
"restrict")
405 Arg[
".is_restrict"] = Arg.getDocument()->getNode(
true);
406 else if (Key ==
"volatile")
407 Arg[
".is_volatile"] = Arg.getDocument()->getNode(
true);
408 else if (Key ==
"pipe")
409 Arg[
".is_pipe"] = Arg.getDocument()->getNode(
true);
420 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
421 if (!HiddenArgNumBytes)
424 const Module *M = Func.getParent();
425 auto &
DL = M->getDataLayout();
430 if (HiddenArgNumBytes >= 8)
433 if (HiddenArgNumBytes >= 16)
436 if (HiddenArgNumBytes >= 24)
443 if (HiddenArgNumBytes >= 32) {
447 if (M->getNamedMetadata(
"llvm.printf.fmts"))
450 else if (!Func.hasFnAttribute(
"amdgpu-no-hostcall-ptr"))
459 if (HiddenArgNumBytes >= 40) {
460 if (!Func.hasFnAttribute(
"amdgpu-no-default-queue")) {
468 if (HiddenArgNumBytes >= 48) {
469 if (!Func.hasFnAttribute(
"amdgpu-no-completion-action")) {
478 if (HiddenArgNumBytes >= 56) {
479 if (!Func.hasFnAttribute(
"amdgpu-no-multigrid-sync-arg")) {
491 unsigned CodeObjectVersion)
const {
498 Align MaxKernArgAlign;
499 Kern[
".kernarg_segment_size"] = Kern.getDocument()->getNode(
501 Kern[
".group_segment_fixed_size"] =
502 Kern.getDocument()->getNode(ProgramInfo.
LDSSize);
503 DelayedExprs->assignDocNode(Kern[
".private_segment_fixed_size"],
506 DelayedExprs->assignDocNode(Kern[
".uses_dynamic_stack"],
512 Kern[
".workgroup_processor_mode"] =
513 Kern.getDocument()->getNode(ProgramInfo.
WgpMode);
516 Kern[
".kernarg_segment_align"] =
517 Kern.getDocument()->getNode(std::max(
Align(4), MaxKernArgAlign).
value());
518 Kern[
".wavefront_size"] =
531 Kern[
".max_flat_workgroup_size"] =
532 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
534 uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();
535 uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();
536 uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();
539 if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0)
540 Kern[
".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
542 if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0)
543 Kern[
".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
545 if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0)
546 Kern[
".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
548 Kern[
".sgpr_spill_count"] =
549 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
550 Kern[
".vgpr_spill_count"] =
551 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
572 std::string HSAMetadataString;
589 auto CodeObjectVersion =
598 Kern[
".name"] = Kern.getDocument()->getNode(Func.getName());
599 Kern[
".symbol"] = Kern.getDocument()->getNode(
600 (
Twine(Func.getName()) +
Twine(
".kd")).str(),
true);
606 Kernels.push_back(Kern);
626 if (ST.getImplicitArgNumBytes(Func) == 0)
629 const Module *M = Func.getParent();
630 auto &
DL = M->getDataLayout();
665 if (M->getNamedMetadata(
"llvm.printf.fmts")) {
672 if (!Func.hasFnAttribute(
"amdgpu-no-hostcall-ptr")) {
679 if (!Func.hasFnAttribute(
"amdgpu-no-multigrid-sync-arg")) {
686 if (!Func.hasFnAttribute(
"amdgpu-no-heap-ptr"))
691 if (!Func.hasFnAttribute(
"amdgpu-no-default-queue")) {
698 if (!Func.hasFnAttribute(
"amdgpu-no-completion-action")) {
717 if (!ST.hasApertureRegs()) {
733 if (Func.getFnAttribute(
"uniform-work-group-size").getValueAsBool())
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Given that RA is a live value
AMD GCN specific subclass of TargetSubtarget.
Module.h This file contains the declarations for the Module class.
Defines struct to track resource usage and hardware flags for kernels and entry functions.
bool isDynamicLDSUsed() const
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
std::string toString() const
This class represents an incoming formal argument to a Function.
LLVM_ABI Type * getParamByRefType() const
If this is a byref argument, return its type.
LLVM_ABI bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
LLVM_ABI bool hasByRefAttr() const
Return true if this argument has the byref attribute.
LLVM_ABI bool onlyReadsMemory() const
Return true if this argument has the readonly or readnone attribute.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Check if an argument has a given attribute.
const Function * getParent() const
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
LLVM_ABI MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
const MDOperand & getOperand(unsigned I) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
@ HalfTyID
16-bit floating point type
@ FloatTyID
32-bit floating point type
@ IntegerTyID
Arbitrary bit width integers.
@ FixedVectorTyID
Fixed width SIMD vector type.
@ DoubleTyID
64-bit floating point type
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
TypeID getTypeID() const
Return the type id for the type.
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
A DocNode that is an array.
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Document * getDocument() const
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode getNode()
Create a nil node associated with this Document.
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
LLVM_ABI void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
LLVM_ABI bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr uint32_t VersionMajorV5
HSA metadata major version for code object V5.
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
constexpr uint32_t VersionMinorV5
HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV6
HSA metadata minor version for code object V6.
constexpr uint32_t VersionMajorV6
HSA metadata major version for code object V6.
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
unsigned getAMDHSACodeObjectVersion(const Module &M)
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
LLVM_GET_TYPE_NAME_CONSTEXPR StringRef getTypeName()
We provide a function which tries to compute the (demangled) name of a type statically.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr unsigned BitWidth
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Track resource usage for kernels / entry functions.
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * ScratchSize