LLVM 22.0.0git
AMDGPUHSAMetadataStreamer.cpp
Go to the documentation of this file.
1//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// AMDGPU HSA Metadata Streamer.
11///
12//
13//===----------------------------------------------------------------------===//
14
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
20#include "SIProgramInfo.h"
21#include "llvm/IR/Module.h"
22#include "llvm/MC/MCContext.h"
23#include "llvm/MC/MCExpr.h"
25
26using namespace llvm;
27
28static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
29 const DataLayout &DL) {
30 Type *Ty = Arg.getType();
31 MaybeAlign ArgAlign;
32 if (Arg.hasByRefAttr()) {
33 Ty = Arg.getParamByRefType();
34 ArgAlign = Arg.getParamAlign();
35 }
36
37 if (!ArgAlign)
38 ArgAlign = DL.getABITypeAlign(Ty);
39
40 return std::pair(Ty, *ArgAlign);
41}
42
43/// Find the mangled symbol name for the runtime handle for \p EnqueuedBlock
45 const Function &EnqueuedBlock) {
46 const MDNode *Associated =
47 EnqueuedBlock.getMetadata(LLVMContext::MD_associated);
48 if (!Associated)
49 return "";
50
51 auto *VM = cast<ValueAsMetadata>(Associated->getOperand(0));
52 auto *RuntimeHandle =
53 dyn_cast<GlobalVariable>(VM->getValue()->stripPointerCasts());
54 if (!RuntimeHandle ||
55 RuntimeHandle->getSection() != ".amdgpu.kernel.runtime.handle")
56 return "";
57
59 TM.getNameWithPrefix(Name, RuntimeHandle,
60 TM.getObjFileLowering()->getMangler());
61 return Name.str().str();
62}
63
64namespace llvm {
65
67 "amdgpu-dump-hsa-metadata",
68 cl::desc("Dump AMDGPU HSA Metadata"));
70 "amdgpu-verify-hsa-metadata",
71 cl::desc("Verify AMDGPU HSA Metadata"));
72
73namespace AMDGPU::HSAMD {
74
75//===----------------------------------------------------------------------===//
76// HSAMetadataStreamerV4
77//===----------------------------------------------------------------------===//
78
79void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
80 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
81}
82
83void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
84 errs() << "AMDGPU HSA Metadata Parser Test: ";
85
86 msgpack::Document FromHSAMetadataString;
87
88 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
89 errs() << "FAIL\n";
90 return;
91 }
92
93 std::string ToHSAMetadataString;
94 raw_string_ostream StrOS(ToHSAMetadataString);
95 FromHSAMetadataString.toYAML(StrOS);
96
97 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
98 if (HSAMetadataString != ToHSAMetadataString) {
99 errs() << "Original input: " << HSAMetadataString << '\n'
100 << "Produced output: " << StrOS.str() << '\n';
101 }
102}
103
104std::optional<StringRef>
107 .Case("read_only", StringRef("read_only"))
108 .Case("write_only", StringRef("write_only"))
109 .Case("read_write", StringRef("read_write"))
110 .Default(std::nullopt);
111}
112
114 unsigned AddressSpace) const {
115 switch (AddressSpace) {
117 return StringRef("private");
119 return StringRef("global");
121 return StringRef("constant");
123 return StringRef("local");
125 return StringRef("generic");
127 return StringRef("region");
128 default:
129 return std::nullopt;
130 }
131}
132
135 StringRef BaseTypeName) const {
136 if (TypeQual.contains("pipe"))
137 return "pipe";
138
139 return StringSwitch<StringRef>(BaseTypeName)
140 .Case("image1d_t", "image")
141 .Case("image1d_array_t", "image")
142 .Case("image1d_buffer_t", "image")
143 .Case("image2d_t", "image")
144 .Case("image2d_array_t", "image")
145 .Case("image2d_array_depth_t", "image")
146 .Case("image2d_array_msaa_t", "image")
147 .Case("image2d_array_msaa_depth_t", "image")
148 .Case("image2d_depth_t", "image")
149 .Case("image2d_msaa_t", "image")
150 .Case("image2d_msaa_depth_t", "image")
151 .Case("image3d_t", "image")
152 .Case("sampler_t", "sampler")
153 .Case("queue_t", "queue")
154 .Default(isa<PointerType>(Ty)
156 ? "dynamic_shared_pointer"
157 : "global_buffer")
158 : "by_value");
159}
160
162 bool Signed) const {
163 switch (Ty->getTypeID()) {
164 case Type::IntegerTyID: {
165 if (!Signed)
166 return (Twine('u') + getTypeName(Ty, true)).str();
167
168 auto BitWidth = Ty->getIntegerBitWidth();
169 switch (BitWidth) {
170 case 8:
171 return "char";
172 case 16:
173 return "short";
174 case 32:
175 return "int";
176 case 64:
177 return "long";
178 default:
179 return (Twine('i') + Twine(BitWidth)).str();
180 }
181 }
182 case Type::HalfTyID:
183 return "half";
184 case Type::FloatTyID:
185 return "float";
186 case Type::DoubleTyID:
187 return "double";
189 auto *VecTy = cast<FixedVectorType>(Ty);
190 auto *ElTy = VecTy->getElementType();
191 auto NumElements = VecTy->getNumElements();
192 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
193 }
194 default:
195 return "unknown";
196 }
197}
198
201 auto Dims = HSAMetadataDoc->getArrayNode();
202 if (Node->getNumOperands() != 3)
203 return Dims;
204
205 for (auto &Op : Node->operands())
206 Dims.push_back(Dims.getDocument()->getNode(
207 mdconst::extract<ConstantInt>(Op)->getZExtValue()));
208 return Dims;
209}
210
212 auto Version = HSAMetadataDoc->getArrayNode();
213 Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
214 Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
215 getRootMetadata("amdhsa.version") = Version;
216}
217
219 const IsaInfo::AMDGPUTargetID &TargetID) {
220 getRootMetadata("amdhsa.target") =
221 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
222}
223
225 auto *Node = Mod.getNamedMetadata("llvm.printf.fmts");
226 if (!Node)
227 return;
228
229 auto Printf = HSAMetadataDoc->getArrayNode();
230 for (auto *Op : Node->operands())
231 if (Op->getNumOperands())
232 Printf.push_back(Printf.getDocument()->getNode(
233 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
234 getRootMetadata("amdhsa.printf") = Printf;
235}
236
238 msgpack::MapDocNode Kern) {
239 // TODO: What about other languages?
240 auto *Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
241 if (!Node || !Node->getNumOperands())
242 return;
243 auto *Op0 = Node->getOperand(0);
244 if (Op0->getNumOperands() <= 1)
245 return;
246
247 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
248 auto LanguageVersion = Kern.getDocument()->getArrayNode();
249 LanguageVersion.push_back(Kern.getDocument()->getNode(
250 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
251 LanguageVersion.push_back(Kern.getDocument()->getNode(
252 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
253 Kern[".language_version"] = LanguageVersion;
254}
255
257 const Function &Func,
258 msgpack::MapDocNode Kern) {
259
260 if (auto *Node = Func.getMetadata("reqd_work_group_size"))
261 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
262 if (auto *Node = Func.getMetadata("work_group_size_hint"))
263 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
264 if (auto *Node = Func.getMetadata("vec_type_hint")) {
265 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
267 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
268 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
269 /*Copy=*/true);
270 }
271
272 std::string HandleName = getEnqueuedBlockSymbolName(TM, Func);
273 if (!HandleName.empty()) {
274 Kern[".device_enqueue_symbol"] =
275 Kern.getDocument()->getNode(std::move(HandleName), /*Copy=*/true);
276 }
277
278 if (Func.hasFnAttribute("device-init"))
279 Kern[".kind"] = Kern.getDocument()->getNode("init");
280 else if (Func.hasFnAttribute("device-fini"))
281 Kern[".kind"] = Kern.getDocument()->getNode("fini");
282}
283
285 msgpack::MapDocNode Kern) {
286 auto &Func = MF.getFunction();
287 unsigned Offset = 0;
288 auto Args = HSAMetadataDoc->getArrayNode();
289 for (auto &Arg : Func.args()) {
290 if (Arg.hasAttribute("amdgpu-hidden-argument"))
291 continue;
292
293 emitKernelArg(Arg, Offset, Args);
294 }
295
296 emitHiddenKernelArgs(MF, Offset, Args);
297
298 Kern[".args"] = Args;
299}
300
302 unsigned &Offset,
304 const auto *Func = Arg.getParent();
305 auto ArgNo = Arg.getArgNo();
306 const MDNode *Node;
307
309 Node = Func->getMetadata("kernel_arg_name");
310 if (Node && ArgNo < Node->getNumOperands())
311 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
312 else if (Arg.hasName())
313 Name = Arg.getName();
314
315 StringRef TypeName;
316 Node = Func->getMetadata("kernel_arg_type");
317 if (Node && ArgNo < Node->getNumOperands())
318 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
319
320 StringRef BaseTypeName;
321 Node = Func->getMetadata("kernel_arg_base_type");
322 if (Node && ArgNo < Node->getNumOperands())
323 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
324
325 StringRef ActAccQual;
326 // Do we really need NoAlias check here?
327 if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
328 if (Arg.onlyReadsMemory())
329 ActAccQual = "read_only";
330 else if (Arg.hasAttribute(Attribute::WriteOnly))
331 ActAccQual = "write_only";
332 }
333
334 StringRef AccQual;
335 Node = Func->getMetadata("kernel_arg_access_qual");
336 if (Node && ArgNo < Node->getNumOperands())
337 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
338
339 StringRef TypeQual;
340 Node = Func->getMetadata("kernel_arg_type_qual");
341 if (Node && ArgNo < Node->getNumOperands())
342 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
343
344 const DataLayout &DL = Func->getDataLayout();
345
346 MaybeAlign PointeeAlign;
347 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
348
349 // FIXME: Need to distinguish in memory alignment from pointer alignment.
350 if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
351 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
352 PointeeAlign = Arg.getParamAlign().valueOrOne();
353 }
354
355 // There's no distinction between byval aggregates and raw aggregates.
356 Type *ArgTy;
357 Align ArgAlign;
358 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
359
360 emitKernelArg(DL, ArgTy, ArgAlign,
361 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
362 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
363 AccQual, TypeQual);
364}
365
367 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
368 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
369 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
370 StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
371 auto Arg = Args.getDocument()->getMapNode();
372
373 if (!Name.empty())
374 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
375 if (!TypeName.empty())
376 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
377 auto Size = DL.getTypeAllocSize(Ty);
378 Arg[".size"] = Arg.getDocument()->getNode(Size);
379 Offset = alignTo(Offset, Alignment);
380 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
381 Offset += Size;
382 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
383 if (PointeeAlign)
384 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
385
386 if (auto *PtrTy = dyn_cast<PointerType>(Ty))
387 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
388 // Limiting address space to emit only for a certain ValueKind.
389 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
390 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
391 /*Copy=*/true);
392
393 if (auto AQ = getAccessQualifier(AccQual))
394 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
395
396 if (auto AAQ = getAccessQualifier(ActAccQual))
397 Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);
398
399 SmallVector<StringRef, 1> SplitTypeQuals;
400 TypeQual.split(SplitTypeQuals, " ", -1, false);
401 for (StringRef Key : SplitTypeQuals) {
402 if (Key == "const")
403 Arg[".is_const"] = Arg.getDocument()->getNode(true);
404 else if (Key == "restrict")
405 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
406 else if (Key == "volatile")
407 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
408 else if (Key == "pipe")
409 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
410 }
411
412 Args.push_back(Arg);
413}
414
416 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
417 auto &Func = MF.getFunction();
418 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
419
420 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
421 if (!HiddenArgNumBytes)
422 return;
423
424 const Module *M = Func.getParent();
425 auto &DL = M->getDataLayout();
426 auto *Int64Ty = Type::getInt64Ty(Func.getContext());
427
428 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
429
430 if (HiddenArgNumBytes >= 8)
431 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
432 Args);
433 if (HiddenArgNumBytes >= 16)
434 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
435 Args);
436 if (HiddenArgNumBytes >= 24)
437 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
438 Args);
439
440 auto *Int8PtrTy =
441 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
442
443 if (HiddenArgNumBytes >= 32) {
444 // We forbid the use of features requiring hostcall when compiling OpenCL
445 // before code object V5, which makes the mutual exclusion between the
446 // "printf buffer" and "hostcall buffer" here sound.
447 if (M->getNamedMetadata("llvm.printf.fmts"))
448 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
449 Args);
450 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
451 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
452 Args);
453 else
454 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
455 }
456
457 // Emit "default queue" and "completion action" arguments if enqueue kernel is
458 // used, otherwise emit dummy "none" arguments.
459 if (HiddenArgNumBytes >= 40) {
460 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
461 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
462 Args);
463 } else {
464 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
465 }
466 }
467
468 if (HiddenArgNumBytes >= 48) {
469 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
470 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
471 Args);
472 } else {
473 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
474 }
475 }
476
477 // Emit the pointer argument for multi-grid object.
478 if (HiddenArgNumBytes >= 56) {
479 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
480 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
481 Args);
482 } else {
483 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
484 }
485 }
486}
487
490 const SIProgramInfo &ProgramInfo,
491 unsigned CodeObjectVersion) const {
492 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
494 const Function &F = MF.getFunction();
495
496 auto Kern = HSAMetadataDoc->getMapNode();
497
498 Align MaxKernArgAlign;
499 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
500 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
501 Kern[".group_segment_fixed_size"] =
502 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
503 DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"],
504 msgpack::Type::UInt, ProgramInfo.ScratchSize);
505 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
506 DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"],
508 ProgramInfo.DynamicCallStack);
509 }
510
511 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
512 Kern[".workgroup_processor_mode"] =
513 Kern.getDocument()->getNode(ProgramInfo.WgpMode);
514
515 // FIXME: The metadata treats the minimum as 16?
516 Kern[".kernarg_segment_align"] =
517 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
518 Kern[".wavefront_size"] =
519 Kern.getDocument()->getNode(STM.getWavefrontSize());
520 DelayedExprs->assignDocNode(Kern[".sgpr_count"], msgpack::Type::UInt,
521 ProgramInfo.NumSGPR);
522 DelayedExprs->assignDocNode(Kern[".vgpr_count"], msgpack::Type::UInt,
523 ProgramInfo.NumVGPR);
524
525 // Only add AGPR count to metadata for supported devices
526 if (STM.hasMAIInsts()) {
527 DelayedExprs->assignDocNode(Kern[".agpr_count"], msgpack::Type::UInt,
528 ProgramInfo.NumAccVGPR);
529 }
530
531 Kern[".max_flat_workgroup_size"] =
532 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
533
534 uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();
535 uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();
536 uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();
537
538 // TODO: Should consider 0 invalid and reject in IR verifier.
539 if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0)
540 Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
541
542 if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0)
543 Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
544
545 if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0)
546 Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
547
548 Kern[".sgpr_spill_count"] =
549 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
550 Kern[".vgpr_spill_count"] =
551 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
552
553 return Kern;
554}
555
557 DelayedExprs->resolveDelayedExpressions();
558 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
559}
560
562 const IsaInfo::AMDGPUTargetID &TargetID) {
563 emitVersion();
564 emitTargetID(TargetID);
566 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
567 DelayedExprs->clear();
568}
569
571 DelayedExprs->resolveDelayedExpressions();
572 std::string HSAMetadataString;
573 raw_string_ostream StrOS(HSAMetadataString);
574 HSAMetadataDoc->toYAML(StrOS);
575
576 if (DumpHSAMetadata)
577 dump(StrOS.str());
579 verify(StrOS.str());
580}
581
583 const SIProgramInfo &ProgramInfo) {
584 auto &Func = MF.getFunction();
585 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
586 Func.getCallingConv() != CallingConv::SPIR_KERNEL)
587 return;
588
589 auto CodeObjectVersion =
590 AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());
591 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
592
593 auto Kernels =
594 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
595
596 auto &TM = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
597 {
598 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
599 Kern[".symbol"] = Kern.getDocument()->getNode(
600 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
601 emitKernelLanguage(Func, Kern);
602 emitKernelAttrs(TM, Func, Kern);
603 emitKernelArgs(MF, Kern);
604 }
605
606 Kernels.push_back(Kern);
607}
608
609//===----------------------------------------------------------------------===//
610// HSAMetadataStreamerV5
611//===----------------------------------------------------------------------===//
612
614 auto Version = HSAMetadataDoc->getArrayNode();
615 Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
616 Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
617 getRootMetadata("amdhsa.version") = Version;
618}
619
621 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
622 auto &Func = MF.getFunction();
623 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
624
625 // No implicit kernel argument is used.
626 if (ST.getImplicitArgNumBytes(Func) == 0)
627 return;
628
629 const Module *M = Func.getParent();
630 auto &DL = M->getDataLayout();
632
633 auto *Int64Ty = Type::getInt64Ty(Func.getContext());
634 auto *Int32Ty = Type::getInt32Ty(Func.getContext());
635 auto *Int16Ty = Type::getInt16Ty(Func.getContext());
636
637 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
638 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
639 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
640 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
641
642 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
643 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
644 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
645
646 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
647 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
648 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
649
650 // Reserved for hidden_tool_correlation_id.
651 Offset += 8;
652
653 Offset += 8; // Reserved.
654
655 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
656 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
657 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
658
659 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
660
661 Offset += 6; // Reserved.
662 auto *Int8PtrTy =
663 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
664
665 if (M->getNamedMetadata("llvm.printf.fmts")) {
666 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
667 Args);
668 } else {
669 Offset += 8; // Skipped.
670 }
671
672 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
673 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
674 Args);
675 } else {
676 Offset += 8; // Skipped.
677 }
678
679 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
680 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
681 Args);
682 } else {
683 Offset += 8; // Skipped.
684 }
685
686 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
687 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
688 else
689 Offset += 8; // Skipped.
690
691 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
692 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
693 Args);
694 } else {
695 Offset += 8; // Skipped.
696 }
697
698 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
699 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
700 Args);
701 } else {
702 Offset += 8; // Skipped.
703 }
704
705 // Emit argument for hidden dynamic lds size
706 if (MFI.isDynamicLDSUsed()) {
707 emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
708 Args);
709 } else {
710 Offset += 4; // skipped
711 }
712
713 Offset += 68; // Reserved.
714
715 // hidden_private_base and hidden_shared_base are only when the subtarget has
716 // ApertureRegs.
717 if (!ST.hasApertureRegs()) {
718 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
719 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
720 } else {
721 Offset += 8; // Skipped.
722 }
723
724 if (MFI.getUserSGPRInfo().hasQueuePtr())
725 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
726}
727
729 const Function &Func,
730 msgpack::MapDocNode Kern) {
732
733 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
734 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
735}
736
737//===----------------------------------------------------------------------===//
738// HSAMetadataStreamerV6
739//===----------------------------------------------------------------------===//
740
742 auto Version = HSAMetadataDoc->getArrayNode();
743 Version.push_back(Version.getDocument()->getNode(VersionMajorV6));
744 Version.push_back(Version.getDocument()->getNode(VersionMinorV6));
745 getRootMetadata("amdhsa.version") = Version;
746}
747
748} // end namespace AMDGPU::HSAMD
749} // end namespace llvm
static std::string getEnqueuedBlockSymbolName(const AMDGPUTargetMachine &TM, const Function &EnqueuedBlock)
Find the mangled symbol name for the runtime handle for EnqueuedBlock.
static std::pair< Type *, Align > getArgumentTypeAlign(const Argument &Arg, const DataLayout &DL)
AMDGPU HSA Metadata Streamer.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Given that RA is a live value
std::string Name
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
ppc ctr loops verify
Defines struct to track resource usage and hardware flags for kernels and entry functions.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
std::optional< StringRef > getAddressSpaceQualifier(unsigned AddressSpace) const
msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const
std::optional< StringRef > getAccessQualifier(StringRef AccQual) const
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern)
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF, const SIProgramInfo &ProgramInfo, unsigned CodeObjectVersion) const
std::unique_ptr< msgpack::Document > HSAMetadataDoc
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func, msgpack::MapDocNode Kern) override
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern)
std::string getTypeName(Type *Ty, bool Signed) const
void dump(StringRef HSAMetadataString) const
void begin(const Module &Mod, const IsaInfo::AMDGPUTargetID &TargetID) override
void emitKernelArg(const Argument &Arg, unsigned &Offset, msgpack::ArrayDocNode Args)
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID)
void verify(StringRef HSAMetadataString) const
StringRef getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) override
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func, msgpack::MapDocNode Kern) override
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
LLVM_ABI Type * getParamByRefType() const
If this is a byref argument, return its type.
Definition: Function.cpp:235
LLVM_ABI bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
Definition: Function.cpp:273
LLVM_ABI bool hasByRefAttr() const
Return true if this argument has the byref attribute.
Definition: Function.cpp:139
LLVM_ABI bool onlyReadsMemory() const
Return true if this argument has the readonly or readnone attribute.
Definition: Function.cpp:309
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Check if an argument has a given attribute.
Definition: Function.cpp:339
const Function * getParent() const
Definition: Argument.h:44
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition: Argument.h:50
LLVM_ABI MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
Definition: Function.cpp:216
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool hasMAIInsts() const
Definition: GCNSubtarget.h:878
bool supportsWGP() const
Definition: GCNSubtarget.h:394
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:576
Metadata node.
Definition: Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1445
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:710
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:434
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:70
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:75
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool hasName() const
Definition: Value.h:262
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
A DocNode that is an array.
ArrayDocNode & getArray(bool Convert=false)
Get an ArrayDocNode for an array node.
Document * getDocument() const
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode getNode()
Create a nil node associated with this Document.
ArrayDocNode getArrayNode()
Create an empty Array node associated with this Document.
LLVM_ABI void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
LLVM_ABI bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
A DocNode that is a map.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:680
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr uint32_t VersionMajorV5
HSA metadata major version for code object V5.
constexpr uint32_t VersionMinorV4
HSA metadata minor version for code object V4.
ValueKind
Value kinds.
constexpr uint32_t VersionMinorV5
HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV6
HSA metadata minor version for code object V6.
constexpr uint32_t VersionMajorV6
HSA metadata major version for code object V6.
constexpr uint32_t VersionMajorV4
HSA metadata major version for code object V4.
unsigned getAMDHSACodeObjectVersion(const Module &M)
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))
static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))
LLVM_GET_TYPE_NAME_CONSTEXPR StringRef getTypeName()
We provide a function which tries to compute the (demangled) name of a type statically.
Definition: TypeName.h:40
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:32
const MCExpr * NumSGPR
Definition: SIProgramInfo.h:74
const MCExpr * NumAccVGPR
Definition: SIProgramInfo.h:71
const MCExpr * DynamicCallStack
Definition: SIProgramInfo.h:94
const MCExpr * NumVGPR
Definition: SIProgramInfo.h:69
const MCExpr * ScratchSize
Definition: SIProgramInfo.h:48