LLVM 22.0.0git
AMDGPUAttributor.cpp
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
15#include "Utils/AMDGPUBaseInfo.h"
16#include "llvm/IR/IntrinsicsAMDGPU.h"
17#include "llvm/IR/IntrinsicsR600.h"
18#include "llvm/Target/TargetMachine.h"
19#include "llvm/Transforms/IPO/Attributor.h"
20
21#define DEBUG_TYPE "amdgpu-attributor"
22
23using namespace llvm;
24
26 "amdgpu-indirect-call-specialization-threshold",
28 "A threshold controls whether an indirect call will be specialized"),
29 cl::init(3));
30
31#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
32
33enum ImplicitArgumentPositions {
34#include "AMDGPUAttributes.def"
35 LAST_ARG_POS
36};
37
38#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
39
40enum ImplicitArgumentMask {
41 NOT_IMPLICIT_INPUT = 0,
42#include "AMDGPUAttributes.def"
43 ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
44};
45
46#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
47static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
48 ImplicitAttrs[] = {
49#include "AMDGPUAttributes.def"
50};
51
52// We do not need to note the x workitem or workgroup id because they are always
53// initialized.
54//
55// TODO: We should not add the attributes if the known compile time workgroup
56// size is 1 for y/z.
57static ImplicitArgumentMask
58intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
59 bool HasApertureRegs, bool SupportsGetDoorBellID,
60 unsigned CodeObjectVersion) {
61 switch (ID) {
62 case Intrinsic::amdgcn_workitem_id_x:
63 NonKernelOnly = true;
64 return WORKITEM_ID_X;
65 case Intrinsic::amdgcn_workgroup_id_x:
66 NonKernelOnly = true;
67 return WORKGROUP_ID_X;
68 case Intrinsic::amdgcn_workitem_id_y:
69 case Intrinsic::r600_read_tidig_y:
70 return WORKITEM_ID_Y;
71 case Intrinsic::amdgcn_workitem_id_z:
72 case Intrinsic::r600_read_tidig_z:
73 return WORKITEM_ID_Z;
74 case Intrinsic::amdgcn_workgroup_id_y:
75 case Intrinsic::r600_read_tgid_y:
76 return WORKGROUP_ID_Y;
77 case Intrinsic::amdgcn_workgroup_id_z:
78 case Intrinsic::r600_read_tgid_z:
79 return WORKGROUP_ID_Z;
80 case Intrinsic::amdgcn_cluster_id_x:
81 NonKernelOnly = true;
82 return CLUSTER_ID_X;
83 case Intrinsic::amdgcn_cluster_id_y:
84 return CLUSTER_ID_Y;
85 case Intrinsic::amdgcn_cluster_id_z:
86 return CLUSTER_ID_Z;
87 case Intrinsic::amdgcn_lds_kernel_id:
88 return LDS_KERNEL_ID;
89 case Intrinsic::amdgcn_dispatch_ptr:
90 return DISPATCH_PTR;
91 case Intrinsic::amdgcn_dispatch_id:
92 return DISPATCH_ID;
93 case Intrinsic::amdgcn_implicitarg_ptr:
94 return IMPLICIT_ARG_PTR;
95 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
96 // queue_ptr.
97 case Intrinsic::amdgcn_queue_ptr:
98 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
99 return QUEUE_PTR;
100 case Intrinsic::amdgcn_is_shared:
101 case Intrinsic::amdgcn_is_private:
102 if (HasApertureRegs)
103 return NOT_IMPLICIT_INPUT;
104 // Under V5, we need implicitarg_ptr + offsets to access private_base or
105 // shared_base. For pre-V5, however, need to access them through queue_ptr +
106 // offsets.
107 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
108 : QUEUE_PTR;
109 case Intrinsic::trap:
110 case Intrinsic::debugtrap:
111 case Intrinsic::ubsantrap:
112 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
113 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
114 : QUEUE_PTR;
115 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
116 return QUEUE_PTR;
117 default:
118 return NOT_IMPLICIT_INPUT;
119 }
120}
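// For illustration (hypothetical IR, not taken from any test): a device
// function that reads the y workitem id keeps that implicit input live, so
// the attributor must not mark it as unused:
//
//   define void @uses_tid_y() {
//     %y = call i32 @llvm.amdgcn.workitem.id.y()
//     ret void
//   }
//
// intrinsicToAttrMask returns WORKITEM_ID_Y here, AAAMDAttributesFunction
// clears the corresponding assumed bit, and "amdgpu-no-workitem-id-y" is not
// added to @uses_tid_y.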
121
122static bool castRequiresQueuePtr(unsigned SrcAS) {
123 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
124}
125
126static bool isDSAddress(const Constant *C) {
127 const auto *GV = dyn_cast<GlobalValue>(C);
128 if (!GV)
129 return false;
130 unsigned AS = GV->getAddressSpace();
131 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
132}
133
134/// Returns true if sanitizer attributes are present on a function.
135static bool hasSanitizerAttributes(const Function &F) {
136 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
137 F.hasFnAttribute(Attribute::SanitizeThread) ||
138 F.hasFnAttribute(Attribute::SanitizeMemory) ||
139 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
140 F.hasFnAttribute(Attribute::SanitizeMemTag);
141}
142
143namespace {
144class AMDGPUInformationCache : public InformationCache {
145public:
146 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
147 BumpPtrAllocator &Allocator,
148 SetVector<Function *> *CGSCC, TargetMachine &TM)
149 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
150 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
151
152 TargetMachine &TM;
153
154 enum ConstantStatus : uint8_t {
155 NONE = 0,
156 DS_GLOBAL = 1 << 0,
157 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
158 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
159 ADDR_SPACE_CAST_BOTH_TO_FLAT =
160 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
161 };
162
163 /// Check if the subtarget has aperture regs.
164 bool hasApertureRegs(Function &F) {
165 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
166 return ST.hasApertureRegs();
167 }
168
169 /// Check if the subtarget supports GetDoorbellID.
170 bool supportsGetDoorbellID(Function &F) {
171 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
172 return ST.supportsGetDoorbellID();
173 }
174
175 std::optional<std::pair<unsigned, unsigned>>
176 getFlatWorkGroupSizeAttr(const Function &F) const {
177 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
178 if (!R)
179 return std::nullopt;
180 return std::make_pair(R->first, *(R->second));
181 }
182
183 std::pair<unsigned, unsigned>
184 getDefaultFlatWorkGroupSize(const Function &F) const {
185 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
186 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
187 }
188
189 std::pair<unsigned, unsigned>
190 getMaximumFlatWorkGroupRange(const Function &F) {
191 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
192 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
193 }
194
195 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
196 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
197 return ST.getMaxNumWorkGroups(F);
198 }
199
200 /// Get code object version.
201 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
202
203 /// Get the effective value of "amdgpu-waves-per-eu" for the function,
204 /// accounting for the interaction with the passed value to use for
205 /// "amdgpu-flat-work-group-size".
206 std::pair<unsigned, unsigned>
207 getWavesPerEU(const Function &F,
208 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
209 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
210 return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
211 }
212
213 std::optional<std::pair<unsigned, unsigned>>
214 getWavesPerEUAttr(const Function &F) {
215 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
216 /*OnlyFirstRequired=*/true);
217 if (!Val)
218 return std::nullopt;
219 if (!Val->second) {
220 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
221 Val->second = ST.getMaxWavesPerEU();
222 }
223 return std::make_pair(Val->first, *(Val->second));
224 }
225
226 std::pair<unsigned, unsigned>
227 getEffectiveWavesPerEU(const Function &F,
228 std::pair<unsigned, unsigned> WavesPerEU,
229 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
230 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
231 return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
232 getLDSSize(F));
233 }
234
235 unsigned getMaxWavesPerEU(const Function &F) {
236 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
237 return ST.getMaxWavesPerEU();
238 }
239
240 unsigned getMaxAddrSpace() const override {
241 return AMDGPUAS::MAX_AMDGPU_ADDRESS;
242 }
243
244private:
245 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
246 /// local to flat. These casts may require the queue pointer.
247 static uint8_t visitConstExpr(const ConstantExpr *CE) {
248 uint8_t Status = NONE;
249
250 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
251 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
252 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
253 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
254 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
255 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
256 }
257
258 return Status;
259 }
260
261 /// Returns the minimum amount of LDS space used by a workgroup running
262 /// function \p F.
263 static unsigned getLDSSize(const Function &F) {
264 return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
265 {0, UINT32_MAX}, true)
266 .first;
267 }
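// For example, a function carrying attributes #0 = { "amdgpu-lds-size"="1024" }
// (a hypothetical value) yields getLDSSize() == 1024, while a function without
// the attribute falls back to the default minimum of 0. This value feeds the
// occupancy computations in getWavesPerEU and getEffectiveWavesPerEU above.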
268
269 /// Get the constant access bitmap for \p C.
270 uint8_t getConstantAccess(const Constant *C,
271 SmallPtrSetImpl<const Constant *> &Visited) {
272 auto It = ConstantStatus.find(C);
273 if (It != ConstantStatus.end())
274 return It->second;
275
276 uint8_t Result = 0;
277 if (isDSAddress(C))
278 Result = DS_GLOBAL;
279
280 if (const auto *CE = dyn_cast<ConstantExpr>(C))
281 Result |= visitConstExpr(CE);
282
283 for (const Use &U : C->operands()) {
284 const auto *OpC = dyn_cast<Constant>(U);
285 if (!OpC || !Visited.insert(OpC).second)
286 continue;
287
288 Result |= getConstantAccess(OpC, Visited);
289 }
290 return Result;
291 }
292
293public:
294 /// Returns true if \p Fn needs the queue pointer because of \p C.
295 bool needsQueuePtr(const Constant *C, Function &Fn) {
296 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
297 bool HasAperture = hasApertureRegs(Fn);
298
299 // No need to explore the constants.
300 if (!IsNonEntryFunc && HasAperture)
301 return false;
302
303 SmallPtrSet<const Constant *, 8> Visited;
304 uint8_t Access = getConstantAccess(C, Visited);
305
306 // We need to trap on DS globals in non-entry functions.
307 if (IsNonEntryFunc && (Access & DS_GLOBAL))
308 return true;
309
310 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
311 }
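// A minimal sketch of a constant that triggers this (hypothetical IR): a flat
// store to an LDS global through a constant addrspacecast inside a non-entry
// function.
//
//   @lds = internal addrspace(3) global i32 poison
//   define void @callee() {
//     store i32 0, ptr addrspacecast (ptr addrspace(3) @lds to ptr)
//     ret void
//   }
//
// getConstantAccess reports DS_GLOBAL | ADDR_SPACE_CAST_LOCAL_TO_FLAT for the
// cast expression, so needsQueuePtr returns true for @callee, and it also
// returns true for entry functions that lack aperture registers.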
312
313 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
314 SmallPtrSet<const Constant *, 8> Visited;
315 uint8_t Access = getConstantAccess(C, Visited);
316 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
317 }
318
319private:
320 /// Used to determine if the Constant needs the queue pointer.
321 DenseMap<const Constant *, uint8_t> ConstantStatus;
322 const unsigned CodeObjectVersion;
323};
324
325struct AAAMDAttributes
326 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
327 AbstractAttribute> {
328 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
329 AbstractAttribute>;
330
331 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
332
333 /// Create an abstract attribute view for the position \p IRP.
334 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
335 Attributor &A);
336
337 /// See AbstractAttribute::getName().
338 StringRef getName() const override { return "AAAMDAttributes"; }
339
340 /// See AbstractAttribute::getIdAddr().
341 const char *getIdAddr() const override { return &ID; }
342
343 /// This function should return true if the type of the \p AA is
344 /// AAAMDAttributes.
345 static bool classof(const AbstractAttribute *AA) {
346 return (AA->getIdAddr() == &ID);
347 }
348
349 /// Unique ID (due to the unique address)
350 static const char ID;
351};
352const char AAAMDAttributes::ID = 0;
353
354struct AAUniformWorkGroupSize
355 : public StateWrapper<BooleanState, AbstractAttribute> {
356 using Base = StateWrapper<BooleanState, AbstractAttribute>;
357 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
358
359 /// Create an abstract attribute view for the position \p IRP.
360 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
361 Attributor &A);
362
363 /// See AbstractAttribute::getName().
364 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
365
366 /// See AbstractAttribute::getIdAddr().
367 const char *getIdAddr() const override { return &ID; }
368
369 /// This function should return true if the type of the \p AA is
370 /// AAAMDAttributes.
371 static bool classof(const AbstractAttribute *AA) {
372 return (AA->getIdAddr() == &ID);
373 }
374
375 /// Unique ID (due to the unique address)
376 static const char ID;
377};
378const char AAUniformWorkGroupSize::ID = 0;
379
380struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
381 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
382 : AAUniformWorkGroupSize(IRP, A) {}
383
384 void initialize(Attributor &A) override {
385 Function *F = getAssociatedFunction();
386 CallingConv::ID CC = F->getCallingConv();
387
388 if (CC != CallingConv::AMDGPU_KERNEL)
389 return;
390
391 bool InitialValue = false;
392 if (F->hasFnAttribute("uniform-work-group-size"))
393 InitialValue =
394 F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
395 "true";
396
397 if (InitialValue)
398 indicateOptimisticFixpoint();
399 else
400 indicatePessimisticFixpoint();
401 }
402
403 ChangeStatus updateImpl(Attributor &A) override {
404 ChangeStatus Change = ChangeStatus::UNCHANGED;
405
406 auto CheckCallSite = [&](AbstractCallSite CS) {
407 Function *Caller = CS.getInstruction()->getFunction();
408 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
409 << "->" << getAssociatedFunction()->getName() << "\n");
410
411 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
412 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
413 if (!CallerInfo || !CallerInfo->isValidState())
414 return false;
415
416 Change = Change | clampStateAndIndicateChange(this->getState(),
417 CallerInfo->getState());
418
419 return true;
420 };
421
422 bool AllCallSitesKnown = true;
423 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
424 return indicatePessimisticFixpoint();
425
426 return Change;
427 }
428
429 ChangeStatus manifest(Attributor &A) override {
430 SmallVector<Attribute, 8> AttrList;
431 LLVMContext &Ctx = getAssociatedFunction()->getContext();
432
433 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
434 getAssumed() ? "true" : "false"));
435 return A.manifestAttrs(getIRPosition(), AttrList,
436 /* ForceReplace */ true);
437 }
438
439 bool isValidState() const override {
440 // This state is always valid, even when the state is false.
441 return true;
442 }
443
444 const std::string getAsStr(Attributor *) const override {
445 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
446 }
447
448 /// See AbstractAttribute::trackStatistics()
449 void trackStatistics() const override {}
450};
451
452AAUniformWorkGroupSize &
453AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
454 Attributor &A) {
455 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
456 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
457 llvm_unreachable(
458 "AAUniformWorkGroupSize is only valid for function position");
459}
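// Propagation example (hypothetical module): a kernel built with
// -cl-uniform-work-group-size carries "uniform-work-group-size"="true" and is
// fixed optimistically; every device function reachable only from such
// kernels is clamped to the same state and manifests
// "uniform-work-group-size"="true". If any caller lacks the attribute (or has
// it set to "false"), the callee is pessimistically fixed and manifests
// "uniform-work-group-size"="false".
//
//   define amdgpu_kernel void @k() #0 {
//     call void @helper()
//     ret void
//   }
//   attributes #0 = { "uniform-work-group-size"="true" }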
460
461struct AAAMDAttributesFunction : public AAAMDAttributes {
462 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
463 : AAAMDAttributes(IRP, A) {}
464
465 void initialize(Attributor &A) override {
466 Function *F = getAssociatedFunction();
467
468 // If the function requires the implicit arg pointer due to sanitizers,
469 // assume it's needed even if explicitly marked as not requiring it.
470 // Flat scratch initialization is needed because `asan_malloc_impl`
471 // calls introduced later in pipeline will have flat scratch accesses.
472 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
473 // implementation for `asan_malloc_impl` is updated.
474 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
475 if (HasSanitizerAttrs) {
476 removeAssumedBits(IMPLICIT_ARG_PTR);
477 removeAssumedBits(HOSTCALL_PTR);
478 removeAssumedBits(FLAT_SCRATCH_INIT);
479 }
480
481 for (auto Attr : ImplicitAttrs) {
482 if (HasSanitizerAttrs &&
483 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
484 Attr.first == FLAT_SCRATCH_INIT))
485 continue;
486
487 if (F->hasFnAttribute(Attr.second))
488 addKnownBits(Attr.first);
489 }
490
491 if (F->isDeclaration())
492 return;
493
494 // Ignore functions with graphics calling conventions, these are currently
495 // not allowed to have kernel arguments.
496 if (AMDGPU::isGraphics(F->getCallingConv())) {
497 indicatePessimisticFixpoint();
498 return;
499 }
500 }
501
502 ChangeStatus updateImpl(Attributor &A) override {
503 Function *F = getAssociatedFunction();
504 // The current assumed state used to determine a change.
505 auto OrigAssumed = getAssumed();
506
507 // Check for Intrinsics and propagate attributes.
508 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
509 *this, this->getIRPosition(), DepClassTy::REQUIRED);
510 if (!AAEdges || !AAEdges->isValidState() ||
511 AAEdges->hasNonAsmUnknownCallee())
512 return indicatePessimisticFixpoint();
513
514 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
515
516 bool NeedsImplicit = false;
517 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
518 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
519 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
520 unsigned COV = InfoCache.getCodeObjectVersion();
521
522 for (Function *Callee : AAEdges->getOptimisticEdges()) {
523 Intrinsic::ID IID = Callee->getIntrinsicID();
524 if (IID == Intrinsic::not_intrinsic) {
525 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
526 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
527 if (!AAAMD || !AAAMD->isValidState())
528 return indicatePessimisticFixpoint();
529 *this &= *AAAMD;
530 continue;
531 }
532
533 bool NonKernelOnly = false;
534 ImplicitArgumentMask AttrMask =
535 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
536 HasApertureRegs, SupportsGetDoorbellID, COV);
537 if (AttrMask != NOT_IMPLICIT_INPUT) {
538 if ((IsNonEntryFunc || !NonKernelOnly))
539 removeAssumedBits(AttrMask);
540 }
541 }
542
543 // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
544 if (NeedsImplicit)
545 removeAssumedBits(IMPLICIT_ARG_PTR);
546
547 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
548 // Under V5, we need implicitarg_ptr + offsets to access private_base or
549 // shared_base. We do not actually need queue_ptr.
550 if (COV >= 5)
551 removeAssumedBits(IMPLICIT_ARG_PTR);
552 else
553 removeAssumedBits(QUEUE_PTR);
554 }
555
556 if (funcRetrievesMultigridSyncArg(A, COV)) {
557 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
558 "multigrid_sync_arg needs implicitarg_ptr");
559 removeAssumedBits(MULTIGRID_SYNC_ARG);
560 }
561
562 if (funcRetrievesHostcallPtr(A, COV)) {
563 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
564 removeAssumedBits(HOSTCALL_PTR);
565 }
566
567 if (funcRetrievesHeapPtr(A, COV)) {
568 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
569 removeAssumedBits(HEAP_PTR);
570 }
571
572 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
573 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
574 removeAssumedBits(QUEUE_PTR);
575 }
576
577 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
578 removeAssumedBits(LDS_KERNEL_ID);
579 }
580
581 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
582 removeAssumedBits(DEFAULT_QUEUE);
583
584 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
585 removeAssumedBits(COMPLETION_ACTION);
586
587 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
588 removeAssumedBits(FLAT_SCRATCH_INIT);
589
590 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
591 : ChangeStatus::UNCHANGED;
592 }
593
594 ChangeStatus manifest(Attributor &A) override {
595 SmallVector<Attribute, 8> AttrList;
596 LLVMContext &Ctx = getAssociatedFunction()->getContext();
597
598 for (auto Attr : ImplicitAttrs) {
599 if (isKnown(Attr.first))
600 AttrList.push_back(Attribute::get(Ctx, Attr.second));
601 }
602
603 return A.manifestAttrs(getIRPosition(), AttrList,
604 /* ForceReplace */ true);
605 }
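// For a leaf device function that touches none of the implicit inputs, every
// assumed bit survives to the fixpoint, becomes known, and manifest() emits
// the full negative set, e.g. (abbreviated, hypothetical output):
//
//   attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr"
//                     "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr"
//                     "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
//                     "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" ... }
//
// which lets the backend avoid preloading the corresponding SGPR/VGPR inputs.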
606
607 const std::string getAsStr(Attributor *) const override {
608 std::string Str;
609 raw_string_ostream OS(Str);
610 OS << "AMDInfo[";
611 for (auto Attr : ImplicitAttrs)
612 if (isAssumed(Attr.first))
613 OS << ' ' << Attr.second;
614 OS << " ]";
615 return OS.str();
616 }
617
618 /// See AbstractAttribute::trackStatistics()
619 void trackStatistics() const override {}
620
621private:
622 bool checkForQueuePtr(Attributor &A) {
623 Function *F = getAssociatedFunction();
624 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
625
626 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
627
628 bool NeedsQueuePtr = false;
629
630 auto CheckAddrSpaceCasts = [&](Instruction &I) {
631 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
632 if (castRequiresQueuePtr(SrcAS)) {
633 NeedsQueuePtr = true;
634 return false;
635 }
636 return true;
637 };
638
639 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
640
641 // `checkForAllInstructions` is much cheaper than walking all instructions
642 // manually, so try it first.
643
644 // The queue pointer is not needed if aperture registers are present.
645 if (!HasApertureRegs) {
646 bool UsedAssumedInformation = false;
647 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
648 {Instruction::AddrSpaceCast},
649 UsedAssumedInformation);
650 }
651
652 // If we found that we need the queue pointer, nothing else to do.
653 if (NeedsQueuePtr)
654 return true;
655
656 if (!IsNonEntryFunc && HasApertureRegs)
657 return false;
658
659 for (BasicBlock &BB : *F) {
660 for (Instruction &I : BB) {
661 for (const Use &U : I.operands()) {
662 if (const auto *C = dyn_cast<Constant>(U)) {
663 if (InfoCache.needsQueuePtr(C, *F))
664 return true;
665 }
666 }
667 }
668 }
669
670 return false;
671 }
672
673 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
674 auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
675 AA::RangeTy Range(Pos, 8);
676 return funcRetrievesImplicitKernelArg(A, Range);
677 }
678
679 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
680 auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
681 AA::RangeTy Range(Pos, 8);
682 return funcRetrievesImplicitKernelArg(A, Range);
683 }
684
685 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
686 auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
687 AA::RangeTy Range(Pos, 8);
688 return funcRetrievesImplicitKernelArg(A, Range);
689 }
690
691 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
692 auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
693 AA::RangeTy Range(Pos, 8);
694 return funcRetrievesImplicitKernelArg(A, Range);
695 }
696
697 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
698 if (COV < 5)
699 return false;
700 AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
701 return funcRetrievesImplicitKernelArg(A, Range);
702 }
703
704 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
705 if (COV < 5)
706 return false;
707 AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
708 return funcRetrievesImplicitKernelArg(A, Range);
709 }
710
711 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
712 // Check if this is a call to the implicitarg_ptr builtin and it
713 // is used to retrieve the hostcall pointer. The implicit arg for
714 // hostcall is not used only if every use of the implicitarg_ptr
715 // is a load that clearly does not retrieve any byte of the
716 // hostcall pointer. We check this by tracing all the uses of the
717 // initial call to the implicitarg_ptr intrinsic.
718 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
719 auto &Call = cast<CallBase>(I);
720 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
721 return true;
722
723 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
724 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
725 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
726 return false;
727
728 return PointerInfoAA->forallInterferingAccesses(
729 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
730 return Acc.getRemoteInst()->isDroppable();
731 });
732 };
733
734 bool UsedAssumedInformation = false;
735 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
736 UsedAssumedInformation);
737 }
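// A minimal sketch of the access pattern being detected (hypothetical IR;
// byte offset 80 is the assumed hostcall-buffer position for code object V5):
//
//   %impl = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
//   %gep  = getelementptr i8, ptr addrspace(4) %impl, i64 80
//   %hc   = load ptr, ptr addrspace(4) %gep
//
// AAPointerInfo reports a non-droppable 8-byte access overlapping the queried
// range, so funcRetrievesImplicitKernelArg returns true and the HOSTCALL_PTR
// bit is removed, i.e. "amdgpu-no-hostcall-ptr" is not added.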
738
739 bool funcRetrievesLDSKernelId(Attributor &A) {
740 auto DoesNotRetrieve = [&](Instruction &I) {
741 auto &Call = cast<CallBase>(I);
742 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
743 };
744 bool UsedAssumedInformation = false;
745 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
746 UsedAssumedInformation);
747 }
748
749 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
750 // not to be set.
751 bool needFlatScratchInit(Attributor &A) {
752 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
753
754 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
755 // there is a cast from PRIVATE_ADDRESS.
756 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
757 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
758 AMDGPUAS::PRIVATE_ADDRESS;
759 };
760
761 bool UsedAssumedInformation = false;
762 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
763 {Instruction::AddrSpaceCast},
764 UsedAssumedInformation))
765 return true;
766
767 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
768 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
769
770 Function *F = getAssociatedFunction();
771 for (Instruction &I : instructions(F)) {
772 for (const Use &U : I.operands()) {
773 if (const auto *C = dyn_cast<Constant>(U)) {
774 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
775 return true;
776 }
777 }
778 }
779
780 // Finally check callees.
781
782 // This is called on each callee; false means callee shouldn't have
783 // no-flat-scratch-init.
784 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
785 const auto &CB = cast<CallBase>(I);
786 const Function *Callee = CB.getCalledFunction();
787
788 // Callee == 0 for inline asm or indirect call with known callees.
789 // In the latter case, updateImpl() already checked the callees and we
790 // know their FLAT_SCRATCH_INIT bit is set.
791 // If function has indirect call with unknown callees, the bit is
792 // already removed in updateImpl() and execution won't reach here.
793 if (!Callee)
794 return true;
795
796 return Callee->getIntrinsicID() !=
797 Intrinsic::amdgcn_addrspacecast_nonnull;
798 };
799
800 UsedAssumedInformation = false;
801 // If any callee is false (i.e. need FlatScratchInit),
802 // checkForAllCallLikeInstructions returns false, in which case this
803 // function returns true.
804 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
805 UsedAssumedInformation);
806 }
807};
808
809AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
810 Attributor &A) {
811 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
812 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
813 llvm_unreachable("AAAMDAttributes is only valid for function position");
814}
815
816/// Base class to derive different size ranges.
817struct AAAMDSizeRangeAttribute
818 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
819 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
820
821 StringRef AttrName;
822
823 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
824 StringRef AttrName)
825 : Base(IRP, 32), AttrName(AttrName) {}
826
827 /// See AbstractAttribute::trackStatistics()
828 void trackStatistics() const override {}
829
830 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
831 ChangeStatus Change = ChangeStatus::UNCHANGED;
832
833 auto CheckCallSite = [&](AbstractCallSite CS) {
834 Function *Caller = CS.getInstruction()->getFunction();
835 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
836 << "->" << getAssociatedFunction()->getName() << '\n');
837
838 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
839 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
840 if (!CallerInfo || !CallerInfo->isValidState())
841 return false;
842
843 Change |=
844 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
845
846 return true;
847 };
848
849 bool AllCallSitesKnown = true;
850 if (!A.checkForAllCallSites(CheckCallSite, *this,
851 /*RequireAllCallSites=*/true,
852 AllCallSitesKnown))
853 return indicatePessimisticFixpoint();
854
855 return Change;
856 }
857
858 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
859 /// attribute if it is not the same as the default.
860 ChangeStatus
861 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
862 std::pair<unsigned, unsigned> Default) {
863 auto [Min, Max] = Default;
864 unsigned Lower = getAssumed().getLower().getZExtValue();
865 unsigned Upper = getAssumed().getUpper().getZExtValue();
866
867 // Clamp the range to the default value.
868 if (Lower < Min)
869 Lower = Min;
870 if (Upper > Max + 1)
871 Upper = Max + 1;
872
873 // No manifest if the value is invalid or same as default after clamp.
874 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
875 return ChangeStatus::UNCHANGED;
876
877 Function *F = getAssociatedFunction();
878 LLVMContext &Ctx = F->getContext();
879 SmallString<10> Buffer;
880 raw_svector_ostream OS(Buffer);
881 OS << Lower << ',' << Upper - 1;
882 return A.manifestAttrs(getIRPosition(),
883 {Attribute::get(Ctx, AttrName, OS.str())},
884 /*ForceReplace=*/true);
885 }
886
887 const std::string getAsStr(Attributor *) const override {
888 std::string Str;
889 raw_string_ostream OS(Str);
890 OS << getName() << '[';
891 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
892 OS << ']';
893 return OS.str();
894 }
895};
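// Worked example for emitAttributeIfNotDefaultAfterClamp (hypothetical
// numbers): with Default = {1, 1024} and an assumed range of [64, 257), the
// clamp changes nothing and the manifested attribute is
// "amdgpu-flat-work-group-size"="64,256". If the assumed range had widened
// back to [1, 1025), it would match the default after clamping and no
// attribute would be emitted.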
896
897/// Propagate amdgpu-flat-work-group-size attribute.
898struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
899 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
900 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
901
902 void initialize(Attributor &A) override {
903 Function *F = getAssociatedFunction();
904 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
905
906 bool HasAttr = false;
907 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
908 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
909
910 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
911 // We only consider an attribute that is not max range because the front
912 // end always emits the attribute, unfortunately, and sometimes it emits
913 // the max range.
914 if (*Attr != MaxRange) {
915 Range = *Attr;
916 HasAttr = true;
917 }
918 }
919
920 // We don't want to directly clamp the state if it's the max range because
921 // that is basically the worst state.
922 if (Range == MaxRange)
923 return;
924
925 auto [Min, Max] = Range;
926 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
927 IntegerRangeState IRS(CR);
928 clampStateAndIndicateChange(this->getState(), IRS);
929
930 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
931 indicateOptimisticFixpoint();
932 }
933
934 ChangeStatus updateImpl(Attributor &A) override {
935 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
936 }
937
938 /// Create an abstract attribute view for the position \p IRP.
939 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
940 Attributor &A);
941
942 ChangeStatus manifest(Attributor &A) override {
943 Function *F = getAssociatedFunction();
944 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
945 return emitAttributeIfNotDefaultAfterClamp(
946 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
947 }
948
949 /// See AbstractAttribute::getName()
950 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
951
952 /// See AbstractAttribute::getIdAddr()
953 const char *getIdAddr() const override { return &ID; }
954
955 /// This function should return true if the type of the \p AA is
956 /// AAAMDFlatWorkGroupSize
957 static bool classof(const AbstractAttribute *AA) {
958 return (AA->getIdAddr() == &ID);
959 }
960
961 /// Unique ID (due to the unique address)
962 static const char ID;
963};
964
965const char AAAMDFlatWorkGroupSize::ID = 0;
966
967AAAMDFlatWorkGroupSize &
968AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
969 Attributor &A) {
970 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
971 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
972 llvm_unreachable(
973 "AAAMDFlatWorkGroupSize is only valid for function position");
974}
975
976struct TupleDecIntegerRangeState : public AbstractState {
977 DecIntegerState<uint32_t> X, Y, Z;
978
979 bool isValidState() const override {
980 return X.isValidState() && Y.isValidState() && Z.isValidState();
981 }
982
983 bool isAtFixpoint() const override {
984 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
985 }
986
987 ChangeStatus indicateOptimisticFixpoint() override {
988 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
989 Z.indicateOptimisticFixpoint();
990 }
991
992 ChangeStatus indicatePessimisticFixpoint() override {
993 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
994 Z.indicatePessimisticFixpoint();
995 }
996
997 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
998 X ^= Other.X;
999 Y ^= Other.Y;
1000 Z ^= Other.Z;
1001 return *this;
1002 }
1003
1004 bool operator==(const TupleDecIntegerRangeState &Other) const {
1005 return X == Other.X && Y == Other.Y && Z == Other.Z;
1006 }
1007
1008 TupleDecIntegerRangeState &getAssumed() { return *this; }
1009 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
1010};
1011
1012using AAAMDMaxNumWorkgroupsState =
1013 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1014
1015/// Propagate amdgpu-max-num-workgroups attribute.
1016struct AAAMDMaxNumWorkgroups
1017 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1018 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1019
1020 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1021
1022 void initialize(Attributor &A) override {
1023 Function *F = getAssociatedFunction();
1024 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1025
1026 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
1027
1028 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1029 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1030 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1031
1032 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1033 indicatePessimisticFixpoint();
1034 }
1035
1036 ChangeStatus updateImpl(Attributor &A) override {
1037 ChangeStatus Change = ChangeStatus::UNCHANGED;
1038
1039 auto CheckCallSite = [&](AbstractCallSite CS) {
1040 Function *Caller = CS.getInstruction()->getFunction();
1041 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1042 << "->" << getAssociatedFunction()->getName() << '\n');
1043
1044 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1045 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1046 if (!CallerInfo || !CallerInfo->isValidState())
1047 return false;
1048
1049 Change |=
1050 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1051 return true;
1052 };
1053
1054 bool AllCallSitesKnown = true;
1055 if (!A.checkForAllCallSites(CheckCallSite, *this,
1056 /*RequireAllCallSites=*/true,
1057 AllCallSitesKnown))
1058 return indicatePessimisticFixpoint();
1059
1060 return Change;
1061 }
1062
1063 /// Create an abstract attribute view for the position \p IRP.
1064 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1065 Attributor &A);
1066
1067 ChangeStatus manifest(Attributor &A) override {
1068 Function *F = getAssociatedFunction();
1069 LLVMContext &Ctx = F->getContext();
1070 SmallString<32> Buffer;
1071 raw_svector_ostream OS(Buffer);
1072 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1073
1074 // TODO: Should annotate loads of the group size for this to do anything
1075 // useful.
1076 return A.manifestAttrs(
1077 getIRPosition(),
1078 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1079 /* ForceReplace= */ true);
1080 }
1081
1082 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1083
1084 const std::string getAsStr(Attributor *) const override {
1085 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1086 raw_string_ostream OS(Buffer);
1087 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1088 << ']';
1089 return OS.str();
1090 }
1091
1092 const char *getIdAddr() const override { return &ID; }
1093
1094 /// This function should return true if the type of the \p AA is
1095 /// AAAMDMaxNumWorkgroups
1096 static bool classof(const AbstractAttribute *AA) {
1097 return (AA->getIdAddr() == &ID);
1098 }
1099
1100 void trackStatistics() const override {}
1101
1102 /// Unique ID (due to the unique address)
1103 static const char ID;
1104};
1105
1106const char AAAMDMaxNumWorkgroups::ID = 0;
1107
1108AAAMDMaxNumWorkgroups &
1109AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1110 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1111 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1112 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1113}
1114
1115/// Propagate amdgpu-waves-per-eu attribute.
1116struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1117 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1118 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1119
1120 void initialize(Attributor &A) override {
1121 Function *F = getAssociatedFunction();
1122 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1123
1124 // If the attribute exists, we will honor it if it is not the default.
1125 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1126 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1127 1U, InfoCache.getMaxWavesPerEU(*F)};
1128 if (*Attr != MaxWavesPerEURange) {
1129 auto [Min, Max] = *Attr;
1130 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1131 IntegerRangeState RangeState(Range);
1132 this->getState() = RangeState;
1133 indicateOptimisticFixpoint();
1134 return;
1135 }
1136 }
1137
1138 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1139 indicatePessimisticFixpoint();
1140 }
1141
1142 ChangeStatus updateImpl(Attributor &A) override {
1143 ChangeStatus Change = ChangeStatus::UNCHANGED;
1144
1145 auto CheckCallSite = [&](AbstractCallSite CS) {
1146 Function *Caller = CS.getInstruction()->getFunction();
1147 Function *Func = getAssociatedFunction();
1148 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1149 << "->" << Func->getName() << '\n');
1150 (void)Func;
1151
1152 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1153 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1154 if (!CallerAA || !CallerAA->isValidState())
1155 return false;
1156
1157 ConstantRange Assumed = getAssumed();
1158 unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1159 CallerAA->getAssumed().getLower().getZExtValue());
1160 unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1161 CallerAA->getAssumed().getUpper().getZExtValue());
1162 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1163 IntegerRangeState RangeState(Range);
1164 getState() = RangeState;
1165 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1166 : ChangeStatus::CHANGED;
1167
1168 return true;
1169 };
1170
1171 bool AllCallSitesKnown = true;
1172 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1173 return indicatePessimisticFixpoint();
1174
1175 return Change;
1176 }
1177
1178 /// Create an abstract attribute view for the position \p IRP.
1179 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1180 Attributor &A);
1181
1182 ChangeStatus manifest(Attributor &A) override {
1183 Function *F = getAssociatedFunction();
1184 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1185 return emitAttributeIfNotDefaultAfterClamp(
1186 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1187 }
1188
1189 /// See AbstractAttribute::getName()
1190 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1191
1192 /// See AbstractAttribute::getIdAddr()
1193 const char *getIdAddr() const override { return &ID; }
1194
1195 /// This function should return true if the type of the \p AA is
1196 /// AAAMDWavesPerEU
1197 static bool classof(const AbstractAttribute *AA) {
1198 return (AA->getIdAddr() == &ID);
1199 }
1200
1201 /// Unique ID (due to the unique address)
1202 static const char ID;
1203};
1204
1205const char AAAMDWavesPerEU::ID = 0;
1206
1207AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1208 Attributor &A) {
1209 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1210 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1211 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1212}
1213
1214/// Compute the minimum number of AGPRs required to allocate the inline asm.
1215static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1216 const CallBase &Call) {
1217 unsigned ArgNo = 0;
1218 unsigned ResNo = 0;
1219 unsigned AGPRDefCount = 0;
1220 unsigned AGPRUseCount = 0;
1221 unsigned MaxPhysReg = 0;
1222 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1223
1224 // TODO: Overestimates due to not accounting for tied operands
1225 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1226 Type *Ty = nullptr;
1227 switch (CI.Type) {
1228 case InlineAsm::isOutput: {
1229 Ty = Call.getType();
1230 if (auto *STy = dyn_cast<StructType>(Ty))
1231 Ty = STy->getElementType(ResNo);
1232 ++ResNo;
1233 break;
1234 }
1235 case InlineAsm::isInput: {
1236 Ty = Call.getArgOperand(ArgNo++)->getType();
1237 break;
1238 }
1239 case InlineAsm::isLabel:
1240 continue;
1241 case InlineAsm::isClobber:
1242 // Parse the physical register reference.
1243 break;
1244 }
1245
1246 for (StringRef Code : CI.Codes) {
1247 unsigned RegCount = 0;
1248 if (Code.starts_with("a")) {
1249 // Virtual register, compute number of registers based on the type.
1250 //
1251 // We ought to be going through TargetLowering to get the number of
1252 // registers, but we should avoid the dependence on CodeGen here.
1253 RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1254 } else {
1255 // Physical register reference
1256 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1257 if (Kind == 'a') {
1258 RegCount = NumRegs;
1259 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1260 }
1261
1262 continue;
1263 }
1264
1265 if (CI.Type == InlineAsm::isOutput) {
1266 // Apply tuple alignment requirement
1267 //
1268 // TODO: This is more conservative than necessary.
1269 AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1270
1271 AGPRDefCount += RegCount;
1272 if (CI.isEarlyClobber) {
1273 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1274 AGPRUseCount += RegCount;
1275 }
1276 } else {
1277 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1278 AGPRUseCount += RegCount;
1279 }
1280 }
1281 }
1282
1283 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1284
1285 // TODO: This is overly conservative. If there are any physical registers,
1286 // allocate any virtual registers after them so we don't have to solve optimal
1287 // packing.
1288 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1289}
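// Worked example (hypothetical inline asm):
//
//   %v = call <2 x i32> asm "; $0 $1", "=a,a"(<2 x i32> %x)
//
// Both the "=a" output and the "a" input are virtual AGPR constraints on a
// 64-bit type, so RegCount is 2 for each, AGPRDefCount and AGPRUseCount both
// reach 2, no physical AGPRs are referenced, and the function returns 2.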
1290
1291struct AAAMDGPUMinAGPRAlloc
1292 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1293 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1294 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1295
1296 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1297 Attributor &A) {
1298 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1299 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1300 llvm_unreachable(
1301 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1302 }
1303
1304 void initialize(Attributor &A) override {
1305 Function *F = getAssociatedFunction();
1306 auto [MinNumAGPR, MaxNumAGPR] =
1307 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1308 /*OnlyFirstRequired=*/true);
1309 if (MinNumAGPR == 0)
1310 indicateOptimisticFixpoint();
1311 }
1312
1313 const std::string getAsStr(Attributor *A) const override {
1314 std::string Str = "amdgpu-agpr-alloc=";
1315 raw_string_ostream OS(Str);
1316 OS << getAssumed();
1317 return OS.str();
1318 }
1319
1320 void trackStatistics() const override {}
1321
1322 ChangeStatus updateImpl(Attributor &A) override {
1323 DecIntegerState<> Maximum;
1324
1325 // Check for cases which require allocation of AGPRs. The only cases where
1326 // AGPRs are required are if there are direct references to AGPRs, so inline
1327 // assembly and special intrinsics.
1328 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1329 const auto &CB = cast<CallBase>(I);
1330 const Value *CalleeOp = CB.getCalledOperand();
1331
1332 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1333 // Technically, the inline asm could be invoking a call to an unknown
1334 // external function that requires AGPRs, but ignore that.
1335 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1336 Maximum.takeAssumedMaximum(NumRegs);
1337 return true;
1338 }
1339
1340 switch (CB.getIntrinsicID()) {
1341 case Intrinsic::not_intrinsic:
1342 break;
1343 case Intrinsic::write_register:
1344 case Intrinsic::read_register:
1345 case Intrinsic::read_volatile_register: {
1346 const MDString *RegName = cast<MDString>(
1347 cast<MDNode>(
1348 cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
1349 ->getOperand(0));
1350 auto [Kind, RegIdx, NumRegs] =
1351 AMDGPU::parseAsmPhysRegName(RegName->getString());
1352 if (Kind == 'a')
1353 Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1354
1355 return true;
1356 }
1357 default:
1358 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1359 // required to use AGPRs.
1360 return true;
1361 }
1362
1363 // TODO: Handle callsite attributes
1364 auto *CBEdges = A.getAAFor<AACallEdges>(
1365 *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1366 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1368 return false;
1369 }
1370
1371 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1372 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1373 *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1374 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1376 return false;
1377 }
1378
1379 Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1380 }
1381
1382 return true;
1383 };
1384
1385 bool UsedAssumedInformation = false;
1386 if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
1387 UsedAssumedInformation))
1388 return indicatePessimisticFixpoint();
1389
1390 return clampStateAndIndicateChange(getState(), Maximum);
1391 }
1392
1393 ChangeStatus manifest(Attributor &A) override {
1394 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1395 SmallString<4> Buffer;
1396 raw_svector_ostream OS(Buffer);
1397 OS << getAssumed();
1398
1399 return A.manifestAttrs(
1400 getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
1401 }
1402
1403 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1404 const char *getIdAddr() const override { return &ID; }
1405
1406 /// This function should return true if the type of the \p AA is
1407 /// AAAMDGPUMinAGPRAllocs
1408 static bool classof(const AbstractAttribute *AA) {
1409 return (AA->getIdAddr() == &ID);
1410 }
1411
1412 static const char ID;
1413};
1414
1415const char AAAMDGPUMinAGPRAlloc::ID = 0;
1416
1417/// An abstract attribute to propagate the function attribute
1418/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1419struct AAAMDGPUClusterDims
1420 : public StateWrapper<BooleanState, AbstractAttribute> {
1421 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1422 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1423
1424 /// Create an abstract attribute view for the position \p IRP.
1425 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1426 Attributor &A);
1427
1428 /// See AbstractAttribute::getName().
1429 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1430
1431 /// See AbstractAttribute::getIdAddr().
1432 const char *getIdAddr() const override { return &ID; }
1433
1434 /// This function should return true if the type of the \p AA is
1435 /// AAAMDGPUClusterDims.
1436 static bool classof(const AbstractAttribute *AA) {
1437 return AA->getIdAddr() == &ID;
1438 }
1439
1440 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1441
1442 /// Unique ID (due to the unique address)
1443 static const char ID;
1444};
1445
1446const char AAAMDGPUClusterDims::ID = 0;
1447
1448struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1449 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1450 : AAAMDGPUClusterDims(IRP, A) {}
1451
1452 void initialize(Attributor &A) override {
1453 Function *F = getAssociatedFunction();
1454 assert(F && "empty associated function");
1455
1457
1458 // No matter what a kernel function has, it is final.
1459 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1460 if (Attr.isUnknown())
1461 indicatePessimisticFixpoint();
1462 else
1463 indicateOptimisticFixpoint();
1464 }
1465 }
1466
1467 const std::string getAsStr(Attributor *A) const override {
1468 if (!getAssumed() || Attr.isUnknown())
1469 return "unknown";
1470 if (Attr.isNoCluster())
1471 return "no";
1472 if (Attr.isVariableDims())
1473 return "variable";
1474 return Attr.to_string();
1475 }
1476
1477 void trackStatistics() const override {}
1478
1479 ChangeStatus updateImpl(Attributor &A) override {
1480 auto OldState = Attr;
1481
1482 auto CheckCallSite = [&](AbstractCallSite CS) {
1483 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1484 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1485 DepClassTy::REQUIRED);
1486 if (!CallerAA || !CallerAA->isValidState())
1487 return false;
1488
1489 return merge(CallerAA->getClusterDims());
1490 };
1491
1492 bool UsedAssumedInformation = false;
1493 if (!A.checkForAllCallSites(CheckCallSite, *this,
1494 /*RequireAllCallSites=*/true,
1495 UsedAssumedInformation))
1496 return indicatePessimisticFixpoint();
1497
1498 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1499 }
1500
1501 ChangeStatus manifest(Attributor &A) override {
1502 if (Attr.isUnknown())
1503 return ChangeStatus::UNCHANGED;
1504 return A.manifestAttrs(
1505 getIRPosition(),
1506 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1507 Attr.to_string())},
1508 /*ForceReplace=*/true);
1509 }
1510
1511 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1512 return Attr;
1513 }
1514
1515private:
1516 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1517 // Case 1: Both of them are still unknown; we do nothing and keep waiting
1518 // for propagation.
1519 if (Attr.isUnknown() && Other.isUnknown())
1520 return true;
1521
1522 // Case 2: The other is determined, but we are unknown yet, we simply take
1523 // the other's value.
1524 if (Attr.isUnknown()) {
1525 Attr = Other;
1526 return true;
1527 }
1528
1529 // Case 3: We are determined but the other is unknown yet, we simply keep
1530 // everything unchanged.
1531 if (Other.isUnknown())
1532 return true;
1533
1534 // After this point, both are determined.
1535
1536 // Case 4: If they are same, we do nothing.
1537 if (Attr == Other)
1538 return true;
1539
1540 // Now they are not same.
1541
1542 // Case 5: If either of us uses cluster (but not both; otherwise case 4
1543 // would hold), then it is unknown whether cluster will be used, and the
1544 // state is final, unlike case 1.
1545 if (Attr.isNoCluster() || Other.isNoCluster()) {
1546 Attr.setUnknown();
1547 return false;
1548 }
1549
1550 // Case 6: Both of us use cluster, but the dims are different, so the result
1551 // is, cluster is used, but we just don't have a fixed dims.
1552 Attr.setVariableDims();
1553 return true;
1554 }
1555
1556 AMDGPU::ClusterDimsAttr Attr;
1557
1558 static constexpr const char AttrName[] = "amdgpu-cluster-dims";
1559};
1560
1561AAAMDGPUClusterDims &
1562AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1563 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1564 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1565 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1566}
1567
1568static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1569 AMDGPUAttributorOptions Options,
1570 ThinOrFullLTOPhase LTOPhase) {
1571 SetVector<Function *> Functions;
1572 for (Function &F : M) {
1573 if (!F.isIntrinsic())
1574 Functions.insert(&F);
1575 }
1576
1577 CallGraphUpdater CGUpdater;
1578 BumpPtrAllocator Allocator;
1579 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1580 DenseSet<const char *> Allowed(
1581 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1582 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1583 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1584 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1585 &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1586 &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1587 &AAAMDGPUClusterDims::ID});
1588
1589 AttributorConfig AC(CGUpdater);
1590 AC.IsClosedWorldModule = Options.IsClosedWorld;
1591 AC.Allowed = &Allowed;
1592 AC.IsModulePass = true;
1593 AC.DefaultInitializeLiveInternals = false;
1594 AC.IndirectCalleeSpecializationCallback =
1595 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1596 Function &Callee, unsigned NumAssumedCallees) {
1597 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1598 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1599 };
1600 AC.IPOAmendableCB = [](const Function &F) {
1601 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1602 };
1603
1604 Attributor A(Functions, InfoCache, AC);
1605
1606 LLVM_DEBUG({
1607 StringRef LTOPhaseStr = to_string(LTOPhase);
1608 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1609 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1610 << (AC.IsClosedWorldModule ? "" : "not ")
1611 << "assumed to be a closed world.\n";
1612 });
1613
1614 for (auto *F : Functions) {
1615 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1616 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1617 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1618 CallingConv::ID CC = F->getCallingConv();
1619 if (!AMDGPU::isEntryFunctionCC(CC)) {
1620 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1621 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1622 }
1623
1624 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1625 if (!F->isDeclaration() && ST.hasClusters())
1626 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1627
1628 if (ST.hasGFX90AInsts())
1629 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
1630
1631 for (auto &I : instructions(F)) {
1632 Value *Ptr = nullptr;
1633 if (auto *LI = dyn_cast<LoadInst>(&I))
1634 Ptr = LI->getPointerOperand();
1635 else if (auto *SI = dyn_cast<StoreInst>(&I))
1636 Ptr = SI->getPointerOperand();
1637 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1638 Ptr = RMW->getPointerOperand();
1639 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1640 Ptr = CmpX->getPointerOperand();
1641
1642 if (Ptr) {
1643 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1644 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1645 }
1646 }
1647 }
1648
1649 return A.run() == ChangeStatus::CHANGED;
1650}
1651} // namespace
1652
1653PreservedAnalyses AMDGPUAttributorPass::run(Module &M,
1654 ModuleAnalysisManager &AM) {
1655
1656 FunctionAnalysisManager &FAM =
1657 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1658 AnalysisGetter AG(FAM);
1659
1660 // TODO: Probably preserves CFG
1661 return runImpl(M, AG, TM, Options, LTOPhase) ? PreservedAnalyses::none()
1662 : PreservedAnalyses::all();
1663}
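// End-to-end sketch (hypothetical invocation and module):
//
//   opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor in.ll -S
//
//   define amdgpu_kernel void @k() {
//     call void @leaf()
//     ret void
//   }
//   define void @leaf() {
//     ret void
//   }
//
// After the run, @leaf is annotated with the deduced set, e.g.
// "uniform-work-group-size"="false" plus the "amdgpu-no-*" implicit-input
// attributes, and, when callers constrain them away from the defaults,
// clamped "amdgpu-flat-work-group-size" / "amdgpu-waves-per-eu" values.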
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
Definition Pass.cpp:301
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ Other
Any other memory.
Definition ModRef.h:68
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
ChangeStatus
{
Definition Attributor.h:496
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
Definition Attributor.h:593
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition Attributor.h:661
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
Definition Attributor.h:617
@ IRP_FUNCTION
An attribute for a function (scope).
Definition Attributor.h:605
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition Attributor.h:636
Kind getPositionKind() const
Return the associated position kind.
Definition Attributor.h:889
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
Definition Attributor.h:656
Data structure to hold cached (LLVM-IR) information.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie a abstract state implementation to an abstract attribute.