LLVM 22.0.0git
AMDGPUAttributor.cpp
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
15#include "Utils/AMDGPUBaseInfo.h"
16#include "llvm/IR/IntrinsicsAMDGPU.h"
17#include "llvm/IR/IntrinsicsR600.h"
18#include "llvm/Target/TargetMachine.h"
19#include "llvm/Transforms/IPO/Attributor.h"
20
21#define DEBUG_TYPE "amdgpu-attributor"
22
23using namespace llvm;
24
25static cl::opt<unsigned> IndirectCallSpecializationThreshold(
26    "amdgpu-indirect-call-specialization-threshold",
27    cl::desc(
28        "A threshold controls whether an indirect call will be specialized"),
29    cl::init(3));
30
31#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
32
33enum ImplicitArgumentPositions {
34#include "AMDGPUAttributes.def"
35  LAST_ARG_POS
36};
37
38#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
39
40enum ImplicitArgumentMask {
41  NOT_IMPLICIT_INPUT = 0,
42#include "AMDGPUAttributes.def"
43  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
44};
45
46#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
47static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
48    ImplicitAttrs[] = {
49#include "AMDGPUAttributes.def"
50};
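
The three AMDGPU_ATTRIBUTE blocks above are an X-macro pattern: AMDGPUAttributes.def is re-included under a different AMDGPU_ATTRIBUTE definition each time, producing bit positions, a one-bit-per-attribute mask enum, and a (mask, attribute-string) table from a single attribute list. Below is a minimal self-contained sketch of the same idiom; it uses a hypothetical MY_ATTRIBUTE_LIST with made-up names in place of the .def file and is an illustration only, not part of this file.

// Illustrative sketch only: the X-macro idiom used above, with a hypothetical
// MY_ATTRIBUTE_LIST standing in for AMDGPUAttributes.def.
#include <cstdint>
#include <utility>

#define MY_ATTRIBUTE_LIST(X)                                                   \
  X(EXAMPLE_DISPATCH_PTR, "example-no-dispatch-ptr")                           \
  X(EXAMPLE_QUEUE_PTR, "example-no-queue-ptr")

// Pass 1: bit positions 0, 1, ...
#define MY_ATTRIBUTE(Name, Str) Name##_POS,
enum ExamplePositions { MY_ATTRIBUTE_LIST(MY_ATTRIBUTE) LAST_EXAMPLE_POS };
#undef MY_ATTRIBUTE

// Pass 2: one bit per attribute, plus an all-bits mask.
#define MY_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ExampleMask : uint32_t {
  NOT_EXAMPLE_INPUT = 0,
  MY_ATTRIBUTE_LIST(MY_ATTRIBUTE) ALL_EXAMPLE_MASK = (1 << LAST_EXAMPLE_POS) - 1
};
#undef MY_ATTRIBUTE

// Pass 3: a (mask, string) table for manifesting attributes.
#define MY_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ExampleMask, const char *> ExampleAttrs[] = {
    MY_ATTRIBUTE_LIST(MY_ATTRIBUTE)};
#undef MY_ATTRIBUTE

The sketch undefines MY_ATTRIBUTE explicitly between passes; the real file instead re-#defines AMDGPU_ATTRIBUTE before each #include of AMDGPUAttributes.def.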
51
52// We do not need to note the x workitem or workgroup id because they are always
53// initialized.
54//
55// TODO: We should not add the attributes if the known compile time workgroup
56// size is 1 for y/z.
57static ImplicitArgumentMask
58intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
59 bool HasApertureRegs, bool SupportsGetDoorBellID,
60 unsigned CodeObjectVersion) {
61 switch (ID) {
62 case Intrinsic::amdgcn_workitem_id_x:
63 NonKernelOnly = true;
64 return WORKITEM_ID_X;
65 case Intrinsic::amdgcn_workgroup_id_x:
66 NonKernelOnly = true;
67 return WORKGROUP_ID_X;
68 case Intrinsic::amdgcn_workitem_id_y:
69 case Intrinsic::r600_read_tidig_y:
70 return WORKITEM_ID_Y;
71 case Intrinsic::amdgcn_workitem_id_z:
72 case Intrinsic::r600_read_tidig_z:
73 return WORKITEM_ID_Z;
74 case Intrinsic::amdgcn_workgroup_id_y:
75 case Intrinsic::r600_read_tgid_y:
76 return WORKGROUP_ID_Y;
77 case Intrinsic::amdgcn_workgroup_id_z:
78 case Intrinsic::r600_read_tgid_z:
79 return WORKGROUP_ID_Z;
80 case Intrinsic::amdgcn_cluster_id_x:
81 NonKernelOnly = true;
82 return CLUSTER_ID_X;
83 case Intrinsic::amdgcn_cluster_id_y:
84 return CLUSTER_ID_Y;
85 case Intrinsic::amdgcn_cluster_id_z:
86 return CLUSTER_ID_Z;
87 case Intrinsic::amdgcn_lds_kernel_id:
88 return LDS_KERNEL_ID;
89 case Intrinsic::amdgcn_dispatch_ptr:
90 return DISPATCH_PTR;
91 case Intrinsic::amdgcn_dispatch_id:
92 return DISPATCH_ID;
93 case Intrinsic::amdgcn_implicitarg_ptr:
94 return IMPLICIT_ARG_PTR;
95 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
96 // queue_ptr.
97 case Intrinsic::amdgcn_queue_ptr:
98 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
99 return QUEUE_PTR;
100 case Intrinsic::amdgcn_is_shared:
101 case Intrinsic::amdgcn_is_private:
102 if (HasApertureRegs)
103 return NOT_IMPLICIT_INPUT;
104 // Under V5, we need implicitarg_ptr + offsets to access private_base or
105 // shared_base. For pre-V5, however, we need to access them through queue_ptr +
106 // offsets.
107 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
108 : QUEUE_PTR;
109 case Intrinsic::trap:
110 case Intrinsic::debugtrap:
111 case Intrinsic::ubsantrap:
112 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
113 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
114 : QUEUE_PTR;
115 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
116 return QUEUE_PTR;
117 default:
118 return NOT_IMPLICIT_INPUT;
119 }
120}
121
122static bool castRequiresQueuePtr(unsigned SrcAS) {
123 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
124}
125
126static bool isDSAddress(const Constant *C) {
127  const auto *GV = dyn_cast<GlobalValue>(C);
128  if (!GV)
129    return false;
130  unsigned AS = GV->getAddressSpace();
131  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
132}
133
134/// Returns true if sanitizer attributes are present on a function.
135static bool hasSanitizerAttributes(const Function &F) {
136 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
137 F.hasFnAttribute(Attribute::SanitizeThread) ||
138 F.hasFnAttribute(Attribute::SanitizeMemory) ||
139 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
140 F.hasFnAttribute(Attribute::SanitizeMemTag);
141}
142
143namespace {
144class AMDGPUInformationCache : public InformationCache {
145public:
146  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
147                         BumpPtrAllocator &Allocator,
148                         SetVector<Function *> *CGSCC, TargetMachine &TM)
149 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
150 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
151
152 TargetMachine &TM;
153
154 enum ConstantStatus : uint8_t {
155 NONE = 0,
156 DS_GLOBAL = 1 << 0,
157 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
158 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
159 ADDR_SPACE_CAST_BOTH_TO_FLAT =
160 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
161 };
162
163 /// Check if the subtarget has aperture regs.
164 bool hasApertureRegs(Function &F) {
165 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
166 return ST.hasApertureRegs();
167 }
168
169 /// Check if the subtarget supports GetDoorbellID.
170 bool supportsGetDoorbellID(Function &F) {
171 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
172 return ST.supportsGetDoorbellID();
173 }
174
175 std::optional<std::pair<unsigned, unsigned>>
176 getFlatWorkGroupSizeAttr(const Function &F) const {
177 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
178 if (!R)
179 return std::nullopt;
180 return std::make_pair(R->first, *(R->second));
181 }
182
183 std::pair<unsigned, unsigned>
184 getDefaultFlatWorkGroupSize(const Function &F) const {
185 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
186 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
187 }
188
189 std::pair<unsigned, unsigned>
190 getMaximumFlatWorkGroupRange(const Function &F) {
191 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
192 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
193 }
194
195 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
196 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
197 return ST.getMaxNumWorkGroups(F);
198 }
199
200 /// Get code object version.
201 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
202
203 /// Get the effective value of "amdgpu-waves-per-eu" for the function,
204 /// accounting for the interaction with the passed value to use for
205 /// "amdgpu-flat-work-group-size".
206 std::pair<unsigned, unsigned>
207 getWavesPerEU(const Function &F,
208 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
209 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
210 return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
211 }
212
213 std::optional<std::pair<unsigned, unsigned>>
214 getWavesPerEUAttr(const Function &F) {
215 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
216 /*OnlyFirstRequired=*/true);
217 if (!Val)
218 return std::nullopt;
219 if (!Val->second) {
220 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
221 Val->second = ST.getMaxWavesPerEU();
222 }
223 return std::make_pair(Val->first, *(Val->second));
224 }
225
226 std::pair<unsigned, unsigned>
227 getEffectiveWavesPerEU(const Function &F,
228 std::pair<unsigned, unsigned> WavesPerEU,
229 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
230 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
231 return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
232 getLDSSize(F));
233 }
234
235 unsigned getMaxWavesPerEU(const Function &F) {
236 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
237 return ST.getMaxWavesPerEU();
238 }
239
240  unsigned getMaxAddrSpace() const override {
241    return AMDGPUAS::MAX_AMDGPU_ADDRESS;
242  }
243
244private:
245 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
246 /// local to flat. These casts may require the queue pointer.
247 static uint8_t visitConstExpr(const ConstantExpr *CE) {
248 uint8_t Status = NONE;
249
250 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
251 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
252 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
253 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
254 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
255 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
256 }
257
258 return Status;
259 }
260
261 /// Returns the minimum amount of LDS space used by a workgroup running
262 /// function \p F.
263 static unsigned getLDSSize(const Function &F) {
264 return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
265 {0, UINT32_MAX}, true)
266 .first;
267 }
268
269 /// Get the constant access bitmap for \p C.
270 uint8_t getConstantAccess(const Constant *C,
271 SmallPtrSetImpl<const Constant *> &Visited) {
272 auto It = ConstantStatus.find(C);
273 if (It != ConstantStatus.end())
274 return It->second;
275
276 uint8_t Result = 0;
277 if (isDSAddress(C))
278 Result = DS_GLOBAL;
279
280 if (const auto *CE = dyn_cast<ConstantExpr>(C))
281 Result |= visitConstExpr(CE);
282
283 for (const Use &U : C->operands()) {
284 const auto *OpC = dyn_cast<Constant>(U);
285 if (!OpC || !Visited.insert(OpC).second)
286 continue;
287
288 Result |= getConstantAccess(OpC, Visited);
289 }
290 return Result;
291 }
292
293public:
294 /// Returns true if \p Fn needs the queue pointer because of \p C.
295 bool needsQueuePtr(const Constant *C, Function &Fn) {
296 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
297 bool HasAperture = hasApertureRegs(Fn);
298
299 // No need to explore the constants.
300 if (!IsNonEntryFunc && HasAperture)
301 return false;
302
303 SmallPtrSet<const Constant *, 8> Visited;
304 uint8_t Access = getConstantAccess(C, Visited);
305
306 // We need to trap on DS globals in non-entry functions.
307 if (IsNonEntryFunc && (Access & DS_GLOBAL))
308 return true;
309
310 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
311 }
312
313 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
314 SmallPtrSet<const Constant *, 8> Visited;
315 uint8_t Access = getConstantAccess(C, Visited);
316 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
317 }
318
319private:
320 /// Used to determine if the Constant needs the queue pointer.
321 DenseMap<const Constant *, uint8_t> ConstantStatus;
322 const unsigned CodeObjectVersion;
323};
324
325struct AAAMDAttributes
326 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
327 AbstractAttribute> {
328 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
329 AbstractAttribute>;
330
331 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
332
333 /// Create an abstract attribute view for the position \p IRP.
334 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
335 Attributor &A);
336
337 /// See AbstractAttribute::getName().
338 StringRef getName() const override { return "AAAMDAttributes"; }
339
340 /// See AbstractAttribute::getIdAddr().
341 const char *getIdAddr() const override { return &ID; }
342
343 /// This function should return true if the type of the \p AA is
344 /// AAAMDAttributes.
345 static bool classof(const AbstractAttribute *AA) {
346 return (AA->getIdAddr() == &ID);
347 }
348
349 /// Unique ID (due to the unique address)
350 static const char ID;
351};
352const char AAAMDAttributes::ID = 0;
353
354struct AAUniformWorkGroupSize
355 : public StateWrapper<BooleanState, AbstractAttribute> {
356 using Base = StateWrapper<BooleanState, AbstractAttribute>;
357 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
358
359 /// Create an abstract attribute view for the position \p IRP.
360 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
361 Attributor &A);
362
363 /// See AbstractAttribute::getName().
364 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
365
366 /// See AbstractAttribute::getIdAddr().
367 const char *getIdAddr() const override { return &ID; }
368
369 /// This function should return true if the type of the \p AA is
370 /// AAAMDAttributes.
371 static bool classof(const AbstractAttribute *AA) {
372 return (AA->getIdAddr() == &ID);
373 }
374
375 /// Unique ID (due to the unique address)
376 static const char ID;
377};
378const char AAUniformWorkGroupSize::ID = 0;
379
380struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
381 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
382 : AAUniformWorkGroupSize(IRP, A) {}
383
384 void initialize(Attributor &A) override {
385 Function *F = getAssociatedFunction();
386 CallingConv::ID CC = F->getCallingConv();
387
388 if (CC != CallingConv::AMDGPU_KERNEL)
389 return;
390
391 bool InitialValue = false;
392 if (F->hasFnAttribute("uniform-work-group-size"))
393 InitialValue =
394 F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
395 "true";
396
397 if (InitialValue)
398 indicateOptimisticFixpoint();
399 else
400 indicatePessimisticFixpoint();
401 }
402
403 ChangeStatus updateImpl(Attributor &A) override {
404 ChangeStatus Change = ChangeStatus::UNCHANGED;
405
406 auto CheckCallSite = [&](AbstractCallSite CS) {
407 Function *Caller = CS.getInstruction()->getFunction();
408 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
409 << "->" << getAssociatedFunction()->getName() << "\n");
410
411 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
412 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
413 if (!CallerInfo || !CallerInfo->isValidState())
414 return false;
415
416 Change = Change | clampStateAndIndicateChange(this->getState(),
417 CallerInfo->getState());
418
419 return true;
420 };
421
422 bool AllCallSitesKnown = true;
423 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
424 return indicatePessimisticFixpoint();
425
426 return Change;
427 }
428
429  ChangeStatus manifest(Attributor &A) override {
430    SmallVector<Attribute, 8> AttrList;
431    LLVMContext &Ctx = getAssociatedFunction()->getContext();
432
433 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
434 getAssumed() ? "true" : "false"));
435 return A.manifestAttrs(getIRPosition(), AttrList,
436 /* ForceReplace */ true);
437 }
438
439 bool isValidState() const override {
440 // This state is always valid, even when the state is false.
441 return true;
442 }
443
444 const std::string getAsStr(Attributor *) const override {
445 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
446 }
447
448 /// See AbstractAttribute::trackStatistics()
449 void trackStatistics() const override {}
450};
451
452AAUniformWorkGroupSize &
453AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
454                                          Attributor &A) {
455  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
456    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
457  llvm_unreachable(
458      "AAUniformWorkGroupSize is only valid for function position");
459}
460
461struct AAAMDAttributesFunction : public AAAMDAttributes {
462 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
463 : AAAMDAttributes(IRP, A) {}
464
465 void initialize(Attributor &A) override {
466 Function *F = getAssociatedFunction();
467
468 // If the function requires the implicit arg pointer due to sanitizers,
469 // assume it's needed even if explicitly marked as not requiring it.
470 // Flat scratch initialization is needed because `asan_malloc_impl`
471 // calls introduced later in pipeline will have flat scratch accesses.
472 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
473 // implementation for `asan_malloc_impl` is updated.
474 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
475 if (HasSanitizerAttrs) {
476 removeAssumedBits(IMPLICIT_ARG_PTR);
477 removeAssumedBits(HOSTCALL_PTR);
478 removeAssumedBits(FLAT_SCRATCH_INIT);
479 }
480
481 for (auto Attr : ImplicitAttrs) {
482 if (HasSanitizerAttrs &&
483 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
484 Attr.first == FLAT_SCRATCH_INIT))
485 continue;
486
487 if (F->hasFnAttribute(Attr.second))
488 addKnownBits(Attr.first);
489 }
490
491 if (F->isDeclaration())
492 return;
493
494 // Ignore functions with graphics calling conventions, these are currently
495 // not allowed to have kernel arguments.
496 if (AMDGPU::isGraphics(F->getCallingConv())) {
497 indicatePessimisticFixpoint();
498 return;
499 }
500 }
501
502 ChangeStatus updateImpl(Attributor &A) override {
503 Function *F = getAssociatedFunction();
504 // The current assumed state used to determine a change.
505 auto OrigAssumed = getAssumed();
506
507 // Check for Intrinsics and propagate attributes.
508 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
509 *this, this->getIRPosition(), DepClassTy::REQUIRED);
510 if (!AAEdges || !AAEdges->isValidState() ||
511 AAEdges->hasNonAsmUnknownCallee())
512 return indicatePessimisticFixpoint();
513
514 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
515
516 bool NeedsImplicit = false;
517 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
518 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
519 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
520 unsigned COV = InfoCache.getCodeObjectVersion();
521
522 for (Function *Callee : AAEdges->getOptimisticEdges()) {
523 Intrinsic::ID IID = Callee->getIntrinsicID();
524 if (IID == Intrinsic::not_intrinsic) {
525 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
526 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
527 if (!AAAMD || !AAAMD->isValidState())
528 return indicatePessimisticFixpoint();
529 *this &= *AAAMD;
530 continue;
531 }
532
533 bool NonKernelOnly = false;
534 ImplicitArgumentMask AttrMask =
535 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
536 HasApertureRegs, SupportsGetDoorbellID, COV);
537 if (AttrMask != NOT_IMPLICIT_INPUT) {
538 if ((IsNonEntryFunc || !NonKernelOnly))
539 removeAssumedBits(AttrMask);
540 }
541 }
542
543 // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
544 if (NeedsImplicit)
545 removeAssumedBits(IMPLICIT_ARG_PTR);
546
547 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
548 // Under V5, we need implicitarg_ptr + offsets to access private_base or
549 // shared_base. We do not actually need queue_ptr.
550 if (COV >= 5)
551 removeAssumedBits(IMPLICIT_ARG_PTR);
552 else
553 removeAssumedBits(QUEUE_PTR);
554 }
555
556 if (funcRetrievesMultigridSyncArg(A, COV)) {
557 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
558 "multigrid_sync_arg needs implicitarg_ptr");
559 removeAssumedBits(MULTIGRID_SYNC_ARG);
560 }
561
562 if (funcRetrievesHostcallPtr(A, COV)) {
563 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
564 removeAssumedBits(HOSTCALL_PTR);
565 }
566
567 if (funcRetrievesHeapPtr(A, COV)) {
568 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
569 removeAssumedBits(HEAP_PTR);
570 }
571
572 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
573 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
574 removeAssumedBits(QUEUE_PTR);
575 }
576
577 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
578 removeAssumedBits(LDS_KERNEL_ID);
579 }
580
581 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
582 removeAssumedBits(DEFAULT_QUEUE);
583
584 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
585 removeAssumedBits(COMPLETION_ACTION);
586
587 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
588 removeAssumedBits(FLAT_SCRATCH_INIT);
589
590 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
591 : ChangeStatus::UNCHANGED;
592 }
593
594  ChangeStatus manifest(Attributor &A) override {
595    SmallVector<Attribute, 8> AttrList;
596    LLVMContext &Ctx = getAssociatedFunction()->getContext();
597
598 for (auto Attr : ImplicitAttrs) {
599 if (isKnown(Attr.first))
600 AttrList.push_back(Attribute::get(Ctx, Attr.second));
601 }
602
603 return A.manifestAttrs(getIRPosition(), AttrList,
604 /* ForceReplace */ true);
605 }
606
607 const std::string getAsStr(Attributor *) const override {
608 std::string Str;
609 raw_string_ostream OS(Str);
610 OS << "AMDInfo[";
611 for (auto Attr : ImplicitAttrs)
612 if (isAssumed(Attr.first))
613 OS << ' ' << Attr.second;
614 OS << " ]";
615 return OS.str();
616 }
617
618 /// See AbstractAttribute::trackStatistics()
619 void trackStatistics() const override {}
620
621private:
622 bool checkForQueuePtr(Attributor &A) {
623 Function *F = getAssociatedFunction();
624 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
625
626 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
627
628 bool NeedsQueuePtr = false;
629
630 auto CheckAddrSpaceCasts = [&](Instruction &I) {
631 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
632 if (castRequiresQueuePtr(SrcAS)) {
633 NeedsQueuePtr = true;
634 return false;
635 }
636 return true;
637 };
638
639 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
640
641 // `checkForAllInstructions` is much cheaper than going through all
642 // instructions, try it first.
643
644 // The queue pointer is not needed if aperture regs is present.
645 if (!HasApertureRegs) {
646 bool UsedAssumedInformation = false;
647 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
648 {Instruction::AddrSpaceCast},
649 UsedAssumedInformation);
650 }
651
652 // If we found that we need the queue pointer, nothing else to do.
653 if (NeedsQueuePtr)
654 return true;
655
656 if (!IsNonEntryFunc && HasApertureRegs)
657 return false;
658
659 for (BasicBlock &BB : *F) {
660 for (Instruction &I : BB) {
661 for (const Use &U : I.operands()) {
662 if (const auto *C = dyn_cast<Constant>(U)) {
663 if (InfoCache.needsQueuePtr(C, *F))
664 return true;
665 }
666 }
667 }
668 }
669
670 return false;
671 }
672
673  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
674    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
675    AA::RangeTy Range(Pos, 8);
676    return funcRetrievesImplicitKernelArg(A, Range);
677  }
678
679  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
680    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
681    AA::RangeTy Range(Pos, 8);
682    return funcRetrievesImplicitKernelArg(A, Range);
683  }
684
685  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
686    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
687    AA::RangeTy Range(Pos, 8);
688    return funcRetrievesImplicitKernelArg(A, Range);
689  }
690
691  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
692    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
693    AA::RangeTy Range(Pos, 8);
694    return funcRetrievesImplicitKernelArg(A, Range);
695  }
696
697  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
698    if (COV < 5)
699      return false;
700    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
701    return funcRetrievesImplicitKernelArg(A, Range);
702  }
703
704  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
705    if (COV < 5)
706      return false;
707    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
708    return funcRetrievesImplicitKernelArg(A, Range);
709  }
710
711 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
712 // Check if this is a call to the implicitarg_ptr builtin and it
713 // is used to retrieve the hostcall pointer. The implicit arg for
714 // hostcall is not used only if every use of the implicitarg_ptr
715 // is a load that clearly does not retrieve any byte of the
716 // hostcall pointer. We check this by tracing all the uses of the
717 // initial call to the implicitarg_ptr intrinsic.
718 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
719 auto &Call = cast<CallBase>(I);
720 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
721 return true;
722
723 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
724 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
725 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
726 return false;
727
728 return PointerInfoAA->forallInterferingAccesses(
729 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
730 return Acc.getRemoteInst()->isDroppable();
731 });
732 };
733
734 bool UsedAssumedInformation = false;
735 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
736 UsedAssumedInformation);
737 }
738
739 bool funcRetrievesLDSKernelId(Attributor &A) {
740 auto DoesNotRetrieve = [&](Instruction &I) {
741 auto &Call = cast<CallBase>(I);
742 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
743 };
744 bool UsedAssumedInformation = false;
745 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
746 UsedAssumedInformation);
747 }
748
749 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
750 // not to be set.
751 bool needFlatScratchInit(Attributor &A) {
752 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
753
754 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
755 // there is a cast from PRIVATE_ADDRESS.
756    auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
757      return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
758             AMDGPUAS::PRIVATE_ADDRESS;
759    };
760
761 bool UsedAssumedInformation = false;
762 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
763 {Instruction::AddrSpaceCast},
764 UsedAssumedInformation))
765 return true;
766
767 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
768 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
769
770 Function *F = getAssociatedFunction();
771 for (Instruction &I : instructions(F)) {
772 for (const Use &U : I.operands()) {
773 if (const auto *C = dyn_cast<Constant>(U)) {
774 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
775 return true;
776 }
777 }
778 }
779
780 // Finally check callees.
781
782 // This is called on each callee; false means callee shouldn't have
783 // no-flat-scratch-init.
784 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
785 const auto &CB = cast<CallBase>(I);
786 const Function *Callee = CB.getCalledFunction();
787
788 // Callee == 0 for inline asm or indirect call with known callees.
789 // In the latter case, updateImpl() already checked the callees and we
790 // know their FLAT_SCRATCH_INIT bit is set.
791 // If function has indirect call with unknown callees, the bit is
792 // already removed in updateImpl() and execution won't reach here.
793 if (!Callee)
794 return true;
795
796 return Callee->getIntrinsicID() !=
797 Intrinsic::amdgcn_addrspacecast_nonnull;
798 };
799
800 UsedAssumedInformation = false;
801 // If any callee is false (i.e. need FlatScratchInit),
802 // checkForAllCallLikeInstructions returns false, in which case this
803 // function returns true.
804 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
805 UsedAssumedInformation);
806 }
807};
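
AAAMDAttributesFunction above drives a BitIntegerState over the ImplicitArgumentMask bits: every bit starts optimistically assumed (the corresponding implicit input is presumed unneeded), removeAssumedBits() clears a bit once a use of that implicit input is found, addKnownBits() records bits already proven by existing function attributes, and manifest() emits one string attribute per known bit. The following is a simplified standalone model of that assumed/known discipline; it is a sketch, not LLVM's BitIntegerState, and the enumerator names are made up.

// Standalone model of the assumed/known bit discipline used above.
#include <cassert>
#include <cstdint>

struct AssumedKnownBits {
  uint32_t Known = 0; // facts proven so far; never retracted
  uint32_t Assumed;   // optimistic superset of Known
  explicit AssumedKnownBits(uint32_t All) : Assumed(All) {}

  void addKnownBits(uint32_t Bits) { Known |= Bits; Assumed |= Bits; }
  // Drop assumed bits, but never retract anything already known.
  void removeAssumedBits(uint32_t Bits) { Assumed = Known | (Assumed & ~Bits); }
  bool isAssumed(uint32_t Bits) const { return (Assumed & Bits) == Bits; }
  bool isKnown(uint32_t Bits) const { return (Known & Bits) == Bits; }

  // At a fixpoint the surviving assumption becomes known and can be
  // manifested as one string attribute per still-set bit (see manifest()).
  void indicateOptimisticFixpoint() { Known = Assumed; }
};

int main() {
  enum : uint32_t { EX_DISPATCH_PTR = 1u << 0, EX_QUEUE_PTR = 1u << 1 };
  AssumedKnownBits State(EX_DISPATCH_PTR | EX_QUEUE_PTR); // assume none needed
  State.removeAssumedBits(EX_QUEUE_PTR); // evidence: queue_ptr is required
  State.indicateOptimisticFixpoint();
  assert(State.isKnown(EX_DISPATCH_PTR) && !State.isAssumed(EX_QUEUE_PTR));
  return 0;
}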
808
809AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
810                                                    Attributor &A) {
811  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
812    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
813 llvm_unreachable("AAAMDAttributes is only valid for function position");
814}
815
816/// Base class to derive different size ranges.
817struct AAAMDSizeRangeAttribute
818 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
819 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
820
821 StringRef AttrName;
822
823 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
824 StringRef AttrName)
825 : Base(IRP, 32), AttrName(AttrName) {}
826
827 /// See AbstractAttribute::trackStatistics()
828 void trackStatistics() const override {}
829
830 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
831 ChangeStatus Change = ChangeStatus::UNCHANGED;
832
833 auto CheckCallSite = [&](AbstractCallSite CS) {
834 Function *Caller = CS.getInstruction()->getFunction();
835 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
836 << "->" << getAssociatedFunction()->getName() << '\n');
837
838 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
839 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
840 if (!CallerInfo || !CallerInfo->isValidState())
841 return false;
842
843 Change |=
844 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
845
846 return true;
847 };
848
849 bool AllCallSitesKnown = true;
850 if (!A.checkForAllCallSites(CheckCallSite, *this,
851 /*RequireAllCallSites=*/true,
852 AllCallSitesKnown))
853 return indicatePessimisticFixpoint();
854
855 return Change;
856 }
857
858 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
859  /// attribute if it is not the same as the default.
860  ChangeStatus
861  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
862 std::pair<unsigned, unsigned> Default) {
863 auto [Min, Max] = Default;
864 unsigned Lower = getAssumed().getLower().getZExtValue();
865 unsigned Upper = getAssumed().getUpper().getZExtValue();
866
867 // Clamp the range to the default value.
868 if (Lower < Min)
869 Lower = Min;
870 if (Upper > Max + 1)
871 Upper = Max + 1;
872
873 // No manifest if the value is invalid or same as default after clamp.
874 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
875 return ChangeStatus::UNCHANGED;
876
877 Function *F = getAssociatedFunction();
878 LLVMContext &Ctx = F->getContext();
879 SmallString<10> Buffer;
880 raw_svector_ostream OS(Buffer);
881 OS << Lower << ',' << Upper - 1;
882 return A.manifestAttrs(getIRPosition(),
883 {Attribute::get(Ctx, AttrName, OS.str())},
884 /*ForceReplace=*/true);
885 }
886
887 const std::string getAsStr(Attributor *) const override {
888 std::string Str;
889 raw_string_ostream OS(Str);
890 OS << getName() << '[';
891 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
892 OS << ']';
893 return OS.str();
894 }
895};
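
emitAttributeIfNotDefaultAfterClamp() above clamps the deduced half-open range [Lower, Upper) into the default [Min, Max] and manifests the attribute only when the clamped range is valid and strictly tighter than the default. The same decision restated over plain integers, as a sketch rather than the LLVM API:

// Sketch of the clamp-and-emit decision, over plain unsigned values.
// The assumed range is half-open [Lower, Upper); Default is inclusive [Min, Max].
#include <algorithm>
#include <optional>
#include <string>
#include <utility>

std::optional<std::string>
attrValueAfterClamp(unsigned Lower, unsigned Upper,
                    std::pair<unsigned, unsigned> Default) {
  auto [Min, Max] = Default;
  Lower = std::max(Lower, Min);
  Upper = std::min(Upper, Max + 1);
  // Same as the default after clamping, or an empty range: emit nothing.
  if ((Lower == Min && Upper == Max + 1) || Upper < Lower)
    return std::nullopt;
  // Otherwise the attribute value is the inclusive pair "Lower,Upper-1".
  return std::to_string(Lower) + "," + std::to_string(Upper - 1);
}

For example, with Default = {1, 10}, an assumed range [4, 8) yields the value "4,7", while an assumed [1, 11) clamps to the default and nothing is emitted.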
896
897/// Propagate amdgpu-flat-work-group-size attribute.
898struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
899 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
900 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
901
902 void initialize(Attributor &A) override {
903 Function *F = getAssociatedFunction();
904 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
905
906 bool HasAttr = false;
907 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
908 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
909
910 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
911 // We only consider an attribute that is not max range because the front
912 // end always emits the attribute, unfortunately, and sometimes it emits
913 // the max range.
914 if (*Attr != MaxRange) {
915 Range = *Attr;
916 HasAttr = true;
917 }
918 }
919
920 // We don't want to directly clamp the state if it's the max range because
921 // that is basically the worst state.
922 if (Range == MaxRange)
923 return;
924
925 auto [Min, Max] = Range;
926 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
927 IntegerRangeState IRS(CR);
928 clampStateAndIndicateChange(this->getState(), IRS);
929
930 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
931 indicateOptimisticFixpoint();
932 }
933
934 ChangeStatus updateImpl(Attributor &A) override {
935 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
936 }
937
938 /// Create an abstract attribute view for the position \p IRP.
939 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
940 Attributor &A);
941
942 ChangeStatus manifest(Attributor &A) override {
943 Function *F = getAssociatedFunction();
944 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
945 return emitAttributeIfNotDefaultAfterClamp(
946 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
947 }
948
949 /// See AbstractAttribute::getName()
950 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
951
952 /// See AbstractAttribute::getIdAddr()
953 const char *getIdAddr() const override { return &ID; }
954
955 /// This function should return true if the type of the \p AA is
956 /// AAAMDFlatWorkGroupSize
957 static bool classof(const AbstractAttribute *AA) {
958 return (AA->getIdAddr() == &ID);
959 }
960
961 /// Unique ID (due to the unique address)
962 static const char ID;
963};
964
965const char AAAMDFlatWorkGroupSize::ID = 0;
966
967AAAMDFlatWorkGroupSize &
968AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
969                                          Attributor &A) {
970  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
971    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
972  llvm_unreachable(
973      "AAAMDFlatWorkGroupSize is only valid for function position");
974}
975
976struct TupleDecIntegerRangeState : public AbstractState {
977 DecIntegerState<uint32_t> X, Y, Z;
978
979 bool isValidState() const override {
980 return X.isValidState() && Y.isValidState() && Z.isValidState();
981 }
982
983 bool isAtFixpoint() const override {
984 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
985 }
986
987 ChangeStatus indicateOptimisticFixpoint() override {
988 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
989 Z.indicateOptimisticFixpoint();
990 }
991
992 ChangeStatus indicatePessimisticFixpoint() override {
993 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
994 Z.indicatePessimisticFixpoint();
995 }
996
997 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
998 X ^= Other.X;
999 Y ^= Other.Y;
1000 Z ^= Other.Z;
1001 return *this;
1002 }
1003
1004 bool operator==(const TupleDecIntegerRangeState &Other) const {
1005 return X == Other.X && Y == Other.Y && Z == Other.Z;
1006 }
1007
1008 TupleDecIntegerRangeState &getAssumed() { return *this; }
1009 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
1010};
1011
1012using AAAMDMaxNumWorkgroupsState =
1013 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1014
1015/// Propagate amdgpu-max-num-workgroups attribute.
1016struct AAAMDMaxNumWorkgroups
1017 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1018 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1019
1020 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1021
1022 void initialize(Attributor &A) override {
1023 Function *F = getAssociatedFunction();
1024 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1025
1026 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
1027
1028 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1029 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1030 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1031
1032 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1033 indicatePessimisticFixpoint();
1034 }
1035
1036 ChangeStatus updateImpl(Attributor &A) override {
1037 ChangeStatus Change = ChangeStatus::UNCHANGED;
1038
1039 auto CheckCallSite = [&](AbstractCallSite CS) {
1040 Function *Caller = CS.getInstruction()->getFunction();
1041 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1042 << "->" << getAssociatedFunction()->getName() << '\n');
1043
1044 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1045 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1046 if (!CallerInfo || !CallerInfo->isValidState())
1047 return false;
1048
1049 Change |=
1050 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1051 return true;
1052 };
1053
1054 bool AllCallSitesKnown = true;
1055 if (!A.checkForAllCallSites(CheckCallSite, *this,
1056 /*RequireAllCallSites=*/true,
1057 AllCallSitesKnown))
1058 return indicatePessimisticFixpoint();
1059
1060 return Change;
1061 }
1062
1063 /// Create an abstract attribute view for the position \p IRP.
1064 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1065 Attributor &A);
1066
1067 ChangeStatus manifest(Attributor &A) override {
1068 Function *F = getAssociatedFunction();
1069 LLVMContext &Ctx = F->getContext();
1070 SmallString<32> Buffer;
1071 raw_svector_ostream OS(Buffer);
1072 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1073
1074 // TODO: Should annotate loads of the group size for this to do anything
1075 // useful.
1076 return A.manifestAttrs(
1077 getIRPosition(),
1078 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1079 /* ForceReplace= */ true);
1080 }
1081
1082 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1083
1084 const std::string getAsStr(Attributor *) const override {
1085 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1086 raw_string_ostream OS(Buffer);
1087 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1088 << ']';
1089 return OS.str();
1090 }
1091
1092 const char *getIdAddr() const override { return &ID; }
1093
1094 /// This function should return true if the type of the \p AA is
1095 /// AAAMDMaxNumWorkgroups
1096 static bool classof(const AbstractAttribute *AA) {
1097 return (AA->getIdAddr() == &ID);
1098 }
1099
1100 void trackStatistics() const override {}
1101
1102 /// Unique ID (due to the unique address)
1103 static const char ID;
1104};
1105
1106const char AAAMDMaxNumWorkgroups::ID = 0;
1107
1108AAAMDMaxNumWorkgroups &
1109AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1110  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1111    return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1112 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1113}
1114
1115/// Propagate amdgpu-waves-per-eu attribute.
1116struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1117 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1118 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1119
1120 void initialize(Attributor &A) override {
1121 Function *F = getAssociatedFunction();
1122 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1123
1124 // If the attribute exists, we will honor it if it is not the default.
1125 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1126 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1127 1U, InfoCache.getMaxWavesPerEU(*F)};
1128 if (*Attr != MaxWavesPerEURange) {
1129 auto [Min, Max] = *Attr;
1130 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1131 IntegerRangeState RangeState(Range);
1132 this->getState() = RangeState;
1133 indicateOptimisticFixpoint();
1134 return;
1135 }
1136 }
1137
1138 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1139 indicatePessimisticFixpoint();
1140 }
1141
1142 ChangeStatus updateImpl(Attributor &A) override {
1143 ChangeStatus Change = ChangeStatus::UNCHANGED;
1144
1145 auto CheckCallSite = [&](AbstractCallSite CS) {
1146 Function *Caller = CS.getInstruction()->getFunction();
1147 Function *Func = getAssociatedFunction();
1148 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1149 << "->" << Func->getName() << '\n');
1150 (void)Func;
1151
1152 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1153 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1154 if (!CallerAA || !CallerAA->isValidState())
1155 return false;
1156
1157 ConstantRange Assumed = getAssumed();
1158 unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1159 CallerAA->getAssumed().getLower().getZExtValue());
1160 unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1161 CallerAA->getAssumed().getUpper().getZExtValue());
1162 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1163 IntegerRangeState RangeState(Range);
1164 getState() = RangeState;
1165 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1166 : ChangeStatus::CHANGED;
1167
1168 return true;
1169 };
1170
1171 bool AllCallSitesKnown = true;
1172 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1173 return indicatePessimisticFixpoint();
1174
1175 return Change;
1176 }
1177
1178 /// Create an abstract attribute view for the position \p IRP.
1179 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1180 Attributor &A);
1181
1182 ChangeStatus manifest(Attributor &A) override {
1183 Function *F = getAssociatedFunction();
1184 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1185 return emitAttributeIfNotDefaultAfterClamp(
1186 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1187 }
1188
1189 /// See AbstractAttribute::getName()
1190 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1191
1192 /// See AbstractAttribute::getIdAddr()
1193 const char *getIdAddr() const override { return &ID; }
1194
1195 /// This function should return true if the type of the \p AA is
1196 /// AAAMDWavesPerEU
1197 static bool classof(const AbstractAttribute *AA) {
1198 return (AA->getIdAddr() == &ID);
1199 }
1200
1201 /// Unique ID (due to the unique address)
1202 static const char ID;
1203};
1204
1205const char AAAMDWavesPerEU::ID = 0;
1206
1207AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1208                                                    Attributor &A) {
1209  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1210    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1211 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1212}
1213
1214static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
1215 for (const auto &CI : IA->ParseConstraints()) {
1216 for (StringRef Code : CI.Codes) {
1217 Code.consume_front("{");
1218 if (Code.starts_with("a"))
1219 return true;
1220 }
1221 }
1222
1223 return false;
1224}
1225
1226// TODO: Migrate to range merge of amdgpu-agpr-alloc.
1227struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
1228 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1229 AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1230
1231 static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
1232                                           Attributor &A) {
1233    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1234      return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
1235 llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
1236 }
1237
1238 void initialize(Attributor &A) override {
1239 Function *F = getAssociatedFunction();
1240 auto [MinNumAGPR, MaxNumAGPR] =
1241 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1242 /*OnlyFirstRequired=*/true);
1243 if (MinNumAGPR == 0)
1244 indicateOptimisticFixpoint();
1245 }
1246
1247 const std::string getAsStr(Attributor *A) const override {
1248 return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
1249 }
1250
1251 void trackStatistics() const override {}
1252
1253 ChangeStatus updateImpl(Attributor &A) override {
1254 // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1255
1256 auto CheckForNoAGPRs = [&](Instruction &I) {
1257 const auto &CB = cast<CallBase>(I);
1258 const Value *CalleeOp = CB.getCalledOperand();
1259 const Function *Callee = dyn_cast<Function>(CalleeOp);
1260 if (!Callee) {
1261 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1262 return !inlineAsmUsesAGPRs(IA);
1263 return false;
1264 }
1265
1266 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1267 // required to use AGPRs.
1268 if (Callee->isIntrinsic())
1269 return true;
1270
1271 // TODO: Handle callsite attributes
1272 const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
1273 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
1274 return CalleeInfo && CalleeInfo->isValidState() &&
1275 CalleeInfo->getAssumed();
1276 };
1277
1278 bool UsedAssumedInformation = false;
1279 if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
1280 UsedAssumedInformation))
1281 return indicatePessimisticFixpoint();
1282 return ChangeStatus::UNCHANGED;
1283 }
1284
1285 ChangeStatus manifest(Attributor &A) override {
1286 if (!getAssumed())
1287 return ChangeStatus::UNCHANGED;
1288 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1289 return A.manifestAttrs(getIRPosition(),
1290 {Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")});
1291 }
1292
1293 StringRef getName() const override { return "AAAMDGPUNoAGPR"; }
1294 const char *getIdAddr() const override { return &ID; }
1295
1296 /// This function should return true if the type of the \p AA is
1297 /// AAAMDGPUNoAGPRs
1298 static bool classof(const AbstractAttribute *AA) {
1299 return (AA->getIdAddr() == &ID);
1300 }
1301
1302 static const char ID;
1303};
1304
1305const char AAAMDGPUNoAGPR::ID = 0;
1306
1307/// An abstract attribute to propagate the function attribute
1308/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1309struct AAAMDGPUClusterDims
1310 : public StateWrapper<BooleanState, AbstractAttribute> {
1311 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1312 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1313
1314 /// Create an abstract attribute view for the position \p IRP.
1315 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1316 Attributor &A);
1317
1318 /// See AbstractAttribute::getName().
1319 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1320
1321 /// See AbstractAttribute::getIdAddr().
1322 const char *getIdAddr() const override { return &ID; }
1323
1324 /// This function should return true if the type of the \p AA is
1325 /// AAAMDGPUClusterDims.
1326 static bool classof(const AbstractAttribute *AA) {
1327 return AA->getIdAddr() == &ID;
1328 }
1329
1330 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1331
1332 /// Unique ID (due to the unique address)
1333 static const char ID;
1334};
1335
1336const char AAAMDGPUClusterDims::ID = 0;
1337
1338struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1339 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1340 : AAAMDGPUClusterDims(IRP, A) {}
1341
1342 void initialize(Attributor &A) override {
1343 Function *F = getAssociatedFunction();
1344 assert(F && "empty associated function");
1345
1346    Attr = AMDGPU::ClusterDimsAttr::get(*F);
1347
1348 // No matter what a kernel function has, it is final.
1349 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1350 if (Attr.isUnknown())
1351 indicatePessimisticFixpoint();
1352 else
1353 indicateOptimisticFixpoint();
1354 }
1355 }
1356
1357 const std::string getAsStr(Attributor *A) const override {
1358 if (!getAssumed() || Attr.isUnknown())
1359 return "unknown";
1360 if (Attr.isNoCluster())
1361 return "no";
1362 if (Attr.isVariableDims())
1363 return "variable";
1364 return Attr.to_string();
1365 }
1366
1367 void trackStatistics() const override {}
1368
1369 ChangeStatus updateImpl(Attributor &A) override {
1370 auto OldState = Attr;
1371
1372 auto CheckCallSite = [&](AbstractCallSite CS) {
1373 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1374 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1375 DepClassTy::REQUIRED);
1376 if (!CallerAA || !CallerAA->isValidState())
1377 return false;
1378
1379 return merge(CallerAA->getClusterDims());
1380 };
1381
1382 bool UsedAssumedInformation = false;
1383 if (!A.checkForAllCallSites(CheckCallSite, *this,
1384 /*RequireAllCallSites=*/true,
1385 UsedAssumedInformation))
1386 return indicatePessimisticFixpoint();
1387
1388 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1389 }
1390
1391 ChangeStatus manifest(Attributor &A) override {
1392 if (Attr.isUnknown())
1393 return ChangeStatus::UNCHANGED;
1394 return A.manifestAttrs(
1395 getIRPosition(),
1396 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1397 Attr.to_string())},
1398 /*ForceReplace=*/true);
1399 }
1400
1401 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1402 return Attr;
1403 }
1404
1405private:
1406 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1407 // Case 1: Both of them are unknown yet; we do nothing and continue to wait for
1408 // propagation.
1409 if (Attr.isUnknown() && Other.isUnknown())
1410 return true;
1411
1412 // Case 2: The other is determined, but we are unknown yet, we simply take
1413 // the other's value.
1414 if (Attr.isUnknown()) {
1415 Attr = Other;
1416 return true;
1417 }
1418
1419 // Case 3: We are determined but the other is unknown yet, we simply keep
1420 // everything unchanged.
1421 if (Other.isUnknown())
1422 return true;
1423
1424 // After this point, both are determined.
1425
1426 // Case 4: If they are the same, we do nothing.
1427 if (Attr == Other)
1428 return true;
1429
1430 // Now they are not the same.
1431
1432 // Case 5: If either of us uses cluster (but not both; otherwise case 4
1433 // would hold), then it is unknown whether cluster will be used, and the
1434 // state is final, unlike case 1.
1435 if (Attr.isNoCluster() || Other.isNoCluster()) {
1436 Attr.setUnknown();
1437 return false;
1438 }
1439
1440 // Case 6: Both of us use clusters, but the dims are different, so the result
1441 // is that a cluster is used, just without fixed dims.
1442 Attr.setVariableDims();
1443 return true;
1444 }
1445
1446 AMDGPU::ClusterDimsAttr Attr;
1447
1448 static constexpr const char AttrName[] = "amdgpu-cluster-dims";
1449};
1450
1451AAAMDGPUClusterDims &
1452AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1453  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1454    return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1455 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1456}
1457
1458static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1459 AMDGPUAttributorOptions Options,
1460 ThinOrFullLTOPhase LTOPhase) {
1461 SetVector<Function *> Functions;
1462 for (Function &F : M) {
1463 if (!F.isIntrinsic())
1464 Functions.insert(&F);
1465 }
1466
1467 CallGraphUpdater CGUpdater;
1468  BumpPtrAllocator Allocator;
1469  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1470 DenseSet<const char *> Allowed(
1471 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1472 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1473 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1474       &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1475       &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1476       &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
1477
1478 AttributorConfig AC(CGUpdater);
1479 AC.IsClosedWorldModule = Options.IsClosedWorld;
1480 AC.Allowed = &Allowed;
1481 AC.IsModulePass = true;
1482 AC.DefaultInitializeLiveInternals = false;
1483 AC.IndirectCalleeSpecializationCallback =
1484 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1485 Function &Callee, unsigned NumAssumedCallees) {
1486 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1487 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1488 };
1489 AC.IPOAmendableCB = [](const Function &F) {
1490 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1491 };
1492
1493 Attributor A(Functions, InfoCache, AC);
1494
1495 LLVM_DEBUG({
1496 StringRef LTOPhaseStr = to_string(LTOPhase);
1497 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1498 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1499 << (AC.IsClosedWorldModule ? "" : "not ")
1500 << "assumed to be a closed world.\n";
1501 });
1502
1503 for (auto *F : Functions) {
1504 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1505 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1506 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1507 A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
1508 CallingConv::ID CC = F->getCallingConv();
1509 if (!AMDGPU::isEntryFunctionCC(CC)) {
1510 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1511 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1512 }
1513
1514 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1515 if (!F->isDeclaration() && ST.hasClusters())
1516 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1517
1518 for (auto &I : instructions(F)) {
1519 Value *Ptr = nullptr;
1520 if (auto *LI = dyn_cast<LoadInst>(&I))
1521 Ptr = LI->getPointerOperand();
1522 else if (auto *SI = dyn_cast<StoreInst>(&I))
1523 Ptr = SI->getPointerOperand();
1524 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1525 Ptr = RMW->getPointerOperand();
1526 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1527 Ptr = CmpX->getPointerOperand();
1528
1529 if (Ptr) {
1530 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1531 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1532 }
1533 }
1534 }
1535
1536 return A.run() == ChangeStatus::CHANGED;
1537}
1538} // namespace
1539
1540PreservedAnalyses AMDGPUAttributorPass::run(Module &M,
1541                                            ModuleAnalysisManager &AM) {
1542
1543  FunctionAnalysisManager &FAM =
1544      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1545  AnalysisGetter AG(FAM);
1546
1547  // TODO: Probably preserves CFG
1548  return runImpl(M, AG, TM, Options, LTOPhase) ? PreservedAnalyses::none()
1549                                               : PreservedAnalyses::all();
1550}