#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-attributor"

static cl::opt<unsigned> IndirectCallSpecializationThreshold(
    "amdgpu-indirect-call-specialization-threshold",
    cl::desc("A threshold controls whether an indirect call will be specialized"),
    cl::init(3));

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
#include "AMDGPUAttributes.def"

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
#include "AMDGPUAttributes.def"

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};
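// Map an intrinsic ID to the implicit-argument bit(s) it requires. The
// NonKernelOnly/NeedsImplicit out-parameters and the aperture-register,
// doorbell and code-object-version inputs refine the answer for the cases
// that depend on the subtarget.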
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
  case Intrinsic::amdgcn_workgroup_id_x:
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_cluster_id_x:
  case Intrinsic::amdgcn_cluster_id_y:
  case Intrinsic::amdgcn_cluster_id_z:
  case Intrinsic::amdgcn_lds_kernel_id:
  case Intrinsic::amdgcn_dispatch_ptr:
  case Intrinsic::amdgcn_dispatch_id:
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::ubsantrap:
    if (SupportsGetDoorBellID)
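// Returns true if sanitizer attributes are present on a function.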
static bool hasSanitizerAttributes(const Function &F) {
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
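// Cache of subtarget-dependent queries (aperture registers, doorbell support,
// flat work-group-size and waves-per-EU ranges, code object version) shared by
// the AMDGPU abstract attributes below.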
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)

  enum ConstantStatus : uint8_t {
    ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
    ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
    ADDR_SPACE_CAST_BOTH_TO_FLAT =
        ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
  };

  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::optional<std::pair<unsigned, unsigned>>
  getFlatWorkGroupSizeAttr(const Function &F) const {
    return std::make_pair(R->first, *(R->second));
  }

  std::pair<unsigned, unsigned>
  getDefaultFlatWorkGroupSize(const Function &F) const {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxNumWorkGroups(F);
  }

  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }

  std::pair<unsigned, unsigned>
  getWavesPerEU(const Function &F,
                std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
  }

  std::optional<std::pair<unsigned, unsigned>>
  getWavesPerEUAttr(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    Val->second = ST.getMaxWavesPerEU();
    return std::make_pair(Val->first, *(Val->second));
  }

  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(const Function &F,
                         std::pair<unsigned, unsigned> WavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();

  unsigned getMaxAddrSpace() const override {
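  // The helpers below scan constants (and their operand trees) for address
  // space casts from private/local to flat; results are cached per constant
  // in ConstantStatus.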
  static uint8_t visitConstExpr(const ConstantExpr *CE) {
    uint8_t Status = NONE;

    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;

  static unsigned getLDSSize(const Function &F) {
        {0, UINT32_MAX}, true)

  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())

      Result |= visitConstExpr(CE);

    for (const Use &U : C->operands()) {
      if (!OpC || !Visited.insert(OpC).second)

      Result |= getConstantAccess(OpC, Visited);

  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool HasAperture = hasApertureRegs(Fn);

    if (!IsNonEntryFunc && HasAperture)

    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);

    if (IsNonEntryFunc && (Access & DS_GLOBAL))

    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
  }

  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);
    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
  }

  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
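// Abstract attribute tracking, as a bit set over ImplicitArgumentMask, which
// implicit kernel arguments (queue_ptr, hostcall_ptr, implicitarg_ptr, ...) a
// function may need.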
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  StringRef getName() const override { return "AAAMDAttributes"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  StringRef getName() const override { return "AAUniformWorkGroupSize"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
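// Propagates "uniform-work-group-size": a non-kernel function can only keep
// the attribute set to "true" if every known call site comes from a function
// that also has it.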
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==

      indicateOptimisticFixpoint();
      indicatePessimisticFixpoint();

    auto CheckCallSite = [&](AbstractCallSite CS) {
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
      if (!CallerInfo || !CallerInfo->isValidState())

                                        CallerInfo->getState());

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return A.manifestAttrs(getIRPosition(), AttrList,

  bool isValidState() const override {

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";

  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

    if (HasSanitizerAttrs) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
      removeAssumedBits(FLAT_SCRATCH_INIT);

      if (HasSanitizerAttrs &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
           Attr.first == FLAT_SCRATCH_INIT))

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);

    if (F->isDeclaration())

      indicatePessimisticFixpoint();

    auto OrigAssumed = getAssumed();

    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
      return indicatePessimisticFixpoint();

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

      const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
      if (!AAAMD || !AAAMD->isValidState())
        return indicatePessimisticFixpoint();

      bool NonKernelOnly = false;
                              HasApertureRegs, SupportsGetDoorbellID, COV);

      if ((IsNonEntryFunc || !NonKernelOnly))
        removeAssumedBits(AttrMask);

      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
        removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(QUEUE_PTR);

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
      removeAssumedBits(FLAT_SCRATCH_INIT);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;

    LLVMContext &Ctx = getAssociatedFunction()->getContext();

      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));

    return A.manifestAttrs(getIRPosition(), AttrList,

  const std::string getAsStr(Attributor *) const override {
    raw_string_ostream OS(Str);
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;

  void trackStatistics() const override {}

  bool checkForQueuePtr(Attributor &A) {
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
        NeedsQueuePtr = true;

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    if (!IsNonEntryFunc && HasApertureRegs)

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (InfoCache.needsQueuePtr(C, *F))

  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
      const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
      if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())

      return PointerInfoAA->forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }

  bool funcRetrievesLDSKernelId(Attributor &A) {
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }

  bool needFlatScratchInit(Attributor &A) {
    assert(isAssumed(FLAT_SCRATCH_INIT));

    bool UsedAssumedInformation = false;
    if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
                                   {Instruction::AddrSpaceCast},
                                   UsedAssumedInformation))

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

      for (const Use &U : I.operands()) {
        if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))

      return Callee->getIntrinsicID() !=
             Intrinsic::amdgcn_addrspacecast_nonnull;

    UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
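// Common base for the integer-range attributes ("amdgpu-flat-work-group-size",
// "amdgpu-waves-per-eu"): the assumed range is clamped against all callers and
// only emitted when it differs from the subtarget default.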
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {}

  void trackStatistics() const override {}

  template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {

    auto CheckCallSite = [&](AbstractCallSite CS) {
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
      if (!CallerInfo || !CallerInfo->isValidState())

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
      return indicatePessimisticFixpoint();

  ChangeStatus
  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
                                      std::pair<unsigned, unsigned> Default) {
    unsigned Lower = getAssumed().getLower().getZExtValue();
    unsigned Upper = getAssumed().getUpper().getZExtValue();

      return ChangeStatus::UNCHANGED;

    LLVMContext &Ctx = F->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},

  const std::string getAsStr(Attributor *) const override {
    raw_string_ostream OS(Str);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
};

struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasAttr = false;
    auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
    auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);

    if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
      if (*Attr != MaxRange) {

    if (Range == MaxRange)

    ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
    IntegerRangeState IRS(CR);

    indicateOptimisticFixpoint();

    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);

  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, InfoCache.getMaximumFlatWorkGroupRange(*F));

  StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}
struct TupleDecIntegerRangeState : public AbstractState {
  DecIntegerState<uint32_t> X, Y, Z;

  bool isValidState() const override {
    return X.isValidState() && Y.isValidState() && Z.isValidState();
  }

  bool isAtFixpoint() const override {
    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
  }

    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
           Z.indicateOptimisticFixpoint();

    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
           Z.indicatePessimisticFixpoint();

  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {

  TupleDecIntegerRangeState &getAssumed() { return *this; }
  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
};

using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;

struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

    indicatePessimisticFixpoint();

    auto CheckCallSite = [&](AbstractCallSite CS) {
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
      if (!CallerInfo || !CallerInfo->isValidState())

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
      return indicatePessimisticFixpoint();

  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,

    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();

    return A.manifestAttrs(
        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},

  StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }

  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  void trackStatistics() const override {}

  static const char ID;
};
const char AAAMDMaxNumWorkgroups::ID = 0;

AAAMDMaxNumWorkgroups &
AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
    return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
  llvm_unreachable(
      "AAAMDMaxNumWorkgroups is only valid for function position");
}
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
      std::pair<unsigned, unsigned> MaxWavesPerEURange{
          1U, InfoCache.getMaxWavesPerEU(*F)};
      if (*Attr != MaxWavesPerEURange) {
        auto [Min, Max] = *Attr;
        ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
        IntegerRangeState RangeState(Range);
        this->getState() = RangeState;
        indicateOptimisticFixpoint();

    indicatePessimisticFixpoint();

    auto CheckCallSite = [&](AbstractCallSite CS) {
                        << "->" << Func->getName() << '\n');

      const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
      if (!CallerAA || !CallerAA->isValidState())

      ConstantRange Assumed = getAssumed();
               CallerAA->getAssumed().getLower().getZExtValue());
               CallerAA->getAssumed().getUpper().getZExtValue());
      ConstantRange Range(APInt(32, Min), APInt(32, Max));
      IntegerRangeState RangeState(Range);
      getState() = RangeState;
      Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
                                      : ChangeStatus::CHANGED;

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, {1U, InfoCache.getMaxWavesPerEU(*F)});

  StringRef getName() const override { return "AAAMDWavesPerEU"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
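// Returns true if the inline asm constraint string may name an AGPR ("a"
// register class) operand. AAAMDGPUNoAGPR combines this with the callees'
// state to decide whether "amdgpu-agpr-alloc"="0" can be emitted.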
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
  for (const auto &CI : IA->ParseConstraints()) {
    for (StringRef Code : CI.Codes) {
      Code.consume_front("{");
      if (Code.starts_with("a"))

struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
    return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);

    auto [MinNumAGPR, MaxNumAGPR] =
    if (MinNumAGPR == 0)
      indicateOptimisticFixpoint();

  const std::string getAsStr(Attributor *A) const override {
    return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";

  void trackStatistics() const override {}

      const Value *CalleeOp = CB.getCalledOperand();
        return !inlineAsmUsesAGPRs(IA);

      if (Callee->isIntrinsic())

      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
      return CalleeInfo && CalleeInfo->isValidState() &&
             CalleeInfo->getAssumed();

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();
    return ChangeStatus::UNCHANGED;

      return ChangeStatus::UNCHANGED;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")});

  StringRef getName() const override { return "AAAMDGPUNoAGPR"; }
  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAAMDGPUNoAGPR::ID = 0;
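// Tracks the "amdgpu-cluster-dims" attribute: for non-kernel functions the
// cluster-dimension information of all known callers is merged before the
// attribute is manifested.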
struct AAAMDGPUClusterDims
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,

  StringRef getName() const override { return "AAAMDGPUClusterDims"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;

  static const char ID;
};
const char AAAMDGPUClusterDims::ID = 0;

struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
  AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDGPUClusterDims(IRP, A) {}

    assert(F && "empty associated function");

    indicatePessimisticFixpoint();
    indicateOptimisticFixpoint();

  const std::string getAsStr(Attributor *A) const override {

  void trackStatistics() const override {}

    auto OldState = Attr;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
          DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())

      return merge(CallerAA->getClusterDims());

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;

      return ChangeStatus::UNCHANGED;
    return A.manifestAttrs(
        {Attribute::get(getAssociatedFunction()->getContext(), AttrName,

  const AMDGPU::ClusterDimsAttr &getClusterDims() const override {

  bool merge(const AMDGPU::ClusterDimsAttr &Other) {

    if (Other.isUnknown())

  AMDGPU::ClusterDimsAttr Attr;

  static constexpr const char AttrName[] = "amdgpu-cluster-dims";
};

AAAMDGPUClusterDims &
AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
    return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
  llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
}
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
                    AMDGPUAttributorOptions Options,
                    ThinOrFullLTOPhase LTOPhase) {
  SetVector<Function *> Functions;
  for (Function &F : M) {
    if (!F.isIntrinsic())

  CallGraphUpdater CGUpdater;

  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);

  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,

  AttributorConfig AC(CGUpdater);
  AC.IsClosedWorldModule = Options.IsClosedWorld;
  AC.IsModulePass = true;
  AC.DefaultInitializeLiveInternals = false;
  AC.IndirectCalleeSpecializationCallback =
      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,

  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;

  Attributor A(Functions, InfoCache, AC);

  StringRef LTOPhaseStr = to_string(LTOPhase);
  dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
         << "[AMDGPUAttributor] Module " << M.getName() << " is "
         << (AC.IsClosedWorldModule ? "" : "not ")
         << "assumed to be a closed world.\n";

  for (auto *F : Functions) {

    CallingConv::ID CC = F->getCallingConv();

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
    if (!F->isDeclaration() && ST.hasClusters())

      Ptr = LI->getPointerOperand();
      Ptr = SI->getPointerOperand();
      Ptr = RMW->getPointerOperand();
      Ptr = CmpX->getPointerOperand();

  return A.run() == ChangeStatus::CHANGED;