#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-attributor"
26 "amdgpu-indirect-call-specialization-threshold",
28 "A threshold controls whether an indirect call will be specialized"),
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

#include "AMDGPUAttributes.def"

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

#include "AMDGPUAttributes.def"

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};
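// The AMDGPU_ATTRIBUTE X-macro above is expanded three times from
// AMDGPUAttributes.def: once for the position enum, once for the bit-mask
// enum, and once for the (mask, attribute-name) table. As an illustration
// (the authoritative entries live in the .def file), an entry such as
//   AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
// yields DISPATCH_PTR_POS, DISPATCH_PTR = 1 << DISPATCH_PTR_POS, and the
// table row {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"}.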
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  case Intrinsic::amdgcn_workitem_id_x:
  case Intrinsic::amdgcn_workgroup_id_x:
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_cluster_id_x:
  case Intrinsic::amdgcn_cluster_id_y:
  case Intrinsic::amdgcn_cluster_id_z:
  case Intrinsic::amdgcn_lds_kernel_id:
  case Intrinsic::amdgcn_dispatch_ptr:
  case Intrinsic::amdgcn_dispatch_id:
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::ubsantrap:
    if (SupportsGetDoorBellID)
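// The switch above maps each intrinsic to the implicit kernel argument it
// consumes; callers use the returned mask to decide which "amdgpu-no-*"
// assumption must be dropped for the surrounding function. A rough usage
// sketch (names as used later in this file):
//   ImplicitArgumentMask Mask =
//       intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
//                           HasApertureRegs, SupportsGetDoorbellID, COV);
//   if (Mask != NOT_IMPLICIT_INPUT)
//     removeAssumedBits(Mask);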
static bool funcRequiresHostcallPtr(const Function &F) {
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
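// Sketch of intent: device-side sanitizer runtimes report through the
// hostcall buffer, so any of the sanitize* attributes forces the hostcall
// pointer (and therefore the implicit argument pointer) to stay live even if
// no explicit hostcall use is visible in the body.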
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)

  enum ConstantStatus : uint8_t {
    ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
    ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
    ADDR_SPACE_CAST_BOTH_TO_FLAT =
        ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
  };
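  // ConstantStatus is a small bit set describing what a constant
  // (transitively) touches: a constant addrspacecast from private to flat
  // sets ADDR_SPACE_CAST_PRIVATE_TO_FLAT, one from local to flat sets
  // ADDR_SPACE_CAST_LOCAL_TO_FLAT, and the BOTH_TO_FLAT alias tests either
  // cast kind with a single mask check.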
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }
  std::optional<std::pair<unsigned, unsigned>>
  getFlatWorkGroupSizeAttr(const Function &F) const {
    return std::make_pair(R->first, *(R->second));
  }
  std::pair<unsigned, unsigned>
  getDefaultFlatWorkGroupSize(const Function &F) const {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }
  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxNumWorkGroups(F);
  }

  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
  std::pair<unsigned, unsigned>
  getWavesPerEU(const Function &F,
                std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
  }

  std::optional<std::pair<unsigned, unsigned>>
  getWavesPerEUAttr(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    Val->second = ST.getMaxWavesPerEU();

    return std::make_pair(Val->first, *(Val->second));
  }
  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(const Function &F,
                         std::pair<unsigned, unsigned> WavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();
  }

  unsigned getMaxAddrSpace() const override {
  static uint8_t visitConstExpr(const ConstantExpr *CE) {
    uint8_t Status = NONE;

    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;

  static unsigned getLDSSize(const Function &F) {
                                           {0, UINT32_MAX}, true)
  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())

      Result |= visitConstExpr(CE);

    for (const Use &U : C->operands()) {
      if (!OpC || !Visited.insert(OpC).second)

      Result |= getConstantAccess(OpC, Visited);
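  // getConstantAccess is a memoized depth-first walk over a constant's
  // operand graph: previously computed statuses are looked up in
  // ConstantStatus, Visited breaks cycles through self-referential constants,
  // and each operand's result is OR-ed into the parent's mask.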
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool HasAperture = hasApertureRegs(Fn);

    if (!IsNonEntryFunc && HasAperture)

    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);

    if (IsNonEntryFunc && (Access & DS_GLOBAL))

    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
  }

  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);
    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
  }

  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAAMDAttributes &createForPosition(const IRPosition &IRP,

  StringRef getName() const override { return "AAAMDAttributes"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAAMDAttributes::ID = 0;
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,

  StringRef getName() const override { return "AAUniformWorkGroupSize"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==

      indicateOptimisticFixpoint();
      indicatePessimisticFixpoint();

    auto CheckCallSite = [&](AbstractCallSite CS) {
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
      if (!CallerInfo || !CallerInfo->isValidState())

                                        CallerInfo->getState());

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return A.manifestAttrs(getIRPosition(), AttrList,

  bool isValidState() const override {

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";

  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
  return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
                   "AAUniformWorkGroupSize is only valid for function position");
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);

          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);

    if (F->isDeclaration())

      indicatePessimisticFixpoint();

    auto OrigAssumed = getAssumed();

    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

      return indicatePessimisticFixpoint();

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

      const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
      if (!AAAMD || !AAAMD->isValidState())
        return indicatePessimisticFixpoint();

      bool NonKernelOnly = false;
                              HasApertureRegs, SupportsGetDoorbellID, COV);

      if ((IsNonEntryFunc || !NonKernelOnly))
        removeAssumedBits(AttrMask);

      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
        removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(QUEUE_PTR);

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
      removeAssumedBits(FLAT_SCRATCH_INIT);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
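    // Net effect of updateImpl: bits that were never removed survive to
    // manifest() below, which emits them as "amdgpu-no-*" attributes so the
    // backend can skip preloading the corresponding implicit kernel
    // arguments. A leaf function that only reads workitem ids would, roughly,
    // end up with attributes like
    //   "amdgpu-no-dispatch-id" "amdgpu-no-queue-ptr" "amdgpu-no-hostcall-ptr"
    // (illustrative names; the authoritative list is AMDGPUAttributes.def).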
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));

    return A.manifestAttrs(getIRPosition(), AttrList,

  const std::string getAsStr(Attributor *) const override {
    raw_string_ostream OS(Str);
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;

  void trackStatistics() const override {}
  bool checkForQueuePtr(Attributor &A) {
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
        NeedsQueuePtr = true;

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    if (!IsNonEntryFunc && HasApertureRegs)

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (InfoCache.needsQueuePtr(C, *F))
  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    return funcRetrievesImplicitKernelArg(A, Range);
  }
  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
    if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())

    return PointerInfoAA->forallInterferingAccesses(
        Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
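  // funcRetrievesImplicitKernelArg asks AAPointerInfo whether any access
  // derived from the implicitarg pointer can reach the byte range reserved
  // for the argument in question (each helper above passes an 8-byte range at
  // a code-object-version dependent offset); only if no interfering access is
  // found can the corresponding implicit argument be dropped.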
  bool funcRetrievesLDSKernelId(Attributor &A) {
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
  bool needFlatScratchInit(Attributor &A) {
    assert(isAssumed(FLAT_SCRATCH_INIT));

    bool UsedAssumedInformation = false;
    if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
                                   {Instruction::AddrSpaceCast},
                                   UsedAssumedInformation))

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

      for (const Use &U : I.operands()) {
        if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))

        return Callee->getIntrinsicID() !=
               Intrinsic::amdgcn_addrspacecast_nonnull;

    UsedAssumedInformation = false;

    return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
  return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
      : Base(IRP, 32), AttrName(AttrName) {}

  void trackStatistics() const override {}

  template <class AttributeImpl>
  ChangeStatus updateImplImpl(Attributor &A) {

    auto CheckCallSite = [&](AbstractCallSite CS) {
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
      if (!CallerInfo || !CallerInfo->isValidState())

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
      return indicatePessimisticFixpoint();

  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
                                      std::pair<unsigned, unsigned> Default) {
    unsigned Lower = getAssumed().getLower().getZExtValue();
    unsigned Upper = getAssumed().getUpper().getZExtValue();

      return ChangeStatus::UNCHANGED;

    LLVMContext &Ctx = F->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);

    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},

  const std::string getAsStr(Attributor *) const override {
    raw_string_ostream OS(Str);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasAttr = false;
    auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
    auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);

    if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
      if (*Attr != MaxRange) {

    if (Range == MaxRange)

    ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
    IntegerRangeState IRS(CR);

      indicateOptimisticFixpoint();

    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);

  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, InfoCache.getMaximumFlatWorkGroupRange(*F));

  StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
  return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
                   "AAAMDFlatWorkGroupSize is only valid for function position");
struct TupleDecIntegerRangeState : public AbstractState {
  DecIntegerState<uint32_t> X, Y, Z;

  bool isValidState() const override {
    return X.isValidState() && Y.isValidState() && Z.isValidState();
  }

  bool isAtFixpoint() const override {
    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
  }

    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
           Z.indicateOptimisticFixpoint();

    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
           Z.indicatePessimisticFixpoint();

  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {

  TupleDecIntegerRangeState &getAssumed() { return *this; }
  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
};

using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

      indicatePessimisticFixpoint();

    auto CheckCallSite = [&](AbstractCallSite CS) {
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
      if (!CallerInfo || !CallerInfo->isValidState())

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
      return indicatePessimisticFixpoint();

  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,

    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();

    return A.manifestAttrs(
        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},

  StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }

  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  void trackStatistics() const override {}

  static const char ID;
};
const char AAAMDMaxNumWorkgroups::ID = 0;

AAAMDMaxNumWorkgroups &
AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
  return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
  llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
      std::pair<unsigned, unsigned> MaxWavesPerEURange{
          1U, InfoCache.getMaxWavesPerEU(*F)};
      if (*Attr != MaxWavesPerEURange) {
        auto [Min, Max] = *Attr;
        ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
        IntegerRangeState RangeState(Range);
        this->getState() = RangeState;
        indicateOptimisticFixpoint();

    indicatePessimisticFixpoint();

    auto CheckCallSite = [&](AbstractCallSite CS) {
                        << "->" << Func->getName() << '\n');

      const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
      if (!CallerAA || !CallerAA->isValidState())

      ConstantRange Assumed = getAssumed();
                              CallerAA->getAssumed().getLower().getZExtValue());
                              CallerAA->getAssumed().getUpper().getZExtValue());
      ConstantRange Range(APInt(32, Min), APInt(32, Max));
      IntegerRangeState RangeState(Range);
      getState() = RangeState;
      Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
                                      : ChangeStatus::CHANGED;

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, {1U, InfoCache.getMaxWavesPerEU(*F)});

  StringRef getName() const override { return "AAAMDWavesPerEU"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
  return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
  for (const auto &CI : IA->ParseConstraints()) {
    for (StringRef Code : CI.Codes) {
      Code.consume_front("{");
      if (Code.starts_with("a"))
struct AAAMDGPUNoAGPR
    : public IRAttribute<Attribute::NoUnwind,
                         StateWrapper<BooleanState, AbstractAttribute>,
  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
    return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);

    auto [MinNumAGPR, MaxNumAGPR] =

    if (MinNumAGPR == 0)
      indicateOptimisticFixpoint();

  const std::string getAsStr(Attributor *A) const override {
    return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";

  void trackStatistics() const override {}

      const Value *CalleeOp = CB.getCalledOperand();
        return !inlineAsmUsesAGPRs(IA);

      if (Callee->isIntrinsic())

      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
      return CalleeInfo && CalleeInfo->isValidState() &&
             CalleeInfo->getAssumed();

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();
    return ChangeStatus::UNCHANGED;

      return ChangeStatus::UNCHANGED;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")});

  StringRef getName() const override { return "AAAMDGPUNoAGPR"; }
  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  static const char ID;
};
const char AAAMDGPUNoAGPR::ID = 0;
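// If no reachable call (direct, indirect, or inline asm) can use AGPRs,
// manifest() records that as "amdgpu-agpr-alloc"="0", telling the backend
// that no accumulator registers need to be reserved for this function.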
struct AAAMDGPUClusterDims
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,

  StringRef getName() const override { return "AAAMDGPUClusterDims"; }

  const char *getIdAddr() const override { return &ID; }

  static bool classof(const AbstractAttribute *AA) {

  virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;

  static const char ID;
};
const char AAAMDGPUClusterDims::ID = 0;
struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
  AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDGPUClusterDims(IRP, A) {}

    assert(F && "empty associated function");

      indicatePessimisticFixpoint();
      indicateOptimisticFixpoint();

  const std::string getAsStr(Attributor *A) const override {

  void trackStatistics() const override {}

    auto OldState = Attr;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
          DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())

      return merge(CallerAA->getClusterDims());

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;

      return ChangeStatus::UNCHANGED;
    return A.manifestAttrs(
        {Attribute::get(getAssociatedFunction()->getContext(), AttrName,

  const AMDGPU::ClusterDimsAttr &getClusterDims() const override {

  bool merge(const AMDGPU::ClusterDimsAttr &Other) {

    if (Other.isUnknown())

  AMDGPU::ClusterDimsAttr Attr;

  static constexpr const char AttrName[] = "amdgpu-cluster-dims";
};

AAAMDGPUClusterDims &
AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
  return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
  llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
                    AMDGPUAttributorOptions Options,

  SetVector<Function *> Functions;
  for (Function &F : M) {
    if (!F.isIntrinsic())

  CallGraphUpdater CGUpdater;

  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,

  AttributorConfig AC(CGUpdater);
  AC.IsClosedWorldModule = Options.IsClosedWorld;

  AC.IsModulePass = true;
  AC.DefaultInitializeLiveInternals = false;
  AC.IndirectCalleeSpecializationCallback =
      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,

  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;

  Attributor A(Functions, InfoCache, AC);

  StringRef LTOPhaseStr = to_string(LTOPhase);
  dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
         << "[AMDGPUAttributor] Module " << M.getName() << " is "
         << (AC.IsClosedWorldModule ? "" : "not ")
         << "assumed to be a closed world.\n";

  for (auto *F : Functions) {
    CallingConv::ID CC = F->getCallingConv();

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
    if (!F->isDeclaration() && ST.hasClusters())

      Ptr = LI->getPointerOperand();
      Ptr = SI->getPointerOperand();
      Ptr = RMW->getPointerOperand();
      Ptr = CmpX->getPointerOperand();

  return A.run() == ChangeStatus::CHANGED;