#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-attributor"

static cl::opt<unsigned> IndirectCallSpecializationThreshold(
    "amdgpu-indirect-call-specialization-threshold",
    cl::desc("A threshold controls whether an indirect call will be specialized"),
    cl::init(3));

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
enum ImplicitArgumentPositions {
#include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
#include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};
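// AMDGPUAttributes.def is an X-macro table: each entry pairs an implicit
// kernel argument with the IR attribute string that marks it as unused, and
// the three expansions of AMDGPU_ATTRIBUTE above turn that single list into
// the position enum, the bitmask enum, and the ImplicitAttrs name table.
// Illustrative entry (see AMDGPUAttributes.def for the authoritative list):
//   AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")
// intrinsicToAttrMask() below maps intrinsic IDs into this bitmask.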
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // The aperture bases come from the implicit arguments under code object
    // V5+, and from the queue pointer before that.
    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
                                                    : QUEUE_PTR;
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::ubsantrap:
    if (SupportsGetDoorBellID) // The doorbell ID is read off the queue.
      return QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
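// Device-side sanitizers report through the hostcall buffer, so any of the
// sanitize* function attributes forces the hostcall pointer (and with it the
// implicit argument pointer) to be treated as required no matter what the
// body of the function appears to use.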
static bool funcRequiresHostcallPtr(const Function &F) {
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
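// AMDGPUInformationCache augments the Attributor's InformationCache with
// subtarget queries (aperture registers, doorbell support, work-group and
// wave limits) and a cached analysis of constants, so the abstract attributes
// below can share the results instead of recomputing them per function.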
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
        CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}

  TargetMachine &TM;

  enum ConstantStatus : uint8_t {
    NONE = 0,
    DS_GLOBAL = 1 << 0,
    ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
    ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
    ADDR_SPACE_CAST_BOTH_TO_FLAT =
        ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
  };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }
  /// Returns the "amdgpu-flat-work-group-size" attribute as a {Min, Max}
  /// pair, or std::nullopt when the attribute is absent.
  std::optional<std::pair<unsigned, unsigned>>
  getFlatWorkGroupSizeAttr(const Function &F) const {
    auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
    if (!R)
      return std::nullopt;
    return std::make_pair(R->first, *(R->second));
  }

  std::pair<unsigned, unsigned>
  getDefaultFlatWorkGroupSize(const Function &F) const {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxNumWorkGroups(F);
  }

  /// Get code object version.
  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
  /// Get the effective value of "amdgpu-waves-per-eu" for the function,
  /// accounting for the interaction with the flat work group size to use.
  std::pair<unsigned, unsigned>
  getWavesPerEU(const Function &F,
                std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
  }

  /// Get the "amdgpu-waves-per-eu" attribute as a {Min, Max} pair, filling in
  /// the subtarget maximum when only the lower bound was given.
  std::optional<std::pair<unsigned, unsigned>>
  getWavesPerEUAttr(const Function &F) {
    auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
                                               /*OnlyFirstRequired=*/true);
    if (!Val)
      return std::nullopt;
    if (!Val->second) {
      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
      Val->second = ST.getMaxWavesPerEU();
    }
    return std::make_pair(Val->first, *(Val->second));
  }

  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(const Function &F,
                         std::pair<unsigned, unsigned> WavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
                                     getLDSSize(F));
  }

  unsigned getMaxWavesPerEU(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();
  }

  unsigned getMaxAddrSpace() const override {
    return AMDGPUAS::MAX_AMDGPU_ADDRESS;
  }
private:
  /// Check if the ConstantExpr \p CE performs an addrspacecast from private
  /// or local to flat.
  static uint8_t visitConstExpr(const ConstantExpr *CE) {
    uint8_t Status = NONE;

    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
      else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
    }

    return Status;
  }

  /// Returns the minimum amount of LDS space used by a workgroup running
  /// function \p F.
  static unsigned getLDSSize(const Function &F) {
    return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
                                           {0, UINT32_MAX}, true)
        .first;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      Result |= visitConstExpr(CE);

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC || !Visited.insert(OpC).second)
        continue;

      Result |= getConstantAccess(OpC, Visited);
    }

    ConstantStatus[C] = Result;
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
  }

  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);
    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
};
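// AAAMDAttributes tracks, per function, which AMDGPU implicit kernel
// arguments can be proven unused. Every bit of the BitIntegerState
// corresponds to one ImplicitArgumentMask entry; a bit that is still assumed
// at the fixpoint means the argument is not needed, and manifest() then adds
// the matching "amdgpu-no-*" string attribute (e.g. a function that never
// reaches the hostcall buffer ends up with "amdgpu-no-hostcall-ptr").
// AAUniformWorkGroupSize is the analogous boolean deduction for the
// "uniform-work-group-size" attribute.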
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAUniformWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
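// The *Function structs below are the concrete, function-position
// implementations. AAUniformWorkGroupSizeFunction seeds kernels from their
// existing "uniform-work-group-size" attribute and then propagates the value
// from callers into callees: a callee may only keep "true" if every caller
// (and all call sites must be known) also has it.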
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue =
          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
          "true";

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if it is explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions, these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (!AAEdges || !AAEdges->isValidState() ||
        AAEdges->hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

    for (Function *Callee : AAEdges->getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        if (!AAAMD || !AAAMD->isValidState())
          return indicatePessimisticFixpoint();
        *this &= *AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID, COV);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to acquire queue_ptr, maybe hostcall, default
    // queue, and completion action under code object version 5.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (COV >= 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);
    }

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
      removeAssumedBits(FLAT_SCRATCH_INIT);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to check.
    if (NeedsQueuePtr)
      return true;

    // We cannot cover all the instructions. Try to check the constants.
    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }
  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // Check if this is a call to the implicitarg_ptr builtin that is used to
    // retrieve the implicit kernel argument in Range. The argument is unused
    // only if every use of the implicitarg_ptr is a load that provably does
    // not touch any byte of that range.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
      if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
        return false;

      return PointerInfoAA->forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }

  // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init
  // should not be set.
  bool needFlatScratchInit(Attributor &A) {
    assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is assumed

    // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
    // there is a cast from the private address space.
    auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
      return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
             AMDGPUAS::PRIVATE_ADDRESS;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
                                   {Instruction::AddrSpaceCast},
                                   UsedAssumedInformation))
      return true;

    // Check for addrspacecasts from private in constant expressions.
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    Function *F = getAssociatedFunction();
    for (Instruction &I : instructions(F)) {
      for (const Use &U : I.operands()) {
        if (const auto *C = dyn_cast<Constant>(U)) {
          if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
            return true;
        }
      }
    }

    // Finally, check the callees.
    auto CheckForNoFlatScratchInit = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Function *Callee = CB.getCalledFunction();

      // Callee == nullptr for inline asm or indirect calls with known
      // callees; the unknown-callee case was already rejected in updateImpl().
      if (!Callee)
        return true;

      return Callee->getIntrinsicID() !=
             Intrinsic::amdgcn_addrspacecast_nonnull;
    };

    UsedAssumedInformation = false;
    // If any call-like instruction needs flat scratch init,
    // checkForAllCallLikeInstructions returns false and so we return true.
    return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
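// AAAMDSizeRangeAttribute is the shared machinery for the two range-valued
// attributes below ("amdgpu-flat-work-group-size" and "amdgpu-waves-per-eu"):
// it stores an IntegerRangeState, merges the ranges of all callers into the
// callee via updateImplImpl(), and only emits the attribute string when the
// clamped result differs from the subtarget default.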
/// Base class to derive different size ranges.
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  StringRef AttrName;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {}

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  template <class AttributeImpl>
  ChangeStatus updateImplImpl(Attributor &A) {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |= clampStateAndIndicateChange(this->getState(),
                                            CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Clamp the assumed range to the default value [Min, Max] and emit the
  /// attribute only if the result differs from that default.
  ChangeStatus
  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
                                      std::pair<unsigned, unsigned> Default) {
    auto [Min, Max] = Default;
    unsigned Lower = getAssumed().getLower().getZExtValue();
    unsigned Upper = getAssumed().getUpper().getZExtValue();

    // Clamp the range to the default value.
    if (Lower < Min)
      Lower = Min;
    if (Upper > Max + 1)
      Upper = Max + 1;

    // No manifest if the value is invalid or same as default after clamp.
    if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
      return ChangeStatus::UNCHANGED;

    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << Lower << ',' << Upper - 1;
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},
                           /*ForceReplace=*/true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << getName() << '[';
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }
};
/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasAttr = false;
    auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
    auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);

    if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
      // We only consider an attribute that is not the maximum range because
      // the front end always emits the attribute and sometimes emits the
      // maximum range.
      if (*Attr != MaxRange) {
        Range = *Attr;
        HasAttr = true;
      }
    }

    // Don't directly clamp the state to the maximum range: that is the worst
    // possible state and would pin the deduction there.
    if (Range == MaxRange)
      return;

    auto [Min, Max] = Range;
    ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
    IntegerRangeState IRS(CR);
    clampStateAndIndicateChange(this->getState(), IRS);

    if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicateOptimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, InfoCache.getMaximumFlatWorkGroupRange(*F));
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}
struct TupleDecIntegerRangeState : public AbstractState {
  DecIntegerState<uint32_t> X, Y, Z;

  bool isValidState() const override {
    return X.isValidState() && Y.isValidState() && Z.isValidState();
  }

  bool isAtFixpoint() const override {
    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
  }

  ChangeStatus indicateOptimisticFixpoint() override {
    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
           Z.indicateOptimisticFixpoint();
  }

  ChangeStatus indicatePessimisticFixpoint() override {
    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
           Z.indicatePessimisticFixpoint();
  }

  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
    X ^= Other.X;
    Y ^= Other.Y;
    Z ^= Other.Z;
    return *this;
  }

  bool operator==(const TupleDecIntegerRangeState &Other) const {
    return X == Other.X && Y == Other.Y && Z == Other.Z;
  }

  TupleDecIntegerRangeState &getAssumed() { return *this; }
  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
};

using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;

/// Propagate amdgpu-max-num-workgroups attribute.
struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |= clampStateAndIndicateChange(this->getState(),
                                            CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
                                                  Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();

    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
        /* ForceReplace= */ true);
  }

  StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }

  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
       << ']';
    return OS.str();
  }

  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDMaxNumWorkgroups
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  void trackStatistics() const override {}

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDMaxNumWorkgroups::ID = 0;

AAAMDMaxNumWorkgroups &
AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
  llvm_unreachable(
      "AAAMDMaxNumWorkgroups is only valid for function position");
}
/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // If the attribute exists, simply set the state to the known range.
    if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
      std::pair<unsigned, unsigned> MaxWavesPerEURange{
          1U, InfoCache.getMaxWavesPerEU(*F)};
      if (*Attr != MaxWavesPerEURange) {
        auto [Min, Max] = *Attr;
        ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
        IntegerRangeState RangeState(Range);
        this->getState() = RangeState;
        indicateOptimisticFixpoint();
        return;
      }
    }

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');

      const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())
        return false;

      ConstantRange Assumed = getAssumed();
      unsigned Min = std::max(Assumed.getLower().getZExtValue(),
                              CallerAA->getAssumed().getLower().getZExtValue());
      unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
                              CallerAA->getAssumed().getUpper().getZExtValue());
      ConstantRange Range(APInt(32, Min), APInt(32, Max));
      IntegerRangeState RangeState(Range);
      getState() = RangeState;
      Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
                                      : ChangeStatus::CHANGED;

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, {1U, InfoCache.getMaxWavesPerEU(*F)});
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDWavesPerEU"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWavesPerEU
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
  llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}
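// AGPR deduction: inline assembly is the one place calls cannot be analyzed
// through, so inlineAsmUsesAGPRs() conservatively scans the parsed constraint
// codes; an "a" constraint (with or without a leading '{') names an AGPR.
// An IR-level use that would be flagged looks roughly like this
// (illustrative only):
//   call void asm sideeffect "; uses $0", "a"(i32 %val)
// AAAMDGPUNoAGPR then proves, bottom-up, that a function and everything it
// calls never needs AGPRs, and manifests that as "amdgpu-agpr-alloc"="0".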
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
  for (const auto &CI : IA->ParseConstraints()) {
    for (StringRef Code : CI.Codes) {
      Code.consume_front("{");
      if (Code.starts_with("a"))
        return true;
    }
  }

  return false;
}

struct AAAMDGPUNoAGPR
    : public IRAttribute<Attribute::NoUnwind,
                         StateWrapper<BooleanState, AbstractAttribute>,
                         AAAMDGPUNoAGPR> {
  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
                                           Attributor &A) {
    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
      return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
    llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto [MinNumAGPR, MaxNumAGPR] =
        AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
                                        /*OnlyFirstRequired=*/true);
    if (MinNumAGPR == 0)
      indicateOptimisticFixpoint();
  }

  const std::string getAsStr(Attributor *A) const override {
    return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    auto CheckForNoAGPRs = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Value *CalleeOp = CB.getCalledOperand();
      const Function *Callee = dyn_cast<Function>(CalleeOp);
      if (!Callee) {
        if (const auto *IA = dyn_cast<InlineAsm>(CalleeOp))
          return !inlineAsmUsesAGPRs(IA);
        return false;
      }

      // Some intrinsics may use AGPRs, but if we have a choice, we are not
      // required to use AGPRs.
      if (Callee->isIntrinsic())
        return true;

      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      return CalleeInfo && CalleeInfo->isValidState() &&
             CalleeInfo->getAssumed();
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    if (!getAssumed())
      return ChangeStatus::UNCHANGED;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")});
  }

  StringRef getName() const override { return "AAAMDGPUNoAGPR"; }
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUNoAGPR
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  static const char ID;
};

const char AAAMDGPUNoAGPR::ID = 0;
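// runImpl() is the driver shared by the pass wrappers: it collects every
// non-intrinsic function, configures an Attributor restricted to the AAs
// above, seeds them for each function, and runs the fixpoint iteration.
// With the new pass manager this is what ultimately executes when the pass
// is requested by name, e.g. (assuming an AMDGPU-enabled build):
//   opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor in.ll -S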
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
                    AMDGPUAttributorOptions Options,
                    ThinOrFullLTOPhase LTOPhase) {
  SetVector<Function *> Functions;
  for (Function &F : M) {
    if (!F.isIntrinsic())
      Functions.insert(&F);
  }

  CallGraphUpdater CGUpdater;
  BumpPtrAllocator Allocator;
  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAAMDFlatWorkGroupSize::ID,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
       &AACallEdges::ID, &AAPointerInfo::ID, &AAAddressSpace::ID});

  AttributorConfig AC(CGUpdater);
  AC.IsClosedWorldModule = Options.IsClosedWorld;
  AC.Allowed = &Allowed;
  AC.IsModulePass = true;
  AC.DefaultInitializeLiveInternals = false;
  AC.IndirectCalleeSpecializationCallback =
      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
         Function &Callee, unsigned NumAssumedCallees) {
        return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
               (NumAssumedCallees <= IndirectCallSpecializationThreshold);
      };
  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  };

  Attributor A(Functions, InfoCache, AC);

  LLVM_DEBUG({
    StringRef LTOPhaseStr = to_string(LTOPhase);
    dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
           << "[AMDGPUAttributor] Module " << M.getName() << " is "
           << (AC.IsClosedWorldModule ? "" : "not ")
           << "assumed to be a closed world.\n";
  });

  for (auto *F : Functions) {
    A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
    CallingConv::ID CC = F->getCallingConv();
    if (!AMDGPU::isEntryFunctionCC(CC)) {
      A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
      A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
    }

    for (auto &I : instructions(F)) {
      Value *Ptr = nullptr;
      if (auto *LI = dyn_cast<LoadInst>(&I))
        Ptr = LI->getPointerOperand();
      else if (auto *SI = dyn_cast<StoreInst>(&I))
        Ptr = SI->getPointerOperand();
      else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
        Ptr = RMW->getPointerOperand();
      else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
        Ptr = CmpX->getPointerOperand();

      if (Ptr)
        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
    }
  }

  return A.run() == ChangeStatus::CHANGED;
}
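// Illustrative outcome (not from an actual run): after this pass, a kernel
// that never reaches the hostcall machinery and never queries the dispatch id
// might carry
//   attributes #0 = { "amdgpu-no-hostcall-ptr" "amdgpu-no-dispatch-id"
//                     "uniform-work-group-size"="true" ... }
// while a function that calls llvm.amdgcn.dispatch.ptr keeps that implicit
// argument enabled and therefore gets no "amdgpu-no-dispatch-ptr" string.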