#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-attributor"
26 "amdgpu-indirect-call-specialization-threshold",
28 "A threshold controls whether an indirect call will be specialized"),
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
#include "AMDGPUAttributes.def"

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
#include "AMDGPUAttributes.def"

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};
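
// Map an intrinsic ID onto the implicit kernel argument it requires. The
// NonKernelOnly/NeedsImplicit out-parameters refine the answer based on the
// calling context, aperture registers, doorbell support, and code object
// version.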
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under code object V5 the aperture bases come from implicitarg_ptr;
    // before V5 they require the queue pointer.
    if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
      return IMPLICIT_ARG_PTR;
    return QUEUE_PTR;
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::ubsantrap:
    if (SupportsGetDoorBellID) // GetDoorbellID method only needs queue_ptr.
      return QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
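
// Returns true if the function requires the implicit argument be passed
// regardless of the function contents (sanitizer runtimes rely on hostcall).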
static bool funcRequiresHostcallPtr(const Function &F) {
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
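
// AMDGPUInformationCache members: a shared cache the AMDGPU abstract
// attributes use for subtarget queries (aperture registers, doorbell support,
// work-group and waves-per-EU limits) and for classifying constants. In the
// methods below, ST is the function's GCNSubtarget obtained from the
// TargetMachine.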
  enum ConstantStatus : uint8_t {
    NONE = 0,
    DS_GLOBAL = 1 << 0,
    ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
    ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
    ADDR_SPACE_CAST_BOTH_TO_FLAT =
        ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
  };
  bool hasApertureRegs(Function &F) {
    return ST.hasApertureRegs();
  }

  bool supportsGetDoorbellID(Function &F) {
    return ST.supportsGetDoorbellID();
  }
  std::optional<std::pair<unsigned, unsigned>>
  getFlatWorkGroupSizeAttr(const Function &F) const {
    // R holds the parsed "amdgpu-flat-work-group-size" attribute, if present.
    return std::make_pair(R->first, *(R->second));
  }
  std::pair<unsigned, unsigned>
  getDefaultFlatWorkGroupSize(const Function &F) const {
    return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
  }
  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
    return ST.getMaxNumWorkGroups(F);
  }

  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
  std::pair<unsigned, unsigned>
  getWavesPerEU(const Function &F,
                std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
  }

  std::optional<std::pair<unsigned, unsigned>>
  getWavesPerEUAttr(const Function &F) {
    // Val is the parsed "amdgpu-waves-per-eu" attribute; when only the
    // minimum is given, the maximum defaults to the subtarget limit.
    if (!Val->second)
      Val->second = ST.getMaxWavesPerEU();
    return std::make_pair(Val->first, *(Val->second));
  }
  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(const Function &F,
                         std::pair<unsigned, unsigned> WavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
                                     getLDSSize(F));
  }

  unsigned getMaxWavesPerEU(const Function &F) {
    return ST.getMaxWavesPerEU();
  }
  // Record which implicit inputs a constant expression's address space casts
  // would require.
  uint8_t visitConstExpr(const ConstantExpr *CE) {
    uint8_t Status = NONE;
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
      else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
    }
    return Status;
  }

  // Minimum LDS usage recorded on the function ("amdgpu-lds-size" attribute).
  static unsigned getLDSSize(const Function &F) {
    return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
                                           {0, UINT32_MAX}, true)
        .first;
  }
  // Compute (and cache) the access bitmap for constant C, including all of
  // its transitive constant operands.
  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      Result |= visitConstExpr(CE);

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC || !Visited.insert(OpC).second)
        continue;
      Result |= getConstantAccess(OpC, Visited);
    }
    return Result;
  }
  // Returns true if Fn needs the queue pointer because of constant C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constant's operands in this case.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    // Access is the cached constant-access bitmap for C.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
  }

  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
  }

  const unsigned CodeObjectVersion;
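
// AAAMDAttributes tracks one bit per implicit kernel argument. A bit that is
// still assumed means the argument is believed to be unneeded; manifest()
// then emits the matching "amdgpu-no-*" attribute for every bit that becomes
// known.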
struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAAMDAttributes::ID = 0;
struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;
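
// Function-position implementation of AAUniformWorkGroupSize: a function may
// only keep "uniform-work-group-size"="true" if every caller has it as well.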
struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue =
          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
          "true";

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the boolean value is false.
    return true;
  }

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// Unused, too expensive to track statistics.
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // Sanitizer instrumentation needs hostcall, so force the implicit-arg and
    // hostcall bits off the assumed "not needed" set.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;
      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;
    // Graphics calling conventions cannot take implicit kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }
  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state, used to detect a change at the end.
    auto OrigAssumed = getAssumed();

    // Check for intrinsics and propagate attributes from callees.
    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (!AAEdges || !AAEdges->isValidState() ||
        AAEdges->hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

    for (Function *Callee : AAEdges->getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        // Non-intrinsic callees contribute their own deduced attribute state.
        const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        if (!AAAMD || !AAAMD->isValidState())
          return indicatePessimisticFixpoint();
        *this &= *AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID, COV);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }
    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under code object V5 the aperture bases are reached through
      // implicitarg_ptr; before V5 they require the queue pointer.
      if (COV >= AMDGPU::AMDHSA_COV5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A))
      removeAssumedBits(LDS_KERNEL_ID);

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
      removeAssumedBits(FLAT_SCRATCH_INIT);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }
    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  void trackStatistics() const override {}
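
  // Returns true if something reachable from this function requires the queue
  // pointer: a cast from private/local to flat without aperture registers, or
  // a DS global referenced from a non-entry function.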
615 auto &InfoCache =
static_cast<AMDGPUInformationCache &
>(
A.getInfoCache());
617 bool NeedsQueuePtr =
false;
622 NeedsQueuePtr =
true;
628 bool HasApertureRegs = InfoCache.hasApertureRegs(*
F);
634 if (!HasApertureRegs) {
635 bool UsedAssumedInformation =
false;
636 A.checkForAllInstructions(CheckAddrSpaceCasts, *
this,
637 {Instruction::AddrSpaceCast},
638 UsedAssumedInformation);
645 if (!IsNonEntryFunc && HasApertureRegs)
650 for (
const Use &U :
I.operands()) {
651 if (
const auto *
C = dyn_cast<Constant>(U)) {
652 if (InfoCache.needsQueuePtr(
C, *
F))
  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < AMDGPU::AMDHSA_COV5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < AMDGPU::AMDHSA_COV5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }
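
  // Check whether the function may load bytes in the given range of the
  // implicit kernel argument buffer (reached through
  // llvm.amdgcn.implicitarg.ptr).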
708 auto &
Call = cast<CallBase>(
I);
709 if (
Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
714 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
723 bool UsedAssumedInformation =
false;
724 return !
A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *
this,
725 UsedAssumedInformation);
730 auto &
Call = cast<CallBase>(
I);
731 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
733 bool UsedAssumedInformation =
false;
734 return !
A.checkForAllCallLikeInstructions(DoesNotRetrieve, *
this,
735 UsedAssumedInformation);
  bool needFlatScratchInit(Attributor &A) {
    assert(isAssumed(FLAT_SCRATCH_INIT)); // Only called while still assumed.

    // Flat scratch init is needed if any address space cast originates from
    // the private address space.
    auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
      return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
             AMDGPUAS::PRIVATE_ADDRESS;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
                                   {Instruction::AddrSpaceCast},
                                   UsedAssumedInformation))
      return true;

    // Check for private-to-flat casts hidden in constant expressions.
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    Function *F = getAssociatedFunction();
    for (Instruction &I : instructions(*F)) {
      for (const Use &U : I.operands()) {
        if (const auto *C = dyn_cast<Constant>(U)) {
          if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
            return true;
        }
      }
    }

    // Finally look at intrinsic callees; amdgcn.addrspacecast.nonnull is the
    // call-like way to introduce such a cast.
    auto CheckForNoFlatScratchInit = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Function *Callee = CB.getCalledFunction();
      if (!Callee || !Callee->isIntrinsic())
        return true;
      return Callee->getIntrinsicID() !=
             Intrinsic::amdgcn_addrspacecast_nonnull;
    };

    UsedAssumedInformation = false;
    // If any callee fails the check, checkForAllCallLikeInstructions returns
    // false and flat scratch init is required.
    return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
                                              UsedAssumedInformation);
  }
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
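
// Shared base for the integer-range attributes below ("amdgpu-flat-work-group-size"
// and "amdgpu-waves-per-eu"): the callee's range is clamped by every caller,
// and the attribute is only emitted when it differs from the default.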
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  StringRef AttrName;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {}

  void trackStatistics() const override {}

  template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');
      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;
      Change |= clampStateAndIndicateChange(this->getState(),
                                            CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();
    return Change;
  }

  // Clamp the assumed range to Default and emit AttrName only if the result
  // still differs from Default.
  ChangeStatus
  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
                                      std::pair<unsigned, unsigned> Default) {
    auto [Min, Max] = Default;
    unsigned Lower = getAssumed().getLower().getZExtValue();
    unsigned Upper = getAssumed().getUpper().getZExtValue();
    Lower = std::max(Lower, Min);
    Upper = std::min(Upper, Max + 1);
    if ((Lower == Min && Upper == Max + 1) || Upper < Lower)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << Lower << ',' << Upper - 1;
    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
                        OS.str())},
        /*ForceReplace=*/true);
  }

  const std::string getAsStr(Attributor *) const override {
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getName() << '[' << getAssumed().getLower() << ','
       << getAssumed().getUpper() - 1 << ']';
    return std::string(Buffer);
  }
};
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasAttr = false;
    auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
    auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);

    if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
      // The frontend always emits the attribute, sometimes with the maximum
      // range; only a non-maximum attribute is worth honoring directly.
      if (*Attr != MaxRange) {
        Range = *Attr;
        HasAttr = true;
      }
    }

    // Clamping to the maximum range would be the worst possible state; skip.
    if (Range == MaxRange)
      return;

    ConstantRange CR(APInt(32, Range.first), APInt(32, Range.second + 1));
    IntegerRangeState IRS(CR);
    clampStateAndIndicateChange(this->getState(), IRS);

    if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicateOptimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }
  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, InfoCache.getMaximumFlatWorkGroupRange(*F));
  }

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}
struct TupleDecIntegerRangeState : public AbstractState {
  DecIntegerState<uint32_t> X, Y, Z;

  bool isValidState() const override {
    return X.isValidState() && Y.isValidState() && Z.isValidState();
  }

  bool isAtFixpoint() const override {
    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
  }

  ChangeStatus indicateOptimisticFixpoint() override {
    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
           Z.indicateOptimisticFixpoint();
  }

  ChangeStatus indicatePessimisticFixpoint() override {
    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
           Z.indicatePessimisticFixpoint();
  }

  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
    X ^= Other.X;
    Y ^= Other.Y;
    Z ^= Other.Z;
    return *this;
  }

  TupleDecIntegerRangeState &getAssumed() { return *this; }
  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
};
using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

/// Propagate the amdgpu-max-num-workgroups attribute.
struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |= clampStateAndIndicateChange(this->getState(),
                                            CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }
  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
                                                  Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
        /* ForceReplace= */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
       << ']';
    return OS.str();
  }

  const char *getIdAddr() const override { return &ID; }

  /// Unused, too expensive to track statistics.
  void trackStatistics() const override {}

  static const char ID;
};
const char AAAMDMaxNumWorkgroups::ID = 0;
AAAMDMaxNumWorkgroups &
AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
  llvm_unreachable(
      "AAAMDMaxNumWorkgroups is only valid for function position");
}
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // Honor an existing attribute, but only if it is not the default range.
    if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
      std::pair<unsigned, unsigned> MaxWavesPerEURange{
          1U, InfoCache.getMaxWavesPerEU(*F)};
      if (*Attr != MaxWavesPerEURange) {
        auto [Min, Max] = *Attr;
        ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
        IntegerRangeState RangeState(Range);
        this->getState() = RangeState;
        indicateOptimisticFixpoint();
        return;
      }
    }

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }
1138 <<
"->" <<
Func->getName() <<
'\n');
1141 const auto *CallerAA =
A.getAAFor<AAAMDWavesPerEU>(
1143 if (!CallerAA || !CallerAA->isValidState())
1148 CallerAA->getAssumed().getLower().getZExtValue());
1150 CallerAA->getAssumed().getUpper().getZExtValue());
1153 getState() = RangeState;
1154 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1155 : ChangeStatus::CHANGED;
1160 bool AllCallSitesKnown =
true;
1161 if (!
A.checkForAllCallSites(CheckCallSite, *
this,
true, AllCallSitesKnown))
1162 return indicatePessimisticFixpoint();
  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, {1U, InfoCache.getMaxWavesPerEU(*F)});
  }

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
  llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}
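
// "amdgpu-no-agpr" deduction: a function can be marked AGPR-free when neither
// it nor anything it may call (including inline asm operands) can use an AGPR.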
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
  for (const auto &CI : IA->ParseConstraints()) {
    for (StringRef Code : CI.Codes) {
      Code.consume_front("{");
      if (Code.starts_with("a"))
        return true;
    }
  }
  return false;
}
struct AAAMDGPUNoAGPR
    : public IRAttribute<Attribute::NoUnwind,
                         StateWrapper<BooleanState, AbstractAttribute>,
                         AAAMDGPUNoAGPR> {
  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
                                           Attributor &A) {
    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
      return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
    llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto [MinNumAGPR, MaxNumAGPR] =
        AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
                                        /*OnlyFirstRequired=*/true);
    if (MinNumAGPR == 0)
      indicateOptimisticFixpoint();
  }

  const std::string getAsStr(Attributor *A) const override {
    return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    auto CheckForNoAGPRs = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Value *CalleeOp = CB.getCalledOperand();
      const Function *Callee = dyn_cast<Function>(CalleeOp);
      if (!Callee) {
        if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
          return !inlineAsmUsesAGPRs(IA);
        return false;
      }

      // Some intrinsics may use AGPRs, but if we have a choice, we are not
      // required to use AGPRs.
      if (Callee->isIntrinsic())
        return true;

      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      return CalleeInfo && CalleeInfo->isValidState() &&
             CalleeInfo->getAssumed();
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    if (!getAssumed())
      return ChangeStatus::UNCHANGED;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, "amdgpu-no-agpr")});
  }

  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAAMDGPUNoAGPR::ID = 0;
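
// Pass driver: seed the Attributor with the AMDGPU abstract attributes for
// every non-intrinsic function in the module and run it to a fixpoint. The
// pass is exposed to opt as "amdgpu-attributor".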
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
                    AMDGPUAttributorOptions Options,
                    ThinOrFullLTOPhase LTOPhase) {
  SetVector<Function *> Functions;
  for (Function &F : M) {
    if (!F.isIntrinsic())
      Functions.insert(&F);
  }

  CallGraphUpdater CGUpdater;
  BumpPtrAllocator Allocator;
  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  // Restrict the attributor to the abstract attributes this pass relies on.
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAAMDFlatWorkGroupSize::ID,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
       &AACallEdges::ID, &AAPointerInfo::ID, &AAAddressSpace::ID});

  AttributorConfig AC(CGUpdater);
  AC.IsClosedWorldModule = Options.IsClosedWorld;
  AC.Allowed = &Allowed;
  AC.IsModulePass = true;
  AC.DefaultInitializeLiveInternals = false;
  // Only specialize an indirect call when the callee is not an entry point
  // and the number of assumed callees stays below the threshold option.
  AC.IndirectCalleeSpecializationCallback =
      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
         Function &Callee, unsigned NumAssumedCallees) {
        return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
               (NumAssumedCallees <= IndirectCallSpecializationThreshold);
      };
  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  };

  Attributor A(Functions, InfoCache, AC);

  LLVM_DEBUG({
    StringRef LTOPhaseStr = to_string(LTOPhase);
    dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
           << "[AMDGPUAttributor] Module " << M.getName() << " is "
           << (AC.IsClosedWorldModule ? "" : "not ")
           << "assumed to be a closed world.\n";
  });

  for (auto *F : Functions) {
    A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));

    // Seed AAAddressSpace for every memory-access pointer operand.
    for (Instruction &I : instructions(*F)) {
      Value *Ptr = nullptr;
      if (auto *LI = dyn_cast<LoadInst>(&I))
        Ptr = LI->getPointerOperand();
      else if (auto *SI = dyn_cast<StoreInst>(&I))
        Ptr = SI->getPointerOperand();
      else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
        Ptr = RMW->getPointerOperand();
      else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
        Ptr = CmpX->getPointerOperand();

      if (Ptr)
        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
    }
  }

  return A.run() == ChangeStatus::CHANGED;
}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isDSAddress(const Constant *C)
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
static cl::opt< unsigned > IndirectCallSpecializationThreshold("amdgpu-indirect-call-specialization-threshold", cl::desc("A threshold controls whether an indirect call will be specialized"), cl::init(3))
static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, bool HasApertureRegs, bool SupportsGetDoorBellID, unsigned CodeObjectVersion)
static bool funcRequiresHostcallPtr(const Function &F)
Returns true if the function requires the implicit argument be passed regardless of the function cont...
ImplicitArgumentPositions
static bool castRequiresQueuePtr(unsigned SrcAS)
Expand Atomic instructions
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool runImpl(Function &F, const TargetLowering &TLI)
AMD GCN specific subclass of TargetSubtarget.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
This class represents a conversion between pointers from one address space to another.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
LLVM Basic Block Representation.
Allocate memory in an ever growing pool, as if by bump-pointer.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Wrapper to unify "old style" CallGraph and "new style" LazyCallGraph.
A constant value that is initialized with an expression using other constant values.
This class represents a range of values.
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Implements a dense probed hash-table based set.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
unsigned getAddressSpace() const
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
This is an important class for using LLVM in a threaded context.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A vector that has set insertion semantics.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
A Use represents the edge between a Value definition and its users.
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
LLVM Value Representation.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
NodeAddr< CodeNode * > Code
This is an optimization pass for GlobalISel generic memory operations.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const char * to_string(ThinOrFullLTOPhase Phase)
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
@ REQUIRED
The target cannot be valid if the source is not.
@ Default
The result values are uniform if and only if all operands are uniform.
An abstract interface for address space information.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
An abstract state for querying live call edges.
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
An abstract interface for potential address space information.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
An abstract interface for struct information.
virtual bool forallInterferingAccesses(AA::RangeTy Range, function_ref< bool(const Access &, bool)> CB) const =0
Call CB on all accesses that might interfere with Range and return true if all such accesses were kno...
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Helper to represent an access offset and size, with logic to deal with uncertainty and check for over...
Base struct for all "concrete attribute" deductions.
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
An interface to query the internal state of an abstract attribute.
virtual ChangeStatus indicatePessimisticFixpoint()=0
Indicate that the abstract state should converge to the pessimistic state.
virtual bool isAtFixpoint() const =0
Return if this abstract state is fixed, thus does not need to be updated if information changes as it...
virtual bool isValidState() const =0
Return if this abstract state is in a valid state.
virtual ChangeStatus indicateOptimisticFixpoint()=0
Indicate that the abstract state should converge to the optimistic state.
Wrapper for FunctionAnalysisManager.
Configuration for the Attributor.
The fixpoint analysis framework that orchestrates the attribute deduction.
Class to accumulate and hold information about a callee.
Specialization of the integer state for a decreasing value, hence 0 is the best state and ~0u the wor...
Helper class that provides common functionality to manifest IR attributes.
ChangeStatus manifest(Attributor &A) override
See AbstractAttribute::manifest(...).
Helper to describe and deal with positions in the LLVM-IR.
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
@ IRP_FUNCTION
An attribute for a function (scope).
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Kind getPositionKind() const
Return the associated position kind.
State for an integer range.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
Helper to tie a abstract state implementation to an abstract attribute.
StateType & getState() override
See AbstractAttribute::getState(...).