197#include "llvm/IR/IntrinsicsAMDGPU.h"
214#define DEBUG_TYPE "amdgpu-lower-module-lds"
217using namespace AMDGPU;
222 "amdgpu-super-align-lds-globals",
223 cl::desc(
"Increase alignment of LDS if it is not on align boundary"),
226enum class LoweringKind { module, table, kernel, hybrid };
228 "amdgpu-lower-module-lds-strategy",
232 clEnumValN(LoweringKind::table,
"table",
"Lower via table lookup"),
233 clEnumValN(LoweringKind::module,
"module",
"Lower via module struct"),
235 LoweringKind::kernel,
"kernel",
236 "Lower variables reachable from one kernel, otherwise abort"),
238 "Lower via mixture of above strategies")));
240template <
typename T> std::vector<T> sortByName(std::vector<T> &&V) {
241 llvm::sort(V, [](
const auto *L,
const auto *R) {
242 return L->getName() < R->getName();
244 return {std::move(V)};
247class AMDGPULowerModuleLDS {
251 removeLocalVarsFromUsedLists(
Module &M,
257 LocalVarsSet.
insert(cast<Constant>(LocalVar->stripPointerCasts()));
263 LocalVar->removeDeadConstantUsers();
288 IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
291 Func->getParent(), Intrinsic::donothing, {});
293 Value *UseInstance[1] = {
303 struct LDSVariableReplacement {
313 static Constant *getAddressesOfVariablesInKernel(
323 ArrayType *KernelOffsetsType = ArrayType::get(I32, Variables.
size());
327 auto ConstantGepIt = LDSVarsToConstantGEP.
find(GV);
328 if (ConstantGepIt != LDSVarsToConstantGEP.
end()) {
330 Elements.push_back(elt);
342 if (Variables.
empty()) {
347 const size_t NumberVariables = Variables.
size();
348 const size_t NumberKernels = kernels.
size();
354 ArrayType::get(KernelOffsetsType, NumberKernels);
357 std::vector<Constant *> overallConstantExprElts(NumberKernels);
358 for (
size_t i = 0; i < NumberKernels; i++) {
359 auto Replacement = KernelToReplacement.
find(kernels[i]);
360 overallConstantExprElts[i] =
361 (Replacement == KernelToReplacement.
end())
363 : getAddressesOfVariablesInKernel(
364 Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
379 Value *OptionalIndex) {
383 auto *
I = cast<Instruction>(U.getUser());
385 Value *tableKernelIndex = getTableLookupKernelIndex(M,
I->getFunction());
387 if (
auto *Phi = dyn_cast<PHINode>(
I)) {
395 ConstantInt::get(I32, 0),
402 LookupTable->getValueType(), LookupTable, GEPIdx, GV->
getName());
412 void replaceUsesInInstructionsWithTableLookup(
420 for (
size_t Index = 0; Index < ModuleScopeVariables.
size(); Index++) {
421 auto *GV = ModuleScopeVariables[Index];
424 auto *
I = dyn_cast<Instruction>(U.getUser());
428 replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
429 ConstantInt::get(I32, Index));
440 if (VariableSet.
empty())
443 for (
Function &Func : M.functions()) {
448 KernelSet.insert(&Func);
458 chooseBestVariableForModuleStrategy(
const DataLayout &
DL,
464 size_t UserCount = 0;
467 CandidateTy() =
default;
470 : GV(GV), UserCount(UserCount),
Size(AllocSize) {}
474 if (UserCount <
Other.UserCount) {
477 if (UserCount >
Other.UserCount) {
495 CandidateTy MostUsed;
497 for (
auto &K : LDSVars) {
499 if (K.second.size() <= 1) {
504 CandidateTy Candidate(
507 if (MostUsed < Candidate)
508 MostUsed = Candidate;
532 auto [It, Inserted] = tableKernelIndexCache.
try_emplace(
F);
534 auto InsertAt =
F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
537 It->second = Builder.
CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
543 static std::vector<Function *> assignLDSKernelIDToEachKernel(
551 std::vector<Function *> OrderedKernels;
552 if (!KernelsThatAllocateTableLDS.
empty() ||
553 !KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
555 for (
Function &Func : M->functions()) {
556 if (Func.isDeclaration())
561 if (KernelsThatAllocateTableLDS.
contains(&Func) ||
562 KernelsThatIndirectlyAllocateDynamicLDS.
contains(&Func)) {
564 OrderedKernels.push_back(&Func);
569 OrderedKernels = sortByName(std::move(OrderedKernels));
575 if (OrderedKernels.size() > UINT32_MAX) {
580 for (
size_t i = 0; i < OrderedKernels.size(); i++) {
584 OrderedKernels[i]->setMetadata(
"llvm.amdgcn.lds.kernel.id",
588 return OrderedKernels;
591 static void partitionVariablesIntoIndirectStrategies(
600 LoweringKindLoc != LoweringKind::hybrid
602 : chooseBestVariableForModuleStrategy(
603 M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
608 ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
611 for (
auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
617 assert(K.second.size() != 0);
620 DynamicVariables.
insert(GV);
624 switch (LoweringKindLoc) {
625 case LoweringKind::module:
626 ModuleScopeVariables.insert(GV);
629 case LoweringKind::table:
630 TableLookupVariables.
insert(GV);
633 case LoweringKind::kernel:
634 if (K.second.size() == 1) {
635 KernelAccessVariables.
insert(GV);
639 "cannot lower LDS '" + GV->
getName() +
640 "' to kernel access as it is reachable from multiple kernels");
644 case LoweringKind::hybrid: {
645 if (GV == HybridModuleRoot) {
646 assert(K.second.size() != 1);
647 ModuleScopeVariables.insert(GV);
648 }
else if (K.second.size() == 1) {
649 KernelAccessVariables.
insert(GV);
650 }
else if (
set_is_subset(K.second, HybridModuleRootKernels)) {
651 ModuleScopeVariables.insert(GV);
653 TableLookupVariables.
insert(GV);
662 assert(ModuleScopeVariables.
size() + TableLookupVariables.
size() +
663 KernelAccessVariables.
size() + DynamicVariables.
size() ==
664 LDSToKernelsThatNeedToAccessItIndirectly.size());
677 if (ModuleScopeVariables.
empty()) {
683 LDSVariableReplacement ModuleScopeReplacement =
684 createLDSVariableReplacement(M,
"llvm.amdgcn.module.lds",
685 ModuleScopeVariables);
689 cast<Constant>(ModuleScopeReplacement.SGV),
690 PointerType::getUnqual(Ctx)))});
693 recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
696 removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
699 replaceLDSVariablesWithStruct(
700 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
713 for (
Function &Func : M.functions()) {
717 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
718 replaceLDSVariablesWithStruct(
719 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
728 markUsedByKernel(&Func, ModuleScopeReplacement.SGV);
732 return ModuleScopeReplacement.SGV;
736 lowerKernelScopeStructVariables(
745 for (
Function &Func : M.functions()) {
754 KernelUsedVariables.
insert(v);
762 KernelUsedVariables.
insert(v);
768 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
770 KernelUsedVariables.
erase(v);
774 if (KernelUsedVariables.
empty()) {
786 if (!Func.hasName()) {
790 std::string VarName =
791 (
Twine(
"llvm.amdgcn.kernel.") + Func.getName() +
".lds").str();
794 createLDSVariableReplacement(M, VarName, KernelUsedVariables);
802 markUsedByKernel(&Func, Replacement.SGV);
805 removeLocalVarsFromUsedLists(M, KernelUsedVariables);
806 KernelToReplacement[&Func] = Replacement;
809 replaceLDSVariablesWithStruct(
810 M, KernelUsedVariables, Replacement, [&Func](
Use &U) {
812 return I &&
I->getFunction() == &Func;
815 return KernelToReplacement;
835 Align MaxDynamicAlignment(1);
839 MaxDynamicAlignment =
845 UpdateMaxAlignment(GV);
849 UpdateMaxAlignment(GV);
858 N->setAlignment(MaxDynamicAlignment);
868 std::vector<Function *>
const &OrderedKernels) {
870 if (!KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
875 std::vector<Constant *> newDynamicLDS;
878 for (
auto &
func : OrderedKernels) {
880 if (KernelsThatIndirectlyAllocateDynamicLDS.
contains(
func)) {
882 if (!
func->hasName()) {
887 buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo,
func);
889 KernelToCreatedDynamicLDS[
func] =
N;
891 markUsedByKernel(
func,
N);
895 emptyCharArray,
N, ConstantInt::get(I32, 0),
true);
901 assert(OrderedKernels.size() == newDynamicLDS.size());
903 ArrayType *t = ArrayType::get(I32, newDynamicLDS.size());
907 "llvm.amdgcn.dynlds.offset.table",
nullptr,
912 auto *
I = dyn_cast<Instruction>(U.getUser());
918 replaceUseWithTableLookup(M, Builder, table, GV, U,
nullptr);
922 return KernelToCreatedDynamicLDS;
927 bool NeedsReplacement =
false;
929 if (
auto *
I = dyn_cast<Instruction>(U.getUser())) {
932 NeedsReplacement =
true;
937 if (!NeedsReplacement)
946 if (
auto *
I = dyn_cast<Instruction>(U.getUser())) {
949 U.getUser()->replaceUsesOfWith(GV, NewGV);
956 bool lowerSpecialLDSVariables(
959 bool Changed =
false;
962 int NumAbsolutes = 0;
963 std::vector<GlobalVariable *> OrderedGVs;
964 for (
auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
971 if (LDSToKernelsThatNeedToAccessItIndirectly[GV].
size() > 1) {
972 OrderedGVs.push_back(GV);
978 LDSToKernelsThatNeedToAccessItIndirectly.
erase(GV);
980 OrderedGVs = sortByName(std::move(OrderedGVs));
983 unsigned BarId = NumAbsolutes + 1;
985 NumAbsolutes += BarCnt;
989 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
990 recordLDSAbsoluteAddress(&M, GV,
Offset);
997 std::vector<Function *> OrderedKernels;
1001 OrderedKernels.push_back(
F);
1003 OrderedKernels = sortByName(std::move(OrderedKernels));
1016 OrderedGVs.push_back(GV);
1018 OrderedGVs = sortByName(std::move(OrderedGVs));
1022 auto NewGV = uniquifyGVPerKernel(M, GV,
F);
1023 Changed |= (NewGV != GV);
1025 unsigned BarId = Kernel2BarId[
F];
1026 BarId += NumAbsolutes + 1;
1028 Kernel2BarId[
F] += BarCnt;
1029 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
1030 recordLDSAbsoluteAddress(&M, NewGV,
Offset);
1045 bool runOnModule(
Module &M) {
1047 bool Changed = superAlignLDSGlobals(M);
1063 LDSToKernelsThatNeedToAccessItIndirectly[GV].
insert(
F);
1069 Changed |= lowerSpecialLDSVariables(
1070 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly);
1078 partitionVariablesIntoIndirectStrategies(
1079 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
1080 ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,
1087 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1088 ModuleScopeVariables);
1090 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1091 TableLookupVariables);
1094 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1097 GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
1098 M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
1101 lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
1102 KernelsThatAllocateModuleLDS,
1103 MaybeModuleScopeStruct);
1106 for (
auto &GV : KernelAccessVariables) {
1107 auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];
1108 assert(funcs.size() == 1);
1109 LDSVariableReplacement Replacement =
1110 KernelToReplacement[*(funcs.begin())];
1115 replaceLDSVariablesWithStruct(M, Vec, Replacement, [](
Use &U) {
1116 return isa<Instruction>(U.getUser());
1121 std::vector<Function *> OrderedKernels =
1122 assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
1123 KernelsThatIndirectlyAllocateDynamicLDS);
1125 if (!KernelsThatAllocateTableLDS.
empty()) {
1131 auto TableLookupVariablesOrdered =
1132 sortByName(std::vector<GlobalVariable *>(TableLookupVariables.
begin(),
1133 TableLookupVariables.
end()));
1136 M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
1137 replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
1142 lowerDynamicLDSVariables(M, LDSUsesInfo,
1143 KernelsThatIndirectlyAllocateDynamicLDS,
1144 DynamicVariables, OrderedKernels);
1149 for (
auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
1150 &KernelsThatAllocateTableLDS})
1159 for (
Function &Func : M.functions()) {
1174 const bool AllocateModuleScopeStruct =
1175 MaybeModuleScopeStruct &&
1176 KernelsThatAllocateModuleLDS.
contains(&Func);
1178 auto Replacement = KernelToReplacement.
find(&Func);
1179 const bool AllocateKernelScopeStruct =
1180 Replacement != KernelToReplacement.
end();
1182 const bool AllocateDynamicVariable =
1183 KernelToCreatedDynamicLDS.
contains(&Func);
1187 if (AllocateModuleScopeStruct) {
1193 if (AllocateKernelScopeStruct) {
1196 recordLDSAbsoluteAddress(&M, KernelStruct,
Offset);
1204 if (AllocateDynamicVariable) {
1205 GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
1207 recordLDSAbsoluteAddress(&M, DynamicVariable,
Offset);
1222 if (AllocateDynamicVariable)
1225 Func.addFnAttr(
"amdgpu-lds-size", Buffer);
1244 static bool superAlignLDSGlobals(
Module &M) {
1246 bool Changed =
false;
1247 if (!SuperAlignLDSGlobals) {
1251 for (
auto &GV : M.globals()) {
1271 Alignment = std::max(Alignment,
Align(16));
1272 }
else if (GVSize > 4) {
1274 Alignment = std::max(Alignment,
Align(8));
1275 }
else if (GVSize > 2) {
1277 Alignment = std::max(Alignment,
Align(4));
1278 }
else if (GVSize > 1) {
1280 Alignment = std::max(Alignment,
Align(2));
1291 static LDSVariableReplacement createLDSVariableReplacement(
1292 Module &M, std::string VarName,
1309 auto Sorted = sortByName(std::vector<GlobalVariable *>(
1310 LDSVarsToTransform.
begin(), LDSVarsToTransform.
end()));
1322 std::vector<GlobalVariable *> LocalVars;
1325 IsPaddingField.
reserve(LDSVarsToTransform.
size());
1328 for (
auto &
F : LayoutFields) {
1331 Align DataAlign =
F.Alignment;
1334 if (
uint64_t Rem = CurrentOffset % DataAlignV) {
1335 uint64_t Padding = DataAlignV - Rem;
1347 CurrentOffset += Padding;
1350 LocalVars.push_back(FGV);
1352 CurrentOffset +=
F.Size;
1356 std::vector<Type *> LocalVarTypes;
1357 LocalVarTypes.reserve(LocalVars.size());
1359 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
1374 for (
size_t I = 0;
I < LocalVars.size();
I++) {
1376 Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32,
I)};
1378 if (IsPaddingField[
I]) {
1385 assert(Map.size() == LDSVarsToTransform.
size());
1386 return {SGV, std::move(Map)};
1389 template <
typename PredicateTy>
1390 static void replaceLDSVariablesWithStruct(
1392 const LDSVariableReplacement &Replacement, PredicateTy
Predicate) {
1399 auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
1400 LDSVarsToTransformArg.
begin(), LDSVarsToTransformArg.
end()));
1406 const size_t NumberVars = LDSVarsToTransform.
size();
1407 if (NumberVars > 1) {
1409 AliasScopes.
reserve(NumberVars);
1411 for (
size_t I = 0;
I < NumberVars;
I++) {
1415 NoAliasList.
append(&AliasScopes[1], AliasScopes.
end());
1420 for (
size_t I = 0;
I < NumberVars;
I++) {
1422 Constant *
GEP = Replacement.LDSVarsToConstantGEP.at(GV);
1426 APInt APOff(
DL.getIndexTypeSizeInBits(
GEP->getType()), 0);
1427 GEP->stripAndAccumulateInBoundsConstantOffsets(
DL, APOff);
1434 NoAliasList[
I - 1] = AliasScopes[
I - 1];
1440 refineUsesAlignmentAndAA(
GEP,
A,
DL, AliasScope, NoAlias);
1446 MDNode *NoAlias,
unsigned MaxDepth = 5) {
1447 if (!MaxDepth || (
A == 1 && !AliasScope))
1452 for (
User *U :
Ptr->users()) {
1453 if (
auto *
I = dyn_cast<Instruction>(U)) {
1454 if (AliasScope &&
I->mayReadOrWriteMemory()) {
1455 MDNode *AS =
I->getMetadata(LLVMContext::MD_alias_scope);
1458 I->setMetadata(LLVMContext::MD_alias_scope, AS);
1460 MDNode *NA =
I->getMetadata(LLVMContext::MD_noalias);
1484 if (Intersection.empty()) {
1489 I->setMetadata(LLVMContext::MD_noalias, NA);
1493 if (
auto *LI = dyn_cast<LoadInst>(U)) {
1494 LI->setAlignment(std::max(
A, LI->getAlign()));
1497 if (
auto *SI = dyn_cast<StoreInst>(U)) {
1498 if (SI->getPointerOperand() ==
Ptr)
1499 SI->setAlignment(std::max(
A, SI->getAlign()));
1502 if (
auto *AI = dyn_cast<AtomicRMWInst>(U)) {
1505 if (AI->getPointerOperand() ==
Ptr)
1506 AI->setAlignment(std::max(
A, AI->getAlign()));
1509 if (
auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
1510 if (AI->getPointerOperand() ==
Ptr)
1511 AI->setAlignment(std::max(
A, AI->getAlign()));
1514 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(U)) {
1515 unsigned BitWidth =
DL.getIndexTypeSizeInBits(
GEP->getType());
1517 if (
GEP->getPointerOperand() ==
Ptr) {
1519 if (
GEP->accumulateConstantOffset(
DL, Off))
1521 refineUsesAlignmentAndAA(
GEP, GA,
DL, AliasScope, NoAlias,
1526 if (
auto *
I = dyn_cast<Instruction>(U)) {
1527 if (
I->getOpcode() == Instruction::BitCast ||
1528 I->getOpcode() == Instruction::AddrSpaceCast)
1529 refineUsesAlignmentAndAA(
I,
A,
DL, AliasScope, NoAlias, MaxDepth - 1);
1535class AMDGPULowerModuleLDSLegacy :
public ModulePass {
1550 auto &TPC = getAnalysis<TargetPassConfig>();
1554 return AMDGPULowerModuleLDS(*TM).runOnModule(M);
1559char AMDGPULowerModuleLDSLegacy::ID = 0;
1564 "Lower uses of LDS variables from non-kernel functions",
1573 return new AMDGPULowerModuleLDSLegacy(TM);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Lower uses of LDS variables from non-kernel functions
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
Given that RA is a live value, propagate its liveness to any other values it uses (according to Uses). void DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::optional< std::vector< StOtherPiece > > Other
This file provides an interface for laying out a sequence of fields as a struct in a way that attempt...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
The basic data container for the call graph of a Module of IR.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
This is an important base class in LLVM.
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
A parsed version of the target data layout string, and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LinkageTypes getLinkage() const
LLVM_ABI bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
ThreadLocalMode getThreadLocalMode() const
PointerType * getType() const
Global values are always pointers.
@ InternalLinkage
Rename collisions when linking (static functions).
@ ExternalLinkage
Externally visible function.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
LLVM_ABI void copyAttributesFrom(const GlobalVariable *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a GlobalVariable) fro...
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateConstInBoundsGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This is an important class for using LLVM in a threaded context.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * concatenate(MDNode *A, MDNode *B)
Methods for metadata merging.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
A container for an operand bundle being viewed as a set of values rather than a set of uses.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A simple AA result which uses scoped-noalias metadata to answer queries.
LLVM_ABI void collectScopedDomains(const MDNode *NoAlias, SmallPtrSetImpl< const MDNode * > &Domains) const
Collect the set of scoped domains relevant to the noalias scopes.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Class to represent struct types.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
A raw_ostream that writes to an std::string.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ BARRIER_SCOPE_WORKGROUP
bool isDynamicLDS(const GlobalVariable &GV)
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)
Strip FnAttr attribute from any functions where we may have introduced its use.
LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool isLDSVariableToLower(const GlobalVariable &GV)
bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)
Align getAlign(const DataLayout &DL, const GlobalVariable *GV)
bool isKernelLDS(const Function *F)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A is a subset of B (every element of A is also in B)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void sort(IteratorTy Start, IteratorTy End)
char & AMDGPULowerModuleLDSLegacyPassID
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return the intersection of A and B (the elements present in both sets)
LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)
Removes global values from the llvm.used and llvm.compiler.used arrays.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
LLVM_ABI std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
constexpr unsigned BitWidth
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
const AMDGPUTargetMachine & TM
FunctionVariableMap direct_access
FunctionVariableMap indirect_access
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.