75 ToErase.eraseFromParent();
79 if (!
G.isThreadLocal())
82 G.dropDroppableUses();
84 if (!
G.isConstantUsed())
90 OS <<
"Accelerator does not support the thread_local variable "
97 auto U = std::move(Tmp.
back());
100 if (!Visited.
insert(U).second)
103 if (isa<Instruction>(U))
104 I = cast<Instruction>(U);
106 Tmp.
insert(Tmp.
end(), U->user_begin(), U->user_end());
107 }
while (!
I && !Tmp.
empty());
109 assert(
I &&
"thread_local global should have at least one non-constant use.");
111 G.getContext().diagnose(
119 while (!M.functions().empty())
121 while (!M.globals().empty())
123 while (!M.aliases().empty())
125 while (!M.ifuncs().empty())
135 while (!Stack.empty()) {
136 Use &U = Stack.pop_back_val();
137 if (isa<Instruction>(U.getUser()))
138 Uses.emplace_back(U);
140 transform(U.getUser()->uses(), std::back_inserter(Stack),
141 [](
auto &&U) { return std::ref(U); });
154 N->setInitializer(CDS);
155 N->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
156 N->setConstant(
true);
165 Type *PtrTy = PointerType::get(
166 M->getContext(), M->getDataLayout().getDefaultGlobalsAddressSpace());
170 NewG->
setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
183 auto *InitTy = cast<StructType>(IndirectionTable->
getValueType());
184 auto *SymbolListTy = cast<StructType>(InitTy->getStructElementType(2));
185 Type *NameTy = SymbolListTy->getElementType(0);
186 Type *IndirectTy = SymbolListTy->getElementType(1);
201 size_t SymCnt = Indirections.
size();
203 auto *InitTy = cast<StructType>(IndirectionTable->
getValueType());
205 auto *SymbolTy = cast<StructType>(InitTy->getStructElementType(2));
207 Constant *Count = ConstantInt::get(InitTy->getStructElementType(0), SymCnt);
208 M->removeGlobalVariable(IndirectionTable);
210 M->getOrInsertGlobal(
"", ArrayType::get(SymbolTy, SymCnt));
211 Symbols->setLinkage(GlobalValue::LinkageTypes::PrivateLinkage);
212 Symbols->setInitializer(
214 Symbols->setConstant(
true);
219 M->insertGlobalVariable(IndirectionTable);
225 auto *
I = cast<Instruction>(U.getUser());
228 unsigned OpIdx = U.getOperandNo();
234 while (
auto *CE = dyn_cast<ConstantExpr>(
Op)) {
235 assert((CE->getOpcode() == Instruction::GetElementPtr ||
236 CE->getOpcode() == Instruction::AddrSpaceCast ||
237 CE->getOpcode() == Instruction::PtrToInt) &&
238 "Only GEP, ASCAST or PTRTOINT constant uses supported!");
241 I->replaceUsesOfWith(
Op, NewI);
243 Op =
I->getOperand(0);
248 assert(
Op ==
G &&
"Must reach indirected global!");
260 if (!isa<StructType>(Ty)) {
261 OS <<
"The Indirection Table must be a struct type; ";
263 OS <<
" is incorrect.\n";
265 OS <<
"The Indirection Table must have 3 elements; "
266 << cast<StructType>(Ty)->getNumElements() <<
" is incorrect.\n";
267 }
else if (!isa<IntegerType>(cast<StructType>(Ty)->getStructElementType(0))) {
268 OS <<
"The first element in the Indirection Table must be an integer; ";
269 cast<StructType>(Ty)->getStructElementType(0)->print(
OS);
270 OS <<
" is incorrect.\n";
271 }
else if (!isa<PointerType>(cast<StructType>(Ty)->getStructElementType(1))) {
272 OS <<
"The second element in the Indirection Table must be a pointer; ";
273 cast<StructType>(Ty)->getStructElementType(1)->print(
OS);
274 OS <<
" is incorrect.\n";
275 }
else if (!isa<StructType>(cast<StructType>(Ty)->getStructElementType(2))) {
276 OS <<
"The third element in the Indirection Table must be a struct type; ";
277 cast<StructType>(Ty)->getStructElementType(2)->print(
OS);
278 OS <<
" is incorrect.\n";
295 for (
auto &&
G : ToIndirect) {
310 if (SymbolIndirections.
empty())
317 unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
320 for (
auto &&
G : M.globals()) {
323 if (
G.getAddressSpace() != GlobAS)
325 if (
G.isConstant() &&
G.hasInitializer() &&
G.hasAtLeastLocalUnnamedAddr())
331 if (ToIndirect.
empty())
334 if (
auto *
IT = M.getNamedGlobal(
"__hipstdpar_symbol_indirection_table")) {
339 for (
auto &&
G : ToIndirect) {
341 if (!
G->hasInitializer())
351 if (
auto F = dyn_cast<Function>(
C))
359 return !
F.isIntrinsic() && !Reachable.
contains(&
F);
373 const auto Dx =
F->getName().rfind(
"__hipstdpar_unsupported");
378 const auto N =
F->getName().substr(0, Dx);
384 OS <<
"Accelerator does not support the ASM block:\n"
385 << cast<ConstantDataArray>(CB->
getArgOperand(0))->getAsCString();
387 OS <<
"Accelerator does not support the " <<
N <<
" function.";
389 auto Caller = CB->
getParent()->getParent();
391 Caller->getContext().diagnose(
403 for (
auto &&CGN : CGA) {
407 Reachable.
insert(CGN.first);
411 auto F = std::move(Tmp.back());
414 for (
auto &&
N : *CGA[
F]) {
417 if (!
N.second->getFunction())
419 if (Reachable.contains(
N.second->getFunction()))
423 dyn_cast<CallBase>(*
N.first)))
426 Reachable.insert(
N.second->getFunction());
427 Tmp.push_back(
N.second->getFunction());
429 }
while (!std::empty(Tmp));
432 if (std::empty(Reachable))
// Compile-time table of (first, second) symbol-name pairs. In every entry the
// first string is the name of a memory allocation / deallocation / mapping
// entry point and the second string is a `__hipstdpar_`-prefixed name paired
// with it. Several first names share one second name (e.g. "aligned_alloc",
// "memalign" and "__libc_memalign" all pair with "__hipstdpar_aligned_alloc").
// NOTE(review): the code that consumes this table is not visible in this
// excerpt; presumably the allocation interposition pass looks functions up by
// the first name and redirects their uses to the second -- confirm at the use
// site.
442static constexpr std::pair<StringLiteral, StringLiteral>
ReplaceMap[]{
// Unprefixed C library / POSIX names.
443 {
"aligned_alloc",
"__hipstdpar_aligned_alloc"},
444 {
"calloc",
"__hipstdpar_calloc"},
445 {
"free",
"__hipstdpar_free"},
446 {
"malloc",
"__hipstdpar_malloc"},
447 {
"memalign",
"__hipstdpar_aligned_alloc"},
448 {
"mmap",
"__hipstdpar_mmap"},
449 {
"munmap",
"__hipstdpar_munmap"},
450 {
"posix_memalign",
"__hipstdpar_posix_aligned_alloc"},
451 {
"realloc",
"__hipstdpar_realloc"},
452 {
"reallocarray",
"__hipstdpar_realloc_array"},
// Itanium-mangled operator delete[] (_Zda...) and operator delete (_Zdl...)
// overloads; the array and non-array forms pair with the same second name.
453 {
"_ZdaPv",
"__hipstdpar_operator_delete"},
454 {
"_ZdaPvm",
"__hipstdpar_operator_delete_sized"},
455 {
"_ZdaPvSt11align_val_t",
"__hipstdpar_operator_delete_aligned"},
456 {
"_ZdaPvmSt11align_val_t",
"__hipstdpar_operator_delete_aligned_sized"},
457 {
"_ZdlPv",
"__hipstdpar_operator_delete"},
458 {
"_ZdlPvm",
"__hipstdpar_operator_delete_sized"},
459 {
"_ZdlPvSt11align_val_t",
"__hipstdpar_operator_delete_aligned"},
460 {
"_ZdlPvmSt11align_val_t",
"__hipstdpar_operator_delete_aligned_sized"},
// Itanium-mangled operator new[] (_Zna...) and operator new (_Znw...)
// overloads; again the array and non-array forms share second names.
461 {
"_Znam",
"__hipstdpar_operator_new"},
462 {
"_ZnamRKSt9nothrow_t",
"__hipstdpar_operator_new_nothrow"},
463 {
"_ZnamSt11align_val_t",
"__hipstdpar_operator_new_aligned"},
464 {
"_ZnamSt11align_val_tRKSt9nothrow_t",
465 "__hipstdpar_operator_new_aligned_nothrow"},
467 {
"_Znwm",
"__hipstdpar_operator_new"},
468 {
"_ZnwmRKSt9nothrow_t",
"__hipstdpar_operator_new_nothrow"},
469 {
"_ZnwmSt11align_val_t",
"__hipstdpar_operator_new_aligned"},
470 {
"_ZnwmSt11align_val_tRKSt9nothrow_t",
471 "__hipstdpar_operator_new_aligned_nothrow"},
// `__builtin_`-prefixed spellings, paired with the same second names as their
// unprefixed counterparts above.
472 {
"__builtin_calloc",
"__hipstdpar_calloc"},
473 {
"__builtin_free",
"__hipstdpar_free"},
474 {
"__builtin_malloc",
"__hipstdpar_malloc"},
475 {
"__builtin_operator_delete",
"__hipstdpar_operator_delete"},
476 {
"__builtin_operator_new",
"__hipstdpar_operator_new"},
477 {
"__builtin_realloc",
"__hipstdpar_realloc"},
// `__libc_`-prefixed spellings, likewise paired with the same second names.
478 {
"__libc_calloc",
"__hipstdpar_calloc"},
479 {
"__libc_free",
"__hipstdpar_free"},
480 {
"__libc_malloc",
"__hipstdpar_malloc"},
481 {
"__libc_memalign",
"__hipstdpar_aligned_alloc"},
482 {
"__libc_realloc",
"__hipstdpar_realloc"}};
// Compile-time table of (first, second) symbol-name pairs. Here the first
// string is a `__hipstdpar_hidden_*` name and the second is the name it is
// paired with: a `__libc_`-prefixed symbol for malloc / free / memalign, and
// the plain names for mmap / munmap. Note that the pairing direction is the
// opposite of a table keyed by the ordinary allocation name.
// NOTE(review): the consumer of this table is not visible in this excerpt;
// presumably each first-named function found in the module has its uses
// redirected to a declaration of the second name -- confirm at the use site.
484static constexpr std::pair<StringLiteral, StringLiteral>
HiddenMap[]{
487 {
"__hipstdpar_hidden_malloc",
"__libc_malloc"},
488 {
"__hipstdpar_hidden_free",
"__libc_free"},
489 {
"__hipstdpar_hidden_memalign",
"__libc_memalign"},
490 {
"__hipstdpar_hidden_mmap",
"mmap"},
491 {
"__hipstdpar_hidden_munmap",
"munmap"}};
501 auto It = AllocReplacements.
find(
F.getName());
502 if (It == AllocReplacements.
end())
505 if (
auto R = M.getFunction(It->second)) {
506 F.replaceAllUsesWith(R);
511 OS <<
"cannot be interposed, missing: " << AllocReplacements[
F.getName()]
512 <<
". Tried to run the allocation interposition pass without the "
513 <<
"replacement functions available.";
522 if (
auto F = M.getFunction(HR.first)) {
523 auto R = M.getOrInsertFunction(HR.second,
F->getFunctionType(),
525 F->replaceAllUsesWith(R.getCallee());
535 {
"acosh",
"__hipstdpar_acosh_f64"},
536 {
"acoshf",
"__hipstdpar_acosh_f32"},
537 {
"asinh",
"__hipstdpar_asinh_f64"},
538 {
"asinhf",
"__hipstdpar_asinh_f32"},
539 {
"atanh",
"__hipstdpar_atanh_f64"},
540 {
"atanhf",
"__hipstdpar_atanh_f32"},
541 {
"cbrt",
"__hipstdpar_cbrt_f64"},
542 {
"cbrtf",
"__hipstdpar_cbrt_f32"},
543 {
"erf",
"__hipstdpar_erf_f64"},
544 {
"erff",
"__hipstdpar_erf_f32"},
545 {
"erfc",
"__hipstdpar_erfc_f64"},
546 {
"erfcf",
"__hipstdpar_erfc_f32"},
547 {
"fdim",
"__hipstdpar_fdim_f64"},
548 {
"fdimf",
"__hipstdpar_fdim_f32"},
549 {
"expm1",
"__hipstdpar_expm1_f64"},
550 {
"expm1f",
"__hipstdpar_expm1_f32"},
551 {
"hypot",
"__hipstdpar_hypot_f64"},
552 {
"hypotf",
"__hipstdpar_hypot_f32"},
553 {
"ilogb",
"__hipstdpar_ilogb_f64"},
554 {
"ilogbf",
"__hipstdpar_ilogb_f32"},
555 {
"lgamma",
"__hipstdpar_lgamma_f64"},
556 {
"lgammaf",
"__hipstdpar_lgamma_f32"},
557 {
"log1p",
"__hipstdpar_log1p_f64"},
558 {
"log1pf",
"__hipstdpar_log1p_f32"},
559 {
"logb",
"__hipstdpar_logb_f64"},
560 {
"logbf",
"__hipstdpar_logb_f32"},
561 {
"nextafter",
"__hipstdpar_nextafter_f64"},
562 {
"nextafterf",
"__hipstdpar_nextafter_f32"},
563 {
"nexttoward",
"__hipstdpar_nexttoward_f64"},
564 {
"nexttowardf",
"__hipstdpar_nexttoward_f32"},
565 {
"remainder",
"__hipstdpar_remainder_f64"},
566 {
"remainderf",
"__hipstdpar_remainder_f32"},
567 {
"remquo",
"__hipstdpar_remquo_f64"},
568 {
"remquof",
"__hipstdpar_remquo_f32"},
569 {
"scalbln",
"__hipstdpar_scalbln_f64"},
570 {
"scalblnf",
"__hipstdpar_scalbln_f32"},
571 {
"scalbn",
"__hipstdpar_scalbn_f64"},
572 {
"scalbnf",
"__hipstdpar_scalbn_f32"},
573 {
"tgamma",
"__hipstdpar_tgamma_f64"},
574 {
"tgammaf",
"__hipstdpar_tgamma_f32"}};
598 case Intrinsic::acos:
599 case Intrinsic::asin:
600 case Intrinsic::atan:
601 case Intrinsic::atan2:
602 case Intrinsic::cosh:
603 case Intrinsic::modf:
604 case Intrinsic::sinh:
606 case Intrinsic::tanh:
609 if (
F.getReturnType()->isDoubleTy()) {
613 case Intrinsic::exp2:
615 case Intrinsic::log10:
616 case Intrinsic::log2:
632 ToReplace.
back().second.replace(0, Prefix.size(),
"__hipstdpar");
634 for (
auto &&[
F, NewF] : ToReplace)
635 F->replaceAllUsesWith(
636 M.getOrInsertFunction(NewF,
F->getFunctionType()).getCallee());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefAnalysis InstSet & ToRemove
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static constexpr std::pair< StringLiteral, StringLiteral > HiddenMap[]
static SmallVector< std::reference_wrapper< Use > > collectIndirectableUses(GlobalVariable *G)
static constexpr std::pair< StringLiteral, StringLiteral > ReplaceMap[]
static void maybeHandleGlobals(Module &M)
static void replaceWithIndirectUse(const Use &U, const GlobalVariable *G, Constant *IndirectedG)
static bool isAcceleratorExecutionRoot(const Function *F)
static void eraseFromModule(T &ToErase)
static void removeUnreachableFunctions(const SmallPtrSet< const Function *, N > &Reachable, Module &M)
static constexpr std::pair< StringLiteral, StringLiteral > MathLibToHipStdPar[]
static void fillIndirectionTable(GlobalVariable *IndirectionTable, SmallVector< Constant * > Indirections)
static bool checkIfSupported(GlobalVariable &G)
static void indirectGlobals(GlobalVariable *IndirectionTable, SmallVector< GlobalVariable * > ToIndirect)
static GlobalVariable * getGlobalForName(GlobalVariable *G)
static GlobalVariable * getIndirectionGlobal(Module *M)
static Constant * appendIndirectedGlobal(const GlobalVariable *IndirectionTable, SmallVector< Constant * > &SymbolIndirections, GlobalVariable *ToIndirect)
static void clearModule(Module &M)
static bool isValidIndirectionTable(GlobalVariable *IndirectionTable)
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
Module.h This file contains the declarations for the Module class.
MachineInstr unsigned OpIdx
ModuleAnalysisManager MAM
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Value * getArgOperand(unsigned i) const
An analysis pass to compute the CallGraph for a Module.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Diagnostic information for unsupported feature in backend.
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
Type * getValueType() const
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
void setConstant(bool Val)
void setExternallyInitialized(bool Val)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
const ParentTy * getParent() const
A raw_ostream that writes to an std::string.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)
Removes global values from the llvm.used and llvm.compiler.used arrays.
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.