64#define DEBUG_TYPE "openmp-ir-builder"
71 cl::desc(
"Use optimistic attributes describing "
72 "'as-if' properties of runtime calls."),
76 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
77 cl::desc(
"Factor for the unroll threshold to account for code "
78 "simplifications still taking place"),
89 if (!IP1.isSet() || !IP2.isSet())
91 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
96 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
97 case OMPScheduleType::UnorderedStaticChunked:
98 case OMPScheduleType::UnorderedStatic:
99 case OMPScheduleType::UnorderedDynamicChunked:
100 case OMPScheduleType::UnorderedGuidedChunked:
101 case OMPScheduleType::UnorderedRuntime:
102 case OMPScheduleType::UnorderedAuto:
103 case OMPScheduleType::UnorderedTrapezoidal:
104 case OMPScheduleType::UnorderedGreedy:
105 case OMPScheduleType::UnorderedBalanced:
106 case OMPScheduleType::UnorderedGuidedIterativeChunked:
107 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
108 case OMPScheduleType::UnorderedSteal:
109 case OMPScheduleType::UnorderedStaticBalancedChunked:
110 case OMPScheduleType::UnorderedGuidedSimd:
111 case OMPScheduleType::UnorderedRuntimeSimd:
112 case OMPScheduleType::OrderedStaticChunked:
113 case OMPScheduleType::OrderedStatic:
114 case OMPScheduleType::OrderedDynamicChunked:
115 case OMPScheduleType::OrderedGuidedChunked:
116 case OMPScheduleType::OrderedRuntime:
117 case OMPScheduleType::OrderedAuto:
118 case OMPScheduleType::OrderdTrapezoidal:
119 case OMPScheduleType::NomergeUnorderedStaticChunked:
120 case OMPScheduleType::NomergeUnorderedStatic:
121 case OMPScheduleType::NomergeUnorderedDynamicChunked:
122 case OMPScheduleType::NomergeUnorderedGuidedChunked:
123 case OMPScheduleType::NomergeUnorderedRuntime:
124 case OMPScheduleType::NomergeUnorderedAuto:
125 case OMPScheduleType::NomergeUnorderedTrapezoidal:
126 case OMPScheduleType::NomergeUnorderedGreedy:
127 case OMPScheduleType::NomergeUnorderedBalanced:
128 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
129 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
130 case OMPScheduleType::NomergeUnorderedSteal:
131 case OMPScheduleType::NomergeOrderedStaticChunked:
132 case OMPScheduleType::NomergeOrderedStatic:
133 case OMPScheduleType::NomergeOrderedDynamicChunked:
134 case OMPScheduleType::NomergeOrderedGuidedChunked:
135 case OMPScheduleType::NomergeOrderedRuntime:
136 case OMPScheduleType::NomergeOrderedAuto:
137 case OMPScheduleType::NomergeOrderedTrapezoidal:
145 SchedType & OMPScheduleType::MonotonicityMask;
146 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
169 if (Features.
count(
"+wavefrontsize64"))
170 return omp::getAMDGPUGridValues<64>();
171 return omp::getAMDGPUGridValues<32>();
184 bool HasSimdModifier) {
186 switch (ClauseKind) {
187 case OMP_SCHEDULE_Default:
188 case OMP_SCHEDULE_Static:
189 return HasChunks ? OMPScheduleType::BaseStaticChunked
190 : OMPScheduleType::BaseStatic;
191 case OMP_SCHEDULE_Dynamic:
192 return OMPScheduleType::BaseDynamicChunked;
193 case OMP_SCHEDULE_Guided:
194 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
195 : OMPScheduleType::BaseGuidedChunked;
196 case OMP_SCHEDULE_Auto:
198 case OMP_SCHEDULE_Runtime:
199 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
200 : OMPScheduleType::BaseRuntime;
208 bool HasOrderedClause) {
209 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
210 OMPScheduleType::None &&
211 "Must not have ordering nor monotonicity flags already set");
214 ? OMPScheduleType::ModifierOrdered
215 : OMPScheduleType::ModifierUnordered;
216 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
219 if (OrderingScheduleType ==
220 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
221 return OMPScheduleType::OrderedGuidedChunked;
222 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
223 OMPScheduleType::ModifierOrdered))
224 return OMPScheduleType::OrderedRuntime;
226 return OrderingScheduleType;
232 bool HasSimdModifier,
bool HasMonotonic,
233 bool HasNonmonotonic,
bool HasOrderedClause) {
234 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
235 OMPScheduleType::None &&
236 "Must not have monotonicity flags already set");
237 assert((!HasMonotonic || !HasNonmonotonic) &&
238 "Monotonic and Nonmonotonic are contradicting each other");
241 return ScheduleType | OMPScheduleType::ModifierMonotonic;
242 }
else if (HasNonmonotonic) {
243 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
253 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
254 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
260 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
268 bool HasSimdModifier,
bool HasMonotonicModifier,
269 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
275 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
276 HasNonmonotonicModifier, HasOrderedClause);
290 auto *Br = cast<BranchInst>(Term);
291 assert(!Br->isConditional() &&
292 "BB's terminator must be an unconditional branch (or degenerate)");
295 Br->setSuccessor(0,
Target);
300 NewBr->setDebugLoc(
DL);
305 assert(New->getFirstInsertionPt() == New->begin() &&
306 "Target BB must not have PHI nodes");
322 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
326 NewBr->setDebugLoc(
DL);
352 New->replaceSuccessorsPhiUsesWith(Old, New);
397 const Twine &
Name =
"",
bool AsPtr =
true) {
405 FakeVal = FakeValAddr;
433enum OpenMPOffloadingRequiresDirFlags {
435 OMP_REQ_UNDEFINED = 0x000,
437 OMP_REQ_NONE = 0x001,
439 OMP_REQ_REVERSE_OFFLOAD = 0x002,
441 OMP_REQ_UNIFIED_ADDRESS = 0x004,
443 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
445 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
452 : RequiresFlags(OMP_REQ_UNDEFINED) {}
455 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
456 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
457 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
458 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
459 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
460 RequiresFlags(OMP_REQ_UNDEFINED) {
461 if (HasRequiresReverseOffload)
462 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
463 if (HasRequiresUnifiedAddress)
464 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
465 if (HasRequiresUnifiedSharedMemory)
466 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
467 if (HasRequiresDynamicAllocators)
468 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
472 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
476 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
480 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
484 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
489 :
static_cast<int64_t
>(OMP_REQ_NONE);
494 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
496 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
501 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
503 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
508 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
510 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
515 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
517 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
530 constexpr const size_t MaxDim = 3;
538 Value *NumThreads3D =
541 seq<unsigned>(1, std::min(KernelArgs.
NumTeams.size(), MaxDim)))
545 seq<unsigned>(1, std::min(KernelArgs.
NumThreads.size(), MaxDim)))
569 auto FnAttrs = Attrs.getFnAttrs();
570 auto RetAttrs = Attrs.getRetAttrs();
572 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
577 bool Param =
true) ->
void {
578 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
579 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
580 if (HasSignExt || HasZeroExt) {
581 assert(AS.getNumAttributes() == 1 &&
582 "Currently not handling extension attr combined with others.");
584 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
587 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
594#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
595#include "llvm/Frontend/OpenMP/OMPKinds.def"
599#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
601 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
602 addAttrSet(RetAttrs, RetAttrSet, false); \
603 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
604 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
605 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
607#include "llvm/Frontend/OpenMP/OMPKinds.def"
621#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
623 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
625 Fn = M.getFunction(Str); \
627#include "llvm/Frontend/OpenMP/OMPKinds.def"
633#define OMP_RTL(Enum, Str, ...) \
635 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
637#include "llvm/Frontend/OpenMP/OMPKinds.def"
641 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
651 LLVMContext::MD_callback,
653 2, {-1, -1},
true)}));
666 assert(Fn &&
"Failed to create OpenMP runtime function");
673 auto *Fn = dyn_cast<llvm::Function>(RTLFn.
getCallee());
674 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
689 for (
auto Inst =
Block->getReverseIterator()->begin();
690 Inst !=
Block->getReverseIterator()->end();) {
691 if (
auto *
AllocaInst = dyn_cast_if_present<llvm::AllocaInst>(Inst)) {
715 ParallelRegionBlockSet.
clear();
717 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
736 ".omp_par", ArgsInZeroAddressSpace);
740 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
742 "Expected OpenMP outlining to be possible!");
744 for (
auto *V : OI.ExcludeArgsFromAggregate)
751 if (TargetCpuAttr.isStringAttribute())
754 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
755 if (TargetFeaturesAttr.isStringAttribute())
756 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
759 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
761 "OpenMP outlined functions should not return a value!");
773 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
780 "Expected instructions to add in the outlined region entry");
787 if (
I.isTerminator()) {
789 if (OI.EntryBB->getTerminator())
790 OI.EntryBB->getTerminator()->adoptDbgRecords(
791 &ArtificialEntry,
I.getIterator(),
false);
795 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
798 OI.EntryBB->moveBefore(&ArtificialEntry);
805 if (OI.PostOutlineCB)
806 OI.PostOutlineCB(*OutlinedFn);
837 errs() <<
"Error of kind: " << Kind
838 <<
" when emitting offload entries and metadata during "
839 "OMPIRBuilder finalization \n";
846 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
848 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
878 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
882 if (UsedArray.
empty())
889 GV->setSection(
"llvm.metadata");
898 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
906 unsigned Reserve2Flags) {
908 LocFlags |= OMP_IDENT_FLAG_KMPC;
916 ConstantInt::get(
Int32, Reserve2Flags),
917 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
924 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
925 if (
GV.getInitializer() == Initializer)
930 M, OpenMPIRBuilder::Ident,
945 SrcLocStrSize = LocStr.
size();
954 if (
GV.isConstant() &&
GV.hasInitializer() &&
955 GV.getInitializer() == Initializer)
966 unsigned Line,
unsigned Column,
972 Buffer.
append(FunctionName);
974 Buffer.
append(std::to_string(Line));
976 Buffer.
append(std::to_string(Column));
984 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
995 if (
DIFile *DIF = DIL->getFile())
996 if (std::optional<StringRef> Source = DIF->getSource())
1002 DIL->getColumn(), SrcLocStrSize);
1014 "omp_global_thread_num");
1019 bool ForceSimpleCall,
bool CheckCancelFlag) {
1029 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1032 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1035 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1038 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1041 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1054 bool UseCancelBarrier =
1059 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1060 : OMPRTL___kmpc_barrier),
1063 if (UseCancelBarrier && CheckCancelFlag)
1073 omp::Directive CanceledDirective) {
1085 Value *CancelKind =
nullptr;
1086 switch (CanceledDirective) {
1087#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1088 case DirectiveEnum: \
1089 CancelKind = Builder.getInt32(Value); \
1091#include "llvm/Frontend/OpenMP/OMPKinds.def"
1103 if (CanceledDirective == OMPD_parallel) {
1107 omp::Directive::OMPD_unknown,
1121 UI->eraseFromParent();
1128 omp::Directive CanceledDirective) {
1136 Value *CancelKind =
nullptr;
1137 switch (CanceledDirective) {
1138#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1139 case DirectiveEnum: \
1140 CancelKind = Builder.getInt32(Value); \
1142#include "llvm/Frontend/OpenMP/OMPKinds.def"
1154 if (CanceledDirective == OMPD_parallel) {
1158 omp::Directive::OMPD_unknown,
1172 UI->eraseFromParent();
1185 auto *KernelArgsPtr =
1198 NumThreads, HostPtr, KernelArgsPtr};
1225 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1229 Value *Return =
nullptr;
1249 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1250 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1266 emitBlock(OffloadContBlock, CurFn,
true);
1271 Value *CancelFlag, omp::Directive CanceledDirective,
1274 "Unexpected cancellation!");
1327 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1330 "Expected at least tid and bounded tid as arguments");
1331 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1334 assert(CI &&
"Expected call instruction to outlined function");
1335 CI->
getParent()->setName(
"omp_parallel");
1338 Type *PtrTy = OMPIRBuilder->VoidPtr;
1342 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.
saveIP();
1346 Value *Args = ArgsAlloca;
1354 for (
unsigned Idx = 0;
Idx < NumCapturedVars;
Idx++) {
1366 Value *Parallel51CallArgs[] = {
1370 NumThreads ? NumThreads : Builder.
getInt32(-1),
1375 Builder.
getInt64(NumCapturedVars)};
1380 Builder.
CreateCall(RTLFn, Parallel51CallArgs);
1395 I->eraseFromParent();
1417 if (
auto *
F = dyn_cast<Function>(RTLFn.
getCallee())) {
1418 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1426 F->addMetadata(LLVMContext::MD_callback,
1435 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1438 "Expected at least tid and bounded tid as arguments");
1439 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1442 CI->
getParent()->setName(
"omp_parallel");
1446 Value *ForkCallArgs[] = {Ident, Builder.
getInt32(NumCapturedVars),
1450 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1459 auto PtrTy = OMPIRBuilder->VoidPtr;
1460 if (IfCondition && NumCapturedVars == 0) {
1480 I->eraseFromParent();
1488 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1515 if (ProcBind != OMP_PROC_BIND_default) {
1519 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1547 TIDAddrAlloca, PointerType ::get(
M.
getContext(), 0),
"tid.addr.ascast");
1552 "zero.addr.ascast");
1576 if (IP.getBlock()->end() == IP.getPoint()) {
1582 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1583 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1584 "Unexpected insertion point for finalization call!");
1620 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1623 assert(BodyGenCB &&
"Expected body generation callback!");
1625 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1628 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1634 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1636 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1637 ThreadID, ToBeDeletedVec);
1642 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1644 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1665 ".omp_par", ArgsInZeroAddressSpace);
1670 Extractor.
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1676 if (
auto *
GV = dyn_cast_if_present<GlobalVariable>(
I))
1677 return GV->getValueType() == OpenMPIRBuilder::Ident;
1682 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1688 if (&V == TIDAddr || &V == ZeroAddr) {
1694 for (
Use &U : V.uses())
1695 if (
auto *UserI = dyn_cast<Instruction>(U.getUser()))
1696 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1706 if (!V.getType()->isPointerTy()) {
1725 Value *ReplacementValue =
nullptr;
1726 CallInst *CI = dyn_cast<CallInst>(&V);
1728 ReplacementValue = PrivTID;
1731 PrivCB(InnerAllocaIP,
Builder.
saveIP(), V, *Inner, ReplacementValue);
1739 assert(ReplacementValue &&
1740 "Expected copy/create callback to set replacement value!");
1741 if (ReplacementValue == &V)
1746 UPtr->set(ReplacementValue);
1765 for (
Value *Input : Inputs) {
1767 if (
Error Err = PrivHelper(*Input))
1771 for (
Value *Output : Outputs)
1775 "OpenMP outlining should not produce live-out values!");
1777 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1780 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1788 assert(FiniInfo.DK == OMPD_parallel &&
1789 "Unexpected finalization stack state!");
1794 if (
Error Err = FiniCB(PreFiniIP))
1800 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1801 UI->eraseFromParent();
1867 if (Dependencies.
empty())
1887 Type *DependInfo = OMPBuilder.DependInfo;
1890 Value *DepArray =
nullptr;
1896 DepArray = Builder.
CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1900 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1906 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1911 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1913 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1918 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1921 static_cast<unsigned int>(Dep.DepKind)),
1963 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1974 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1976 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1977 Mergeable, Priority, EventHandle, TaskAllocaBB,
1978 ToBeDeleted](
Function &OutlinedFn)
mutable {
1980 assert(OutlinedFn.hasOneUse() &&
1981 "there must be a single user for the outlined function");
1982 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
1986 bool HasShareds = StaleCI->
arg_size() > 1;
2034 assert(ArgStructAlloca &&
2035 "Unable to find the alloca instruction corresponding to arguments "
2036 "for extracted function");
2039 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2040 "arguments for extracted function");
2048 TaskAllocFn, {Ident, ThreadID, Flags,
2049 TaskSize, SharedsSize,
2057 OMPRTL___kmpc_task_allow_completion_event);
2085 Constant *Zero = ConstantInt::get(Int32Ty, 0);
2094 TaskStructType, TaskGEP, {Zero, ConstantInt::get(Int32Ty, 4)});
2098 PriorityData, {Zero, Zero});
2127 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2133 if (Dependencies.
size()) {
2157 if (Dependencies.
size()) {
2178 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2182 I->eraseFromParent();
2234 if (IP.getBlock()->end() != IP.getPoint())
2242 CancellationBranches.
push_back(DummyBranch);
2271 unsigned CaseNumber = 0;
2272 for (
auto SectionCB : SectionCBs) {
2290 Value *LB = ConstantInt::get(I32Ty, 0);
2291 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2292 Value *ST = ConstantInt::get(I32Ty, 1);
2294 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2299 applyStaticWorkshareLoop(Loc.
DL, *
LoopInfo, AllocaIP,
2300 WorksharingLoopType::ForStaticLoop, !IsNowait);
2306 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2310 assert(FiniInfo.DK == OMPD_sections &&
2311 "Unexpected finalization stack state!");
2318 AfterIP = {FiniBB, FiniBB->
begin()};
2322 for (
BranchInst *DummyBranch : CancellationBranches) {
2323 assert(DummyBranch->getNumSuccessors() == 1);
2324 DummyBranch->setSuccessor(0, LoopFini);
2338 if (IP.getBlock()->end() != IP.getPoint())
2357 Directive OMPD = Directive::OMPD_sections;
2360 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2371Value *OpenMPIRBuilder::getGPUThreadID() {
2374 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2378Value *OpenMPIRBuilder::getGPUWarpSize() {
2383Value *OpenMPIRBuilder::getNVPTXWarpID() {
2388Value *OpenMPIRBuilder::getNVPTXLaneID() {
2390 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2391 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2396Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *
From,
2401 assert(FromSize > 0 &&
"From size must be greater than zero");
2402 assert(ToSize > 0 &&
"To size must be greater than zero");
2403 if (FromType == ToType)
2405 if (FromSize == ToSize)
2420Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2425 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2429 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2433 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2434 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2435 Value *WarpSizeCast =
2437 Value *ShuffleCall =
2439 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2442void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2458 Value *ElemPtr = DstAddr;
2460 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2472 if ((
Size / IntSize) > 1) {
2496 Value *Res = createRuntimeShuffleFunction(
2505 Value *LocalElemPtr =
2512 Value *Res = createRuntimeShuffleFunction(
2526void OpenMPIRBuilder::emitReductionListCopy(
2527 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2529 CopyOptionsTy CopyOptions) {
2532 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2536 for (
auto En :
enumerate(ReductionInfos)) {
2537 const ReductionInfo &RI = En.value();
2538 Value *SrcElementAddr =
nullptr;
2539 Value *DestElementAddr =
nullptr;
2540 Value *DestElementPtrAddr =
nullptr;
2542 bool ShuffleInElement =
false;
2545 bool UpdateDestListPtr =
false;
2549 ReductionArrayTy, SrcBase,
2550 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2556 ReductionArrayTy, DestBase,
2557 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2563 ".omp.reduction.element");
2566 DestElementAddr = DestAlloca;
2569 DestElementAddr->
getName() +
".ascast");
2571 ShuffleInElement =
true;
2572 UpdateDestListPtr =
true;
2584 if (ShuffleInElement) {
2585 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2586 RemoteLaneOffset, ReductionArrayTy);
2588 switch (RI.EvaluationKind) {
2597 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2599 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2601 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2603 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2606 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2608 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2630 if (UpdateDestListPtr) {
2633 DestElementAddr->
getName() +
".ascast");
2649 "_omp_reduction_inter_warp_copy_func", &
M);
2672 "__openmp_nvptx_data_transfer_temporary_storage";
2676 if (!TransferMedium) {
2685 Value *GPUThreadID = getGPUThreadID();
2687 Value *LaneID = getNVPTXLaneID();
2689 Value *WarpID = getNVPTXWarpID();
2698 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2702 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2705 NumWarpsAlloca->
getName() +
".ascast");
2716 for (
auto En :
enumerate(ReductionInfos)) {
2721 const ReductionInfo &RI = En.value();
2723 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2726 unsigned NumIters = RealTySize / TySize;
2729 Value *Cnt =
nullptr;
2730 Value *CntAddr =
nullptr;
2740 CntAddr->
getName() +
".ascast");
2760 omp::Directive::OMPD_unknown,
2764 return BarrierIP1.takeError();
2775 auto *RedListArrayTy =
2781 {ConstantInt::get(IndexTy, 0),
2782 ConstantInt::get(IndexTy, En.index())});
2808 omp::Directive::OMPD_unknown,
2812 return BarrierIP2.takeError();
2819 Value *NumWarpsVal =
2822 Value *IsActiveThread =
2833 Value *TargetElemPtrPtr =
2835 {ConstantInt::get(IndexTy, 0),
2836 ConstantInt::get(IndexTy, En.index())});
2837 Value *TargetElemPtrVal =
2839 Value *TargetElemPtr = TargetElemPtrVal;
2845 Value *SrcMediumValue =
2864 RealTySize %= TySize;
2874Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2880 {Builder.getPtrTy(), Builder.getInt16Ty(),
2881 Builder.getInt16Ty(), Builder.getInt16Ty()},
2885 "_omp_reduction_shuffle_and_reduce_func", &
M);
2906 Type *ReduceListArgType = ReduceListArg->
getType();
2910 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2912 LaneIDArg->
getName() +
".addr");
2914 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2916 AlgoVerArg->
getName() +
".addr");
2923 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2926 ReduceListAlloca, ReduceListArgType,
2927 ReduceListAlloca->
getName() +
".ascast");
2929 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2931 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2932 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2934 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2937 RemoteReductionListAlloca->
getName() +
".ascast");
2946 Value *RemoteLaneOffset =
2955 emitReductionListCopy(
2957 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
2988 Value *RemoteOffsetComp =
3005 ->addFnAttr(Attribute::NoUnwind);
3026 ReductionInfos, RemoteListAddrCast, ReduceList);
3039Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3046 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3050 "_omp_reduction_list_to_global_copy_func", &
M);
3067 BufferArg->
getName() +
".addr");
3074 BufferArgAlloca->
getName() +
".ascast");
3079 ReduceListArgAlloca->
getName() +
".ascast");
3085 Value *LocalReduceList =
3087 Value *BufferArgVal =
3092 for (
auto En :
enumerate(ReductionInfos)) {
3093 const ReductionInfo &RI = En.value();
3094 auto *RedListArrayTy =
3098 RedListArrayTy, LocalReduceList,
3099 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3107 ReductionsBufferTy, BufferVD, 0, En.index());
3109 switch (RI.EvaluationKind) {
3117 RI.ElementType, ElemPtr, 0, 0,
".realp");
3119 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3121 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3123 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3126 RI.ElementType, GlobVal, 0, 0,
".realp");
3128 RI.ElementType, GlobVal, 0, 1,
".imagp");
3149Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3156 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3160 "_omp_reduction_list_to_global_reduce_func", &
M);
3177 BufferArg->
getName() +
".addr");
3182 auto *RedListArrayTy =
3187 Value *LocalReduceList =
3192 BufferArgAlloca->
getName() +
".ascast");
3197 ReduceListArgAlloca->
getName() +
".ascast");
3200 LocalReduceList->
getName() +
".ascast");
3210 for (
auto En :
enumerate(ReductionInfos)) {
3212 RedListArrayTy, LocalReduceListAddrCast,
3213 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3218 ReductionsBufferTy, BufferVD, 0, En.index());
3226 ->addFnAttr(Attribute::NoUnwind);
3232Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3239 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3243 "_omp_reduction_global_to_list_copy_func", &
M);
3260 BufferArg->
getName() +
".addr");
3267 BufferArgAlloca->
getName() +
".ascast");
3272 ReduceListArgAlloca->
getName() +
".ascast");
3277 Value *LocalReduceList =
3283 for (
auto En :
enumerate(ReductionInfos)) {
3285 auto *RedListArrayTy =
3289 RedListArrayTy, LocalReduceList,
3290 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3297 ReductionsBufferTy, BufferVD, 0, En.index());
3340Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3347 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3351 "_omp_reduction_global_to_list_reduce_func", &
M);
3368 BufferArg->
getName() +
".addr");
3378 Value *LocalReduceList =
3383 BufferArgAlloca->
getName() +
".ascast");
3388 ReduceListArgAlloca->
getName() +
".ascast");
3391 LocalReduceList->
getName() +
".ascast");
3401 for (
auto En :
enumerate(ReductionInfos)) {
3403 RedListArrayTy, ReductionList,
3404 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3409 ReductionsBufferTy, BufferVD, 0, En.index());
3417 ->addFnAttr(Attribute::NoUnwind);
3423std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3424 std::string Suffix =
3426 return (
Name + Suffix).str();
3431 ReductionGenCBKind ReductionGenCBKind,
AttributeList FuncAttrs) {
3433 {Builder.getPtrTy(), Builder.getPtrTy()},
3435 std::string
Name = getReductionFuncName(ReducerName);
3447 Value *LHSArrayPtr =
nullptr;
3448 Value *RHSArrayPtr =
nullptr;
3459 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3461 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3471 for (
auto En :
enumerate(ReductionInfos)) {
3472 const ReductionInfo &RI = En.value();
3474 RedArrayTy, RHSArrayPtr,
3475 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3478 RHSI8Ptr, RI.PrivateVariable->getType(),
3479 RHSI8Ptr->
getName() +
".ascast");
3482 RedArrayTy, LHSArrayPtr,
3483 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3486 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3498 return AfterIP.takeError();
3500 return ReductionFunc;
3506 for (
auto En :
enumerate(ReductionInfos)) {
3507 unsigned Index = En.index();
3508 const ReductionInfo &RI = En.value();
3509 Value *LHSFixupPtr, *RHSFixupPtr;
3511 Builder.
saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3516 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3517 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3521 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3522 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3528 return ReductionFunc;
3536 assert(RI.Variable &&
"expected non-null variable");
3537 assert(RI.PrivateVariable &&
"expected non-null private variable");
3538 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3539 "expected non-null reduction generator callback");
3542 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3543 "expected variables and their private equivalents to have the same "
3546 assert(RI.Variable->getType()->isPointerTy() &&
3547 "expected variables to be pointers");
3555 std::optional<omp::GV> GridValue,
unsigned ReductionBufNum,
3556 Value *SrcLocInfo) {
3570 if (ReductionInfos.
size() == 0)
3595 if (!ReductionResult)
3597 Function *ReductionFunc = *ReductionResult;
3601 if (GridValue.has_value())
3619 Value *ReductionListAlloca =
3622 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3626 for (
auto En :
enumerate(ReductionInfos)) {
3629 RedArrayTy, ReductionList,
3630 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3637 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3639 emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs);
3647 unsigned MaxDataSize = 0;
3649 for (
auto En :
enumerate(ReductionInfos)) {
3651 if (
Size > MaxDataSize)
3653 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3655 Value *ReductionDataSize =
3657 if (!IsTeamsReduction) {
3658 Value *SarFuncCast =
3662 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3665 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3670 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3672 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3673 Function *LtGCFunc = emitListToGlobalCopyFunction(
3674 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3675 Function *LtGRFunc = emitListToGlobalReduceFunction(
3676 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3677 Function *GtLCFunc = emitGlobalToListCopyFunction(
3678 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3679 Function *GtLRFunc = emitGlobalToListReduceFunction(
3680 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3684 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3686 Value *Args3[] = {SrcLocInfo,
3687 KernelTeamsReductionPtr,
3699 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3716 for (
auto En :
enumerate(ReductionInfos)) {
3723 Value *LHSPtr, *RHSPtr;
3725 &LHSPtr, &RHSPtr, CurFunc));
3730 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3734 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3749 if (ContinuationBlock) {
3764 ".omp.reduction.func", &M);
3775 Value *LHSArrayPtr =
nullptr;
3776 Value *RHSArrayPtr =
nullptr;
3790 Value *LHSAddrCast =
3792 Value *RHSAddrCast =
3796 LHSArrayPtr = Builder.
CreateLoad(Arg0Type, LHSAddrCast);
3797 RHSArrayPtr = Builder.
CreateLoad(Arg1Type, RHSAddrCast);
3799 LHSArrayPtr = ReductionFunc->
getArg(0);
3800 RHSArrayPtr = ReductionFunc->
getArg(1);
3803 unsigned NumReductions = ReductionInfos.
size();
3806 for (
auto En :
enumerate(ReductionInfos)) {
3809 RedArrayTy, LHSArrayPtr, 0, En.index());
3815 RedArrayTy, RHSArrayPtr, 0, En.index());
3832 if (!IsByRef[En.index()])
3842 bool IsNoWait,
bool IsTeamsReduction) {
3846 IsNoWait, IsTeamsReduction);
3853 if (ReductionInfos.
size() == 0)
3863 unsigned NumReductions = ReductionInfos.
size();
3870 for (
auto En :
enumerate(ReductionInfos)) {
3871 unsigned Index = En.index();
3874 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3891 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3896 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3897 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
3899 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3901 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3902 : RuntimeFunction::OMPRTL___kmpc_reduce);
3905 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3906 ReductionFunc, Lock},
3925 for (
auto En :
enumerate(ReductionInfos)) {
3931 if (!IsByRef[En.index()]) {
3933 "red.value." +
Twine(En.index()));
3935 Value *PrivateRedValue =
3937 "red.private.value." +
Twine(En.index()));
3948 if (!IsByRef[En.index()])
3952 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3953 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3961 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3998 Directive OMPD = Directive::OMPD_master;
4003 Value *Args[] = {Ident, ThreadId};
4011 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4022 Directive OMPD = Directive::OMPD_masked;
4028 Value *ArgsEnd[] = {Ident, ThreadId};
4036 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4046 Call->setDoesNotThrow();
4061 bool IsInclusive,
ScanInfo *ScanRedInfo) {
4063 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4064 ScanVarsType, ScanRedInfo);
4075 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4078 Type *DestTy = ScanVarsType[i];
4090 IV = ScanRedInfo->
IV;
4093 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4096 Type *DestTy = ScanVarsType[i];
4119Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4125 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4137 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4142 AllocSpan,
nullptr,
"arr");
4170Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4175 for (ReductionInfo RedInfo : ReductionInfos) {
4176 Value *PrivateVar = RedInfo.PrivateVariable;
4177 Value *OrigVar = RedInfo.Variable;
4181 Type *SrcTy = RedInfo.ElementType;
4247 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
4273 Value *ReductionVal = RedInfo.PrivateVariable;
4276 Type *DestTy = RedInfo.ElementType;
4299 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
4326 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
4333Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4345 Error Err = InputLoopGen();
4363void OpenMPIRBuilder::createScanBBs(
ScanInfo *ScanRedInfo) {
4405 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4418 "omp_" +
Name +
".next",
true);
4429 CL->Header = Header;
4448 NextBB, NextBB,
Name);
4480 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4489 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop,
Name);
4490 ScanRedInfo->
Span = TripCount;
4496 ScanRedInfo->
IV =
IV;
4497 createScanBBs(ScanRedInfo);
4500 assert(Terminator->getNumSuccessors() == 1);
4501 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
4514 const auto &&InputLoopGen = [&]() ->
Error {
4516 Builder.
saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4517 ComputeIP,
Name,
true, ScanRedInfo);
4527 InclusiveStop, ComputeIP,
Name,
true, ScanRedInfo);
4535 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
4543 bool IsSigned,
bool InclusiveStop,
const Twine &
Name) {
4552 auto *IndVarTy = cast<IntegerType>(Start->getType());
4553 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4554 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4558 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
4586 Value *CountIfLooping;
4587 if (InclusiveStop) {
4598 "omp_" +
Name +
".tripcount");
4603 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4610 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop,
Name);
4617 ScanRedInfo->
IV = IndVar;
4638 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4641 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4654 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4657 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
4664 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4666 "Require dedicated allocate IP");
4678 Type *IVTy =
IV->getType();
4680 LoopType == WorksharingLoopType::DistributeForStaticLoop
4702 Constant *One = ConstantInt::get(IVTy, 1);
4711 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4712 ? OMPScheduleType::OrderedDistribute
4713 : OMPScheduleType::UnorderedStatic;
4715 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4720 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4721 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4722 Value *PDistUpperBound =
4724 Args.push_back(PDistUpperBound);
4732 CLI->setTripCount(TripCount);
4754 omp::Directive::OMPD_for,
false,
4757 return BarrierIP.takeError();
4767OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4769 InsertPointTy AllocaIP,
4772 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4773 assert(ChunkSize &&
"Chunk size is required");
4778 Type *IVTy =
IV->getType();
4780 "Max supported tripcount bitwidth is 64 bits");
4782 :
Type::getInt64Ty(Ctx);
4785 Constant *One = ConstantInt::get(InternalIVTy, 1);
4797 Value *PLowerBound =
4799 Value *PUpperBound =
4809 Value *CastedChunkSize =
4811 Value *CastedTripCount =
4814 Constant *SchedulingType = ConstantInt::get(
4815 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4829 SchedulingType, PLastIter,
4830 PLowerBound, PUpperBound,
4835 Value *FirstChunkStart =
4837 Value *FirstChunkStop =
4842 Value *NextChunkStride =
4847 Value *DispatchCounter;
4855 DispatchCounter = Counter;
4858 FirstChunkStart, CastedTripCount, NextChunkStride,
4882 Value *IsLastChunk =
4884 Value *CountUntilOrigTripCount =
4887 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4888 Value *BackcastedChunkTC =
4890 CLI->setTripCount(BackcastedChunkTC);
4895 Value *BackcastedDispatchCounter =
4912 return AfterIP.takeError();
4933 case WorksharingLoopType::ForStaticLoop:
4936 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4939 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4941 case WorksharingLoopType::DistributeStaticLoop:
4944 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4947 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4949 case WorksharingLoopType::DistributeForStaticLoop:
4952 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4955 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4958 if (Bitwidth != 32 && Bitwidth != 64) {
4981 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4982 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4984 Builder.
restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4989 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
4990 Builder.
restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4995 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4996 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4997 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5033 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5041 "Expected unique undroppable user of outlined function");
5042 CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
5043 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5045 "Expected outlined function call to be located in loop preheader");
5047 if (OutlinedFnCallInstruction->
arg_size() > 1)
5054 LoopBodyArg, TripCount, OutlinedFn);
5056 for (
auto &ToBeDeletedItem : ToBeDeleted)
5057 ToBeDeletedItem->eraseFromParent();
5063 InsertPointTy AllocaIP,
5076 OI.OuterAllocaBB = AllocaIP.getBlock();
5081 "omp.prelatch",
true);
5101 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
5121 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5130 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5131 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
5137 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5144 OI.PostOutlineCB = [=, ToBeDeletedVec =
5145 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5155 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5156 bool HasSimdModifier,
bool HasMonotonicModifier,
5157 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5160 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType);
5162 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5163 HasNonmonotonicModifier, HasOrderedClause);
5165 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5166 OMPScheduleType::ModifierOrdered;
5167 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5168 case OMPScheduleType::BaseStatic:
5169 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
5171 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5172 NeedsBarrier, ChunkSize);
5174 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5176 case OMPScheduleType::BaseStaticChunked:
5178 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5179 NeedsBarrier, ChunkSize);
5181 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
5184 case OMPScheduleType::BaseRuntime:
5185 case OMPScheduleType::BaseAuto:
5186 case OMPScheduleType::BaseGreedy:
5187 case OMPScheduleType::BaseBalanced:
5188 case OMPScheduleType::BaseSteal:
5189 case OMPScheduleType::BaseGuidedSimd:
5190 case OMPScheduleType::BaseRuntimeSimd:
5192 "schedule type does not support user-defined chunk sizes");
5194 case OMPScheduleType::BaseDynamicChunked:
5195 case OMPScheduleType::BaseGuidedChunked:
5196 case OMPScheduleType::BaseGuidedIterativeChunked:
5197 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5198 case OMPScheduleType::BaseStaticBalancedChunked:
5199 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5200 NeedsBarrier, ChunkSize);
5216 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5219 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5232 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5235 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5247 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5250 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5256 InsertPointTy AllocaIP,
5258 bool NeedsBarrier,
Value *Chunk) {
5259 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5261 "Require dedicated allocate IP");
5263 "Require valid schedule type");
5265 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5266 OMPScheduleType::ModifierOrdered;
5277 Type *IVTy =
IV->getType();
5296 Constant *One = ConstantInt::get(IVTy, 1);
5317 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5321 {SrcLoc, ThreadNum, SchedulingType, One,
5322 UpperBound, One, Chunk});
5332 PLowerBound, PUpperBound, PStride});
5333 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5342 auto *PI = cast<PHINode>(Phi);
5343 PI->setIncomingBlock(0, OuterCond);
5344 PI->setIncomingValue(0, LowerBound);
5348 auto *Br = cast<BranchInst>(Term);
5349 Br->setSuccessor(0, OuterCond);
5357 auto *CI = cast<CmpInst>(Comp);
5358 CI->setOperand(1, UpperBound);
5361 auto *BI = cast<BranchInst>(Branch);
5362 assert(BI->getSuccessor(1) == Exit);
5363 BI->setSuccessor(1, OuterCond);
5377 omp::Directive::OMPD_for,
false,
5380 return BarrierIP.takeError();
5399 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5400 for (
Use &U : BB->uses()) {
5401 auto *UseInst = dyn_cast<Instruction>(U.getUser());
5404 if (BBsToErase.
count(UseInst->getParent()))
5411 while (BBsToErase.
remove_if(HasRemainingUses)) {
5422 assert(
Loops.size() >= 1 &&
"At least one loop required");
5423 size_t NumLoops =
Loops.size();
5427 return Loops.front();
5439 Loop->collectControlBlocks(OldControlBBs);
5443 if (ComputeIP.
isSet())
5450 Value *CollapsedTripCount =
nullptr;
5453 "All loops to collapse must be valid canonical loops");
5454 Value *OrigTripCount = L->getTripCount();
5455 if (!CollapsedTripCount) {
5456 CollapsedTripCount = OrigTripCount;
5467 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5475 Value *Leftover = Result->getIndVar();
5477 NewIndVars.
resize(NumLoops);
5478 for (
int i = NumLoops - 1; i >= 1; --i) {
5479 Value *OrigTripCount =
Loops[i]->getTripCount();
5482 NewIndVars[i] = NewIndVar;
5487 NewIndVars[0] = Leftover;
5496 BasicBlock *ContinueBlock = Result->getBody();
5498 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5505 ContinueBlock =
nullptr;
5506 ContinuePred = NextSrc;
5513 for (
size_t i = 0; i < NumLoops - 1; ++i)
5514 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5520 for (
size_t i = NumLoops - 1; i > 0; --i)
5521 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5524 ContinueWith(Result->getLatch(),
nullptr);
5531 for (
size_t i = 0; i < NumLoops; ++i)
5532 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5546std::vector<CanonicalLoopInfo *>
5550 "Must pass as many tile sizes as there are loops");
5551 int NumLoops =
Loops.size();
5552 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5564 Loop->collectControlBlocks(OldControlBBs);
5572 assert(L->isValid() &&
"All input loops must be valid canonical loops");
5573 OrigTripCounts.
push_back(L->getTripCount());
5584 for (
int i = 0; i < NumLoops - 1; ++i) {
5597 for (
int i = 0; i < NumLoops; ++i) {
5599 Value *OrigTripCount = OrigTripCounts[i];
5612 Value *FloorTripOverflow =
5616 Value *FloorTripCount =
5618 "omp_floor" +
Twine(i) +
".tripcount",
true);
5621 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5627 std::vector<CanonicalLoopInfo *> Result;
5628 Result.reserve(NumLoops * 2);
5641 auto EmbeddNewLoop =
5642 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5645 DL, TripCount,
F, InnerEnter, OutroInsertBefore,
Name);
5650 Enter = EmbeddedLoop->
getBody();
5652 OutroInsertBefore = EmbeddedLoop->
getLatch();
5653 return EmbeddedLoop;
5657 const Twine &NameBase) {
5660 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5661 Result.push_back(EmbeddedLoop);
5665 EmbeddNewLoops(FloorCount,
"floor");
5671 for (
int i = 0; i < NumLoops; ++i) {
5675 Value *FloorIsEpilogue =
5677 Value *TileTripCount =
5684 EmbeddNewLoops(TileCounts,
"tile");
5689 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5698 BodyEnter =
nullptr;
5699 BodyEntered = ExitBB;
5712 for (
int i = 0; i < NumLoops; ++i) {
5715 Value *OrigIndVar = OrigIndVars[i];
5743 if (Properties.
empty())
5766 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5770 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5778 if (
I.mayReadOrWriteMemory()) {
5782 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5805 const Twine &NamePrefix) {
5834 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
5836 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
5853 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
5855 ExistingBlocks.
append(L->block_begin(), L->block_end());
5861 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
5863 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
5870 if (
Block == ThenBlock)
5871 NewBB->
setName(NamePrefix +
".if.else");
5874 VMap[
Block] = NewBB;
5882 L->getLoopLatch()->splitBasicBlock(
5883 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
5887 L->addBasicBlockToLoop(ThenBlock, LI);
5893 if (TargetTriple.
isX86()) {
5894 if (Features.
lookup(
"avx512f"))
5896 else if (Features.
lookup(
"avx"))
5900 if (TargetTriple.
isPPC())
5902 if (TargetTriple.
isWasm())
5909 Value *IfCond, OrderKind Order,
5928 if (AlignedVars.
size()) {
5930 for (
auto &AlignedItem : AlignedVars) {
5931 Value *AlignedPtr = AlignedItem.first;
5932 Value *Alignment = AlignedItem.second;
5933 Instruction *loadInst = dyn_cast<Instruction>(AlignedPtr);
5943 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
5966 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5974 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5990 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
5992 if (Simdlen || Safelen) {
5996 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6022static std::unique_ptr<TargetMachine>
6026 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6027 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6038 std::nullopt, OptLevel));
6062 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6077 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6082 nullptr, ORE,
static_cast<int>(OptLevel),
6103 <<
" Threshold=" << UP.
Threshold <<
"\n"
6106 <<
" PartialOptSizeThreshold="
6125 if (
auto *Load = dyn_cast<LoadInst>(&
I)) {
6126 Ptr = Load->getPointerOperand();
6127 }
else if (
auto *Store = dyn_cast<StoreInst>(&
I)) {
6128 Ptr = Store->getPointerOperand();
6132 Ptr =
Ptr->stripPointerCasts();
6134 if (
auto *Alloca = dyn_cast<AllocaInst>(
Ptr)) {
6135 if (Alloca->getParent() == &
F->getEntryBlock())
6155 int MaxTripCount = 0;
6156 bool MaxOrZero =
false;
6157 unsigned TripMultiple = 0;
6159 bool UseUpperBound =
false;
6161 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6163 unsigned Factor = UP.
Count;
6164 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6175 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6191 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6204 *UnrolledCLI =
Loop;
6209 "unrolling only makes sense with a factor of 2 or larger");
6211 Type *IndVarTy =
Loop->getIndVarType();
6218 std::vector<CanonicalLoopInfo *>
LoopNest =
6233 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6236 (*UnrolledCLI)->assertOK();
6254 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6273 if (!CPVars.
empty()) {
6278 Directive OMPD = Directive::OMPD_single;
6283 Value *Args[] = {Ident, ThreadId};
6292 if (
Error Err = FiniCB(IP))
6313 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6320 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
6323 ConstantInt::get(
Int64, 0), CPVars[
I],
6326 }
else if (!IsNowait) {
6329 omp::Directive::OMPD_unknown,
false,
6344 Directive OMPD = Directive::OMPD_critical;
6349 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6350 Value *Args[] = {Ident, ThreadId, LockVar};
6367 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6375 const Twine &
Name,
bool IsDependSource) {
6378 [](
Value *SV) {
return SV->
getType()->isIntegerTy(64); }) &&
6379 "OpenMP runtime requires depend vec with i64 type");
6392 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6406 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6424 Directive OMPD = Directive::OMPD_ordered;
6433 Value *Args[] = {Ident, ThreadId};
6443 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6449 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6450 bool HasFinalize,
bool IsCancellable) {
6459 if (!isa_and_nonnull<BranchInst>(SplitPos))
6466 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6477 "Unexpected control flow graph state!!");
6479 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6481 return AfterIP.takeError();
6483 "Unexpected Control Flow State!");
6489 "Unexpected Insertion point location!");
6492 auto InsertBB = merged ? ExitPredBB : ExitBB;
6493 if (!isa_and_nonnull<BranchInst>(SplitPos))
6503 if (!Conditional || !EntryCall)
6523 UI->eraseFromParent();
6531 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6539 "Unexpected finalization stack state!");
6542 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6544 if (
Error Err = Fi.FiniCB(FinIP))
6593 if (isa_and_nonnull<BranchInst>(OMP_Entry->
getTerminator())) {
6595 "copyin.not.master.end");
6650 Value *DependenceAddress,
bool HaveNowaitClause) {
6658 if (Device ==
nullptr)
6660 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
6661 if (NumDependences ==
nullptr) {
6662 NumDependences = ConstantInt::get(
Int32, 0);
6666 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6668 Ident, ThreadId, InteropVar, InteropTypeVal,
6669 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6678 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6686 if (Device ==
nullptr)
6688 if (NumDependences ==
nullptr) {
6689 NumDependences = ConstantInt::get(
Int32, 0);
6693 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6695 Ident, ThreadId, InteropVar, Device,
6696 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6705 Value *NumDependences,
6706 Value *DependenceAddress,
6707 bool HaveNowaitClause) {
6714 if (Device ==
nullptr)
6716 if (NumDependences ==
nullptr) {
6717 NumDependences = ConstantInt::get(
Int32, 0);
6721 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6723 Ident, ThreadId, InteropVar, Device,
6724 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6754 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
6755 "expected num_threads and num_teams to be specified");
6774 const std::string DebugPrefix =
"_debug__";
6775 if (KernelName.
ends_with(DebugPrefix)) {
6776 KernelName = KernelName.
drop_back(DebugPrefix.length());
6783 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
6788 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
6789 if (MaxThreadsVal < 0)
6790 MaxThreadsVal = std::max(
6793 if (MaxThreadsVal > 0)
6806 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6809 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6810 Constant *DynamicEnvironmentInitializer =
6814 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6816 DL.getDefaultGlobalsAddressSpace());
6820 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6821 ? DynamicEnvironmentGV
6823 DynamicEnvironmentPtr);
6826 ConfigurationEnvironment, {
6827 UseGenericStateMachineVal,
6828 MayUseNestedParallelismVal,
6835 ReductionBufferLength,
6838 KernelEnvironment, {
6839 ConfigurationEnvironmentInitializer,
6843 std::string KernelEnvironmentName =
6844 (KernelName +
"_kernel_environment").str();
6847 KernelEnvironmentInitializer, KernelEnvironmentName,
6849 DL.getDefaultGlobalsAddressSpace());
6853 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6854 ? KernelEnvironmentGV
6856 KernelEnvironmentPtr);
6857 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6859 KernelLaunchEnvironment =
6860 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
6861 ? KernelLaunchEnvironment
6863 KernelLaunchEnvParamTy);
6891 UI->eraseFromParent();
6899 int32_t TeamsReductionDataSize,
6900 int32_t TeamsReductionBufferLength) {
6905 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6909 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6915 const std::string DebugPrefix =
"_debug__";
6917 KernelName = KernelName.
drop_back(DebugPrefix.length());
6918 auto *KernelEnvironmentGV =
6920 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6921 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
6923 KernelEnvironmentInitializer,
6924 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6926 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6928 KernelEnvironmentGV->setInitializer(NewInitializer);
6940std::pair<int32_t, int32_t>
6942 int32_t ThreadLimit =
6947 if (!Attr.isValid() || !Attr.isStringAttribute())
6948 return {0, ThreadLimit};
6951 if (!llvm::to_integer(UBStr, UB, 10))
6952 return {0, ThreadLimit};
6953 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6954 if (!llvm::to_integer(LBStr, LB, 10))
6961 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6963 return {0, ThreadLimit};
6973 llvm::utostr(LB) +
"," + llvm::utostr(UB));
6980std::pair<int32_t, int32_t>
6987 int32_t LB, int32_t UB) {
6992 Kernel.
addFnAttr(
"amdgpu-max-num-workgroups", llvm::utostr(LB) +
",1,1");
6997void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7006 else if (
T.isNVPTX())
7008 else if (
T.isSPIRV())
7016 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7025Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7031 "Named kernel already exists?");
7049 OutlinedFn = *CBResult;
7051 OutlinedFn =
nullptr;
7057 if (!IsOffloadEntry)
7060 std::string EntryFnIDName =
7062 ? std::string(EntryFnName)
7066 EntryFnName, EntryFnIDName);
7074 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7075 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7076 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7078 EntryInfo, EntryAddr, OutlinedFnID,
7080 return OutlinedFnID;
7108 bool IsStandAlone = !BodyGenCB;
7118 true, DeviceAddrCB))
7135 SrcLocInfo, DeviceID,
7142 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7146 if (
Info.HasNoWait) {
7156 if (
Info.HasNoWait) {
7160 emitBlock(OffloadContBlock, CurFn,
true);
7166 bool RequiresOuterTargetTask =
Info.HasNoWait;
7167 if (!RequiresOuterTargetTask)
7168 cantFail(TaskBodyCB(
nullptr,
nullptr,
7172 {}, RTArgs,
Info.HasNoWait));
7175 omp::OMPRTL___tgt_target_data_begin_mapper);
7179 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7180 if (isa<AllocaInst>(DeviceMap.second.second)) {
7228 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7250 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7266 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7270 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7282 bool IsGPUDistribute) {
7283 assert((IVSize == 32 || IVSize == 64) &&
7284 "IV size is not compatible with the omp runtime");
7286 if (IsGPUDistribute)
7288 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7289 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7290 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
7291 : omp::OMPRTL___kmpc_distribute_static_init_8u);
7293 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7294 : omp::OMPRTL___kmpc_for_static_init_4u)
7295 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7296 : omp::OMPRTL___kmpc_for_static_init_8u);
7303 assert((IVSize == 32 || IVSize == 64) &&
7304 "IV size is not compatible with the omp runtime");
7306 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7307 : omp::OMPRTL___kmpc_dispatch_init_4u)
7308 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
7309 : omp::OMPRTL___kmpc_dispatch_init_8u);
7316 assert((IVSize == 32 || IVSize == 64) &&
7317 "IV size is not compatible with the omp runtime");
7319 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7320 : omp::OMPRTL___kmpc_dispatch_next_4u)
7321 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
7322 : omp::OMPRTL___kmpc_dispatch_next_8u);
7329 assert((IVSize == 32 || IVSize == 64) &&
7330 "IV size is not compatible with the omp runtime");
7332 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7333 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7334 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
7335 : omp::OMPRTL___kmpc_dispatch_fini_8u);
7346 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7354 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7358 if (NewVar && (arg == NewVar->
getArg()))
7368 auto UpdateDebugRecord = [&](
auto *DR) {
7371 for (
auto Loc : DR->location_ops()) {
7372 auto Iter = ValueReplacementMap.find(Loc);
7373 if (Iter != ValueReplacementMap.end()) {
7374 DR->replaceVariableLocationOp(Loc, std::get<0>(Iter->second));
7375 ArgNo = std::get<1>(Iter->second) + 1;
7379 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7385 if (
auto *DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&
I))
7386 UpdateDebugRecord(DDI);
7389 UpdateDebugRecord(&DVR);
7394 Module *M = Func->getParent();
7397 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7399 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7400 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7402 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(), Loc,
7422 for (
auto &Arg : Inputs)
7423 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
7427 for (
auto &Arg : Inputs)
7428 ParameterTypes.
push_back(Arg->getType());
7432 auto M = BB->getModule();
7443 if (TargetCpuAttr.isStringAttribute())
7444 Func->addFnAttr(TargetCpuAttr);
7446 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7447 if (TargetFeaturesAttr.isStringAttribute())
7448 Func->addFnAttr(TargetFeaturesAttr);
7453 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
7480 splitBB(Builder,
true,
"outlined.body");
7495 auto AllocaIP = Builder.
saveIP();
7500 const auto &ArgRange =
7502 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7525 if (
auto *Const = dyn_cast<Constant>(Input))
7534 if (
auto *Instr = dyn_cast<Instruction>(
User))
7535 if (Instr->getFunction() == Func)
7536 Instr->replaceUsesOfWith(Input, InputCopy);
7542 for (
auto InArg :
zip(Inputs, ArgRange)) {
7543 Value *Input = std::get<0>(InArg);
7544 Argument &Arg = std::get<1>(InArg);
7545 Value *InputCopy =
nullptr;
7548 ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.
saveIP());
7552 ValueReplacementMap[Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7570 if (isa<GlobalValue>(Input)) {
7571 DeferredReplacement.
push_back(std::make_pair(Input, InputCopy));
7575 if (isa<ConstantData>(Input))
7578 ReplaceValue(Input, InputCopy, Func);
7582 for (
auto Deferred : DeferredReplacement)
7583 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7586 ValueReplacementMap);
7594 Value *TaskWithPrivates,
7595 Type *TaskWithPrivatesTy) {
7597 Type *TaskTy = OMPIRBuilder.Task;
7601 Value *Shareds = TaskT;
7611 if (TaskWithPrivatesTy != TaskTy)
7629 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7634 assert((!NumOffloadingArrays || PrivatesTy) &&
7635 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7668 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7669 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7675 ".omp_target_task_proxy_func",
7677 Value *ThreadId = ProxyFn->getArg(0);
7678 Value *TaskWithPrivates = ProxyFn->getArg(1);
7679 ThreadId->
setName(
"thread.id");
7680 TaskWithPrivates->
setName(
"task");
7682 bool HasShareds = SharedArgsOperandNo > 0;
7683 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7692 if (HasOffloadingArrays) {
7693 assert(TaskTy != TaskWithPrivatesTy &&
7694 "If there are offloading arrays to pass to the target"
7695 "TaskTy cannot be the same as TaskWithPrivatesTy");
7699 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7705 auto *ArgStructAlloca =
7706 dyn_cast<AllocaInst>(StaleCI->
getArgOperand(SharedArgsOperandNo));
7707 assert(ArgStructAlloca &&
7708 "Unable to find the alloca instruction corresponding to arguments "
7709 "for extracted function");
7710 auto *ArgStructType = cast<StructType>(ArgStructAlloca->getAllocatedType());
7713 Builder.
CreateAlloca(ArgStructType,
nullptr,
"structArg");
7715 Value *SharedsSize =
7716 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7719 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7722 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7724 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7726 Builder.
CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7732 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(V))
7733 return GEP->getSourceElementType();
7734 if (
auto *Alloca = dyn_cast<AllocaInst>(V))
7735 return Alloca->getAllocatedType();
7758 if (OffloadingArraysToPrivatize.
empty())
7759 return OMPIRBuilder.Task;
7762 for (
Value *V : OffloadingArraysToPrivatize) {
7763 assert(V->getType()->isPointerTy() &&
7764 "Expected pointer to array to privatize. Got a non-pointer value "
7767 assert(ArrayTy &&
"ArrayType cannot be nullptr");
7773 "struct.task_with_privates");
7787 EntryFnName, Inputs, CBFunc,
7792 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7929 TargetTaskAllocaBB->
begin());
7933 OI.
EntryBB = TargetTaskAllocaBB;
7939 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7943 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7961 bool NeedsTargetTask = HasNoWait && DeviceID;
7962 if (NeedsTargetTask) {
7967 if (V && !isa<ConstantPointerNull, GlobalVariable>(V)) {
7968 OffloadingArraysToPrivatize.
push_back(V);
7973 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
7974 DeviceID, OffloadingArraysToPrivatize](
7977 "there must be a single user for the outlined function");
7991 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
7992 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
7994 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
7995 "Wrong number of arguments for StaleCI when shareds are present");
7996 int SharedArgOperandNo =
7997 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8003 if (!OffloadingArraysToPrivatize.
empty())
8008 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8009 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8011 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8031 OMPRTL___kmpc_omp_target_task_alloc);
8050 auto *ArgStructAlloca =
8051 dyn_cast<AllocaInst>(StaleCI->
getArgOperand(SharedArgOperandNo));
8052 assert(ArgStructAlloca &&
8053 "Unable to find the alloca instruction corresponding to arguments "
8054 "for extracted function");
8055 auto *ArgStructType =
8056 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
8057 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8058 "arguments for extracted function");
8079 TaskSize, SharedsSize,
8082 if (NeedsTargetTask) {
8083 assert(DeviceID &&
"Expected non-empty device ID.");
8093 *
this,
Builder, TaskData, TaskWithPrivatesTy);
8097 if (!OffloadingArraysToPrivatize.
empty()) {
8100 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8101 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8108 "ElementType should match ArrayType");
8113 Dst, Alignment, PtrToPrivatize, Alignment,
8128 if (!NeedsTargetTask) {
8150 }
else if (DepArray) {
8169 I->eraseFromParent();
8188 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8216 bool HasDependencies = Dependencies.
size() > 0;
8217 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8234 if (OutlinedFnID && DeviceID)
8236 EmitTargetCallFallbackCB, KArgs,
8237 DeviceID, RTLoc, TargetTaskAllocaIP);
8245 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
8252 auto &&EmitTargetCallElse =
8258 if (RequiresOuterTargetTask) {
8265 Dependencies, EmptyRTArgs, HasNoWait);
8267 return EmitTargetCallFallbackCB(Builder.
saveIP());
8274 auto &&EmitTargetCallThen =
8277 Info.HasNoWait = HasNoWait;
8281 AllocaIP, Builder.
saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8289 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8294 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8300 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8311 Value *MaxThreadsClause =
8313 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
8316 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8318 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8319 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8321 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8322 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8327 unsigned NumTargetItems =
Info.NumberOfPtrs;
8345 NumTeamsC, NumThreadsC,
8346 DynCGGroupMem, HasNoWait);
8353 if (RequiresOuterTargetTask)
8354 return OMPBuilder.
emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8355 Dependencies, KArgs.
RTArgs,
8359 EmitTargetCallFallbackCB, KArgs,
8360 DeviceID, RTLoc, AllocaIP);
8370 if (!OutlinedFnID) {
8382 EmitTargetCallElse, AllocaIP));
8408 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8409 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8417 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8418 CustomMapperCB, Dependencies, HasNowait);
8432 return OS.str().str();
8446 assert(Elem.second->getValueType() == Ty &&
8447 "OMP internal variable has different type than requested");
8463 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8470Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8471 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8472 std::string
Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
8483 return SizePtrToInt;
8488 std::string VarName) {
8496 return MaptypesArrayGlobal;
8501 unsigned NumOperands,
8510 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8514 ArrI64Ty,
nullptr,
".offload_sizes");
8525 int64_t DeviceID,
unsigned NumOperands) {
8531 Value *ArgsBaseGEP =
8533 {Builder.getInt32(0), Builder.getInt32(0)});
8536 {Builder.getInt32(0), Builder.getInt32(0)});
8537 Value *ArgSizesGEP =
8539 {Builder.getInt32(0), Builder.getInt32(0)});
8545 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
8552 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8553 "expected region end call to runtime only when end call is separate");
8555 auto VoidPtrTy = UnqualPtrTy;
8556 auto VoidPtrPtrTy = UnqualPtrTy;
8558 auto Int64PtrTy = UnqualPtrTy;
8560 if (!
Info.NumberOfPtrs) {
8572 Info.RTArgs.BasePointersArray,
8583 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8584 :
Info.RTArgs.MapTypesArray,
8590 if (!
Info.EmitDebug)
8599 if (!
Info.HasMapper)
8624 "struct.descriptor_dim");
8626 enum { OffsetFD = 0, CountFD, StrideFD };
8630 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
8633 if (NonContigInfo.
Dims[
I] == 1)
8640 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
8641 unsigned RevIdx = EE -
II - 1;
8644 {Builder.getInt64(0), Builder.getInt64(II)});
8648 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
8653 NonContigInfo.
Counts[L][RevIdx], CountLVal,
8658 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
8667 Info.RTArgs.PointersArray, 0,
I);
8674void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8678 StringRef Prefix = IsInit ?
".init" :
".del";
8688 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8689 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8699 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8700 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8725 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8726 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8727 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8731 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8732 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8736 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8737 ArraySize, MapTypeArg, MapName};
8763 MapperFn->
addFnAttr(Attribute::NoInline);
8764 MapperFn->
addFnAttr(Attribute::NoUnwind);
8789 Value *PtrBegin = BeginIn;
8795 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8796 MapType, MapName, ElementSize, HeadBB,
8815 PtrPHI->addIncoming(PtrBegin, HeadBB);
8820 return Info.takeError();
8824 Value *OffloadingArgs[] = {MapperHandle};
8828 Value *ShiftedPreviousSize =
8832 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
8842 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8844 Value *MemberMapType =
8862 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8863 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8864 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8880 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8881 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8882 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8888 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8889 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8896 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8897 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8903 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8904 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8911 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8912 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8923 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8924 CurSizeArg, CurMapType, CurNameArg};
8926 auto ChildMapperFn = CustomMapperCB(
I);
8928 return ChildMapperFn.takeError();
8929 if (*ChildMapperFn) {
8944 "omp.arraymap.next");
8945 PtrPHI->addIncoming(PtrNext, LastBB);
8953 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8954 MapType, MapName, ElementSize, DoneBB,
8968 bool IsNonContiguous,
8972 Info.clearArrayInfo();
8975 if (
Info.NumberOfPtrs == 0)
8985 PointerArrayType,
nullptr,
".offload_baseptrs");
8988 PointerArrayType,
nullptr,
".offload_ptrs");
8990 PointerArrayType,
nullptr,
".offload_mappers");
8991 Info.RTArgs.MappersArray = MappersArray;
8998 ConstantInt::get(Int64Ty, 0));
9000 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
9001 if (
auto *CI = dyn_cast<Constant>(CombinedInfo.
Sizes[
I])) {
9002 if (!isa<ConstantExpr>(CI) && !isa<GlobalValue>(CI)) {
9003 if (IsNonContiguous &&
9004 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9006 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9014 RuntimeSizes.
set(
I);
9017 if (RuntimeSizes.
all()) {
9020 SizeArrayType,
nullptr,
".offload_sizes");
9026 auto *SizesArrayGbl =
9031 if (!RuntimeSizes.
any()) {
9032 Info.RTArgs.SizesArray = SizesArrayGbl;
9038 SizeArrayType,
nullptr,
".offload_sizes");
9043 SizesArrayGbl, OffloadSizeAlign,
9048 Info.RTArgs.SizesArray = Buffer;
9056 for (
auto mapFlag : CombinedInfo.
Types)
9058 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9062 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9068 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9069 Info.EmitDebug =
true;
9071 Info.RTArgs.MapNamesArray =
9073 Info.EmitDebug =
false;
9078 if (
Info.separateBeginEndCalls()) {
9079 bool EndMapTypesDiffer =
false;
9081 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9082 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9083 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9084 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9085 EndMapTypesDiffer =
true;
9088 if (EndMapTypesDiffer) {
9090 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9095 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9103 if (
Info.requiresDevicePointerInfo()) {
9110 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9112 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9114 DeviceAddrCB(
I, BP);
9126 if (RuntimeSizes.
test(
I)) {
9140 auto CustomMFunc = CustomMapperCB(
I);
9142 return CustomMFunc.takeError();
9148 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9154 Info.NumberOfPtrs == 0)
9200 if (
auto *CI = dyn_cast<ConstantInt>(
Cond)) {
9201 auto CondConstant = CI->getSExtValue();
9233bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9237 "Unexpected Atomic Ordering.");
9301 assert(
X.Var->getType()->isPointerTy() &&
9302 "OMP Atomic expects a pointer to target memory");
9303 Type *XElemTy =
X.ElemTy;
9306 "OMP atomic read expected a scalar type");
9308 Value *XRead =
nullptr;
9314 XRead = cast<Value>(XLD);
9325 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9327 XRead = AtomicLoadRes.first;
9342 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
9354 assert(
X.Var->getType()->isPointerTy() &&
9355 "OMP Atomic expects a pointer to target memory");
9356 Type *XElemTy =
X.ElemTy;
9359 "OMP atomic write expected a scalar type");
9371 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9384 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
9392 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9398 Type *XTy =
X.Var->getType();
9400 "OMP Atomic expects a pointer to target memory");
9401 Type *XElemTy =
X.ElemTy;
9404 "OMP atomic update expected a scalar type");
9407 "OpenMP atomic does not support LT or GT operations");
9411 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9412 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9414 return AtomicResult.takeError();
9415 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
9420Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9459 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9460 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9463 bool emitRMWOp =
false;
9471 emitRMWOp = XElemTy;
9474 emitRMWOp = (IsXBinopExpr && XElemTy);
9481 std::pair<Value *, Value *> Res;
9486 if (IsIgnoreDenormalMode)
9487 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9489 if (!IsFineGrainedMemory)
9490 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9492 if (!IsRemoteMemory)
9496 Res.first = RMWInst;
9501 Res.second = Res.first;
9503 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9515 OldVal->
getAlign(),
true , AllocaIP,
X);
9516 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9523 X->getName() +
".atomic.cont");
9527 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9530 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9535 Value *Upd = *CBResult;
9539 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9540 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9545 Res.first = OldExprVal;
9572 X->getName() +
".atomic.cont");
9576 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9579 PHI->addIncoming(OldVal, CurBB);
9585 X->getName() +
".atomic.fltCast");
9588 X->getName() +
".atomic.ptrCast");
9595 Value *Upd = *CBResult;
9602 Result->setVolatile(VolatileX);
9608 Res.first = OldExprVal;
9628 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9629 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9634 Type *XTy =
X.Var->getType();
9636 "OMP Atomic expects a pointer to target memory");
9637 Type *XElemTy =
X.ElemTy;
9640 "OMP atomic capture expected a scalar type");
9642 "OpenMP atomic does not support LT or GT operations");
9649 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9650 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9653 Value *CapturedVal =
9654 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9657 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
9669 IsPostfixUpdate, IsFailOnly, Failure);
9681 assert(
X.Var->getType()->isPointerTy() &&
9682 "OMP atomic expects a pointer to target memory");
9685 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9686 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9691 if (
Op == OMPAtomicCompareOp::EQ) {
9710 "OldValue and V must be of same type");
9711 if (IsPostfixUpdate) {
9729 CurBBTI,
X.Var->getName() +
".atomic.exit");
9749 Value *CapturedValue =
9757 assert(R.Var->getType()->isPointerTy() &&
9758 "r.var must be of pointer type");
9759 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9762 Value *ResultCast = R.IsSigned
9768 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9769 "Op should be either max or min at this point");
9770 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9810 Value *CapturedValue =
nullptr;
9811 if (IsPostfixUpdate) {
9812 CapturedValue = OldValue;
9844 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
9891 bool SubClausesPresent =
9892 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9895 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9896 "if lowerbound is non-null, then upperbound must also be non-null "
9897 "for bounds on num_teams");
9899 if (NumTeamsUpper ==
nullptr)
9902 if (NumTeamsLower ==
nullptr)
9903 NumTeamsLower = NumTeamsUpper;
9907 "argument to if clause must be an integer value");
9912 ConstantInt::get(IfExpr->
getType(), 0));
9921 if (ThreadLimit ==
nullptr)
9927 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9932 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9944 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9946 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9948 auto HostPostOutlineCB = [
this, Ident,
9949 ToBeDeleted](
Function &OutlinedFn)
mutable {
9954 "there must be a single user for the outlined function");
9959 "Outlined function must have two or three arguments only");
9961 bool HasShared = OutlinedFn.
arg_size() == 3;
9969 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9970 "outlined function.");
9977 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
9981 I->eraseFromParent();
10018 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10034 std::string VarName) {
10043 return MapNamesArrayGlobal;
10048void OpenMPIRBuilder::initializeTypes(
Module &M) {
10051#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10052#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10053 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10054 VarName##PtrTy = PointerType::getUnqual(Ctx);
10055#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10056 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10057 VarName##Ptr = PointerType::getUnqual(Ctx);
10058#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10059 T = StructType::getTypeByName(Ctx, StructName); \
10061 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10063 VarName##Ptr = PointerType::getUnqual(Ctx);
10064#include "llvm/Frontend/OpenMP/OMPKinds.def"
10075 while (!Worklist.
empty()) {
10079 if (
BlockSet.insert(SuccBB).second)
10103 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10104 Fn->
addFnAttr(Attribute::MustProgress);
10122 auto &&GetMDInt = [
this](
unsigned V) {
10130 auto &&TargetRegionMetadataEmitter =
10131 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10146 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
10147 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10148 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10149 GetMDInt(E.getOrder())};
10152 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
10161 auto &&DeviceGlobalVarMetadataEmitter =
10162 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10172 Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
10173 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
10177 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
10184 DeviceGlobalVarMetadataEmitter);
10186 for (
const auto &E : OrderedEntries) {
10187 assert(E.first &&
"All ordered entries must exist!");
10188 if (
const auto *CE =
10189 dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>(
10191 if (!CE->getID() || !CE->getAddress()) {
10203 }
else if (
const auto *CE =
dyn_cast<
10214 if (!CE->getAddress()) {
10219 if (CE->getVarSize() == 0)
10225 "Declaret target link address is set.");
10228 if (!CE->getAddress()) {
10240 if (
auto *
GV = dyn_cast<GlobalValue>(CE->getAddress()))
10241 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10249 Flags, CE->getLinkage(), CE->getVarName());
10252 Flags, CE->getLinkage());
10274 unsigned FileID,
unsigned Line,
unsigned Count) {
10277 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10279 OS <<
"_" << Count;
10284 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10287 EntryInfo.
Line, NewCount);
10294 auto FileIDInfo = CallBack();
10300 FileID =
hash_value(std::get<0>(FileIDInfo));
10302 FileID =
ID.getFile();
10305 std::get<1>(FileIDInfo));
10311 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10313 !(Remain & 1); Remain = Remain >> 1)
10331 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10333 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10340 Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10341 Flags |= MemberOfFlag;
10347 bool IsDeclaration,
bool IsExternallyVisible,
10349 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10350 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10351 std::function<
Constant *()> GlobalInitializer,
10367 if (!IsExternallyVisible)
10369 OS <<
"_decl_tgt_ref_ptr";
10378 auto *
GV = cast<GlobalVariable>(
Ptr);
10382 if (GlobalInitializer)
10383 GV->setInitializer(GlobalInitializer());
10389 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10390 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10391 GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(
Ptr));
10394 return cast<Constant>(
Ptr);
10403 bool IsDeclaration,
bool IsExternallyVisible,
10405 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10406 std::vector<Triple> TargetTriple,
10407 std::function<
Constant *()> GlobalInitializer,
10424 VarName = MangledName;
10427 if (!IsDeclaration)
10432 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
10448 auto *GvAddrRef = cast<GlobalVariable>(AddrRef);
10449 GvAddrRef->setConstant(
true);
10451 GvAddrRef->setInitializer(
Addr);
10452 GeneratedRefs.push_back(GvAddrRef);
10462 VarName = (
Addr) ?
Addr->getName() :
"";
10466 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10467 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10468 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10469 VarName = (
Addr) ?
Addr->getName() :
"";
10490 auto &&GetMDInt = [MN](
unsigned Idx) {
10491 auto *V = cast<ConstantAsMetadata>(MN->getOperand(
Idx));
10492 return cast<ConstantInt>(V->getValue())->getZExtValue();
10495 auto &&GetMDString = [MN](
unsigned Idx) {
10496 auto *V = cast<MDString>(MN->getOperand(
Idx));
10497 return V->getString();
10500 switch (GetMDInt(0)) {
10528 if (HostFilePath.
empty())
10532 if (std::error_code Err = Buf.getError()) {
10534 "OpenMPIRBuilder: " +
10542 if (std::error_code Err =
M.getError()) {
10544 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10556 return OffloadEntriesTargetRegion.empty() &&
10557 OffloadEntriesDeviceGlobalVar.empty();
10560unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10562 auto It = OffloadEntriesTargetRegionCount.find(
10563 getTargetRegionEntryCountKey(EntryInfo));
10564 if (It == OffloadEntriesTargetRegionCount.end())
10569void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10571 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10572 EntryInfo.
Count + 1;
10578 OffloadEntriesTargetRegion[EntryInfo] =
10580 OMPTargetRegionEntryTargetRegion);
10581 ++OffloadingEntriesNum;
10587 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
10590 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
10594 if (OMPBuilder->Config.isTargetDevice()) {
10596 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10599 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10600 Entry.setAddress(
Addr);
10602 Entry.setFlags(Flags);
10605 hasTargetRegionEntryInfo(EntryInfo,
true))
10607 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10608 "Target region entry already registered!");
10610 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10611 ++OffloadingEntriesNum;
10613 incrementTargetRegionEntryInfoCount(EntryInfo);
10620 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
10622 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10623 if (It == OffloadEntriesTargetRegion.end()) {
10627 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10635 for (
const auto &It : OffloadEntriesTargetRegion) {
10636 Action(It.first, It.second);
10642 OffloadEntriesDeviceGlobalVar.try_emplace(
Name, Order, Flags);
10643 ++OffloadingEntriesNum;
10649 if (OMPBuilder->Config.isTargetDevice()) {
10651 if (!hasDeviceGlobalVarEntryInfo(VarName))
10653 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
10654 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10655 if (Entry.getVarSize() == 0) {
10656 Entry.setVarSize(VarSize);
10657 Entry.setLinkage(Linkage);
10661 Entry.setVarSize(VarSize);
10662 Entry.setLinkage(Linkage);
10663 Entry.setAddress(
Addr);
10665 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10666 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
10667 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10668 "Entry not initialized!");
10669 if (Entry.getVarSize() == 0) {
10670 Entry.setVarSize(VarSize);
10671 Entry.setLinkage(Linkage);
10676 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10677 Addr, VarSize, Flags, Linkage,
10680 OffloadEntriesDeviceGlobalVar.try_emplace(
10681 VarName, OffloadingEntriesNum,
Addr, VarSize, Flags, Linkage,
"");
10682 ++OffloadingEntriesNum;
10689 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
10690 Action(E.getKey(), E.getValue());
10697void CanonicalLoopInfo::collectControlBlocks(
10704 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
10716void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10720 assert(isa<CmpInst>(CmpI) &&
"First inst must compare IV with TripCount");
10728void CanonicalLoopInfo::mapIndVar(
10738 for (
Use &U : OldIV->
uses()) {
10739 auto *
User = dyn_cast<Instruction>(U.getUser());
10742 if (
User->getParent() == getCond())
10744 if (
User->getParent() == getLatch())
10750 Value *NewIV = Updater(OldIV);
10753 for (
Use *U : ReplacableUses)
10774 "Preheader must terminate with unconditional branch");
10776 "Preheader must jump to header");
10779 assert(isa<BranchInst>(Header->getTerminator()) &&
10780 "Header must terminate with unconditional branch");
10781 assert(Header->getSingleSuccessor() ==
Cond &&
10782 "Header must jump to exiting block");
10785 assert(
Cond->getSinglePredecessor() == Header &&
10786 "Exiting block only reachable from header");
10788 assert(isa<BranchInst>(
Cond->getTerminator()) &&
10789 "Exiting block must terminate with conditional branch");
10791 "Exiting block must have two successors");
10792 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(0) == Body &&
10793 "Exiting block's first successor jump to the body");
10794 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(1) == Exit &&
10795 "Exiting block's second successor must exit the loop");
10799 "Body only reachable from exiting block");
10804 "Latch must terminate with unconditional branch");
10812 assert(isa<BranchInst>(Exit->getTerminator()) &&
10813 "Exit block must terminate with unconditional branch");
10814 assert(Exit->getSingleSuccessor() == After &&
10815 "Exit block must jump to after block");
10819 "After block only reachable from exit block");
10823 assert(IndVar &&
"Canonical induction variable not found?");
10825 "Induction variable must be an integer");
10827 "Induction variable must be a PHI in the loop header");
10828 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
10830 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->
isZero());
10831 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
10833 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
10835 assert(cast<BinaryOperator>(NextIndVar)->
getOpcode() == BinaryOperator::Add);
10836 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
10837 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
10840 Value *TripCount = getTripCount();
10841 assert(TripCount &&
"Loop trip count not found?");
10843 "Trip count and induction variable must have the same type");
10845 auto *CmpI = cast<CmpInst>(&
Cond->front());
10847 "Exit condition must be a signed less-than comparison");
10849 "Exit condition must compare the induction variable");
10851 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_EN...
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
BlockVerifier::State From
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using OpenMP dynamic scheduling, depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn)
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
A container for analyses that lazily runs them and caches their results.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
LLVM_ABI AttrBuilder & addAttribute(Attribute::AttrKind Val)
Add an attribute to the builder.
LLVM_ABI AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
LLVM_ABI AttributeSet getFnAttrs() const
The function attributes are returned.
AttributeList addFnAttributes(LLVMContext &C, const AttrBuilder &B) const
Add function attribute to the list.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
reverse_iterator rbegin()
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
const Instruction & back() const
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
unsigned getDefaultGlobalsAddressSpace() const
Align getABIIntegerTypeAlignment(unsigned BitWidth) const
Returns the minimum ABI-required alignment for an integer type of the specified bitwidth.
unsigned getAllocaAddrSpace() const
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI unsigned getPointerSize(unsigned AS=0) const
The pointer representation size in bytes, rounded up to a whole number of bytes.
unsigned getIndexSizeInBits(unsigned AS) const
The size in bits of indices used for address calculation in getelementptr and for addresses in the gi...
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Value * CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name="")
LLVM_ABI Value * CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS, const Twine &Name="")
Return the i64 difference between two pointer values, dividing out the size of the pointed-to objects...
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
CallInst * CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert a memcpy between the specified pointers.
UnreachableInst * CreateUnreachable()
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
LLVM_ABI CallInst * CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, Value *OffsetValue=nullptr)
Create an assume intrinsic call that represents an alignment assumption on the provided pointer.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFPToUI(Value *V, Type *DestTy, const Twine &Name="")
BasicBlock::iterator GetInsertPoint() const
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
IntegerType * getIndexTy(const DataLayout &DL, unsigned AddrSpace)
Fetch the type of an integer that should be used to index GEP operations within AddressSpace.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name="")
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
LLVM_ABI CallInst * CreateMalloc(Type *IntPtrTy, Type *AllocTy, Value *AllocSize, Value *ArraySize, ArrayRef< OperandBundleDef > OpB, Function *MallocF=nullptr, const Twine &Name="")
InsertPoint saveIP() const
Returns the current insert point.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
LLVM_ABI CallInst * CreateFree(Value *Source, ArrayRef< OperandBundleDef > Bundles={})
Generate the IR for a call to the builtin free function.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
LLVM_ABI DebugLoc getCurrentDebugLocation() const
Get location information used by debugging information.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExactUDiv(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void ClearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt16(uint16_t C)
Get a constant 16-bit value.
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Value * CreateIsNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg == 0.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI GlobalVariable * CreateGlobalString(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr, bool AddNull=true)
Make a new global variable with initializer type i8*.
Value * CreateNUWSub(Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
LLVMContext & getContext() const
Get the global data context.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
StringRef getName() const
Get a short "name" for the module.
iterator_range< global_iterator > globals()
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
void setGridValue(omp::GV G)
StringRef separator() const
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
StringRef firstSeparator() const
std::optional< bool > EmitLLVMUsedMetaInfo
Flag for specifying if LLVMUsed information should be emitted.
omp::GV getGridValue() const
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
LLVM_ABI bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
void setEmitLLVMUsed(bool Value=true)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const SmallVector< DependData > &Dependencies, bool HasNowait=false)
Generator for '#omp target'.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for '#omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if a variable with the given name already exists) or creates internal global variable with the spe...
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for '#omp cancellation point'.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for: V = X — Only Scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after the scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM_ABI Type * getStructElementType(unsigned N) const
bool isVoidTy() const
Return true if this is 'void'.
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return the single undroppable user of this value if there is exactly one; that user may have multiple uses of this value.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
iterator insertAfter(iterator where, pointer New)
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
LLVM_ABI void emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
LLVM_ABI std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
a struct to pack relevant information while generating atomic Ops
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * DynCGGroupMem
The size of the dynamic shared memory.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
unsigned GV_Warp_Size
The default value of maximum number of threads in a worker warp.