65#define DEBUG_TYPE "openmp-ir-builder"
72 cl::desc(
"Use optimistic attributes describing "
73 "'as-if' properties of runtime calls."),
77 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
78 cl::desc(
"Factor for the unroll threshold to account for code "
79 "simplifications still taking place"),
90 if (!IP1.isSet() || !IP2.isSet())
92 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
97 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
98 case OMPScheduleType::UnorderedStaticChunked:
99 case OMPScheduleType::UnorderedStatic:
100 case OMPScheduleType::UnorderedDynamicChunked:
101 case OMPScheduleType::UnorderedGuidedChunked:
102 case OMPScheduleType::UnorderedRuntime:
103 case OMPScheduleType::UnorderedAuto:
104 case OMPScheduleType::UnorderedTrapezoidal:
105 case OMPScheduleType::UnorderedGreedy:
106 case OMPScheduleType::UnorderedBalanced:
107 case OMPScheduleType::UnorderedGuidedIterativeChunked:
108 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
109 case OMPScheduleType::UnorderedSteal:
110 case OMPScheduleType::UnorderedStaticBalancedChunked:
111 case OMPScheduleType::UnorderedGuidedSimd:
112 case OMPScheduleType::UnorderedRuntimeSimd:
113 case OMPScheduleType::OrderedStaticChunked:
114 case OMPScheduleType::OrderedStatic:
115 case OMPScheduleType::OrderedDynamicChunked:
116 case OMPScheduleType::OrderedGuidedChunked:
117 case OMPScheduleType::OrderedRuntime:
118 case OMPScheduleType::OrderedAuto:
119 case OMPScheduleType::OrderdTrapezoidal:
120 case OMPScheduleType::NomergeUnorderedStaticChunked:
121 case OMPScheduleType::NomergeUnorderedStatic:
122 case OMPScheduleType::NomergeUnorderedDynamicChunked:
123 case OMPScheduleType::NomergeUnorderedGuidedChunked:
124 case OMPScheduleType::NomergeUnorderedRuntime:
125 case OMPScheduleType::NomergeUnorderedAuto:
126 case OMPScheduleType::NomergeUnorderedTrapezoidal:
127 case OMPScheduleType::NomergeUnorderedGreedy:
128 case OMPScheduleType::NomergeUnorderedBalanced:
129 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
130 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
131 case OMPScheduleType::NomergeUnorderedSteal:
132 case OMPScheduleType::NomergeOrderedStaticChunked:
133 case OMPScheduleType::NomergeOrderedStatic:
134 case OMPScheduleType::NomergeOrderedDynamicChunked:
135 case OMPScheduleType::NomergeOrderedGuidedChunked:
136 case OMPScheduleType::NomergeOrderedRuntime:
137 case OMPScheduleType::NomergeOrderedAuto:
138 case OMPScheduleType::NomergeOrderedTrapezoidal:
146 SchedType & OMPScheduleType::MonotonicityMask;
147 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
158 if (Features.
count(
"+wavefrontsize64"))
159 return omp::getAMDGPUGridValues<64>();
160 return omp::getAMDGPUGridValues<32>();
173 bool HasSimdModifier) {
175 switch (ClauseKind) {
176 case OMP_SCHEDULE_Default:
177 case OMP_SCHEDULE_Static:
178 return HasChunks ? OMPScheduleType::BaseStaticChunked
179 : OMPScheduleType::BaseStatic;
180 case OMP_SCHEDULE_Dynamic:
181 return OMPScheduleType::BaseDynamicChunked;
182 case OMP_SCHEDULE_Guided:
183 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
184 : OMPScheduleType::BaseGuidedChunked;
185 case OMP_SCHEDULE_Auto:
187 case OMP_SCHEDULE_Runtime:
188 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
189 : OMPScheduleType::BaseRuntime;
197 bool HasOrderedClause) {
198 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
199 OMPScheduleType::None &&
200 "Must not have ordering nor monotonicity flags already set");
203 ? OMPScheduleType::ModifierOrdered
204 : OMPScheduleType::ModifierUnordered;
205 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
208 if (OrderingScheduleType ==
209 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
210 return OMPScheduleType::OrderedGuidedChunked;
211 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
212 OMPScheduleType::ModifierOrdered))
213 return OMPScheduleType::OrderedRuntime;
215 return OrderingScheduleType;
221 bool HasSimdModifier,
bool HasMonotonic,
222 bool HasNonmonotonic,
bool HasOrderedClause) {
223 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
224 OMPScheduleType::None &&
225 "Must not have monotonicity flags already set");
226 assert((!HasMonotonic || !HasNonmonotonic) &&
227 "Monotonic and Nonmonotonic are contradicting each other");
230 return ScheduleType | OMPScheduleType::ModifierMonotonic;
231 }
else if (HasNonmonotonic) {
232 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
242 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
243 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
249 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
257 bool HasSimdModifier,
bool HasMonotonicModifier,
258 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
264 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
265 HasNonmonotonicModifier, HasOrderedClause);
278 VType = vAlloca->getAllocatedType();
306 auto *Br = cast<BranchInst>(Term);
307 assert(!Br->isConditional() &&
308 "BB's terminator must be an unconditional branch (or degenerate)");
311 Br->setSuccessor(0,
Target);
316 NewBr->setDebugLoc(
DL);
321 assert(New->getFirstInsertionPt() == New->begin() &&
322 "Target BB must not have PHI nodes");
326 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
330 NewBr->setDebugLoc(
DL);
356 New->replaceSuccessorsPhiUsesWith(Old, New);
401 const Twine &
Name =
"",
bool AsPtr =
true) {
409 FakeVal = FakeValAddr;
437enum OpenMPOffloadingRequiresDirFlags {
439 OMP_REQ_UNDEFINED = 0x000,
441 OMP_REQ_NONE = 0x001,
443 OMP_REQ_REVERSE_OFFLOAD = 0x002,
445 OMP_REQ_UNIFIED_ADDRESS = 0x004,
447 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
449 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
456 : RequiresFlags(OMP_REQ_UNDEFINED) {}
459 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
460 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
461 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
462 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
463 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
464 RequiresFlags(OMP_REQ_UNDEFINED) {
465 if (HasRequiresReverseOffload)
466 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
467 if (HasRequiresUnifiedAddress)
468 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
469 if (HasRequiresUnifiedSharedMemory)
470 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
471 if (HasRequiresDynamicAllocators)
472 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
476 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
480 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
484 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
488 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
493 :
static_cast<int64_t
>(OMP_REQ_NONE);
498 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
500 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
505 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
507 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
512 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
514 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
519 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
521 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
534 constexpr const size_t MaxDim = 3;
542 Value *NumThreads3D =
545 seq<unsigned>(1, std::min(KernelArgs.
NumTeams.size(), MaxDim)))
549 seq<unsigned>(1, std::min(KernelArgs.
NumThreads.size(), MaxDim)))
573 auto FnAttrs = Attrs.getFnAttrs();
574 auto RetAttrs = Attrs.getRetAttrs();
576 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
581 bool Param =
true) ->
void {
582 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
583 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
584 if (HasSignExt || HasZeroExt) {
585 assert(AS.getNumAttributes() == 1 &&
586 "Currently not handling extension attr combined with others.");
588 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
591 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
598#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
599#include "llvm/Frontend/OpenMP/OMPKinds.def"
603#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
605 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
606 addAttrSet(RetAttrs, RetAttrSet, false); \
607 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
608 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
609 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
611#include "llvm/Frontend/OpenMP/OMPKinds.def"
625#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
627 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
629 Fn = M.getFunction(Str); \
631#include "llvm/Frontend/OpenMP/OMPKinds.def"
637#define OMP_RTL(Enum, Str, ...) \
639 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
641#include "llvm/Frontend/OpenMP/OMPKinds.def"
645 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
655 LLVMContext::MD_callback,
657 2, {-1, -1},
true)}));
670 assert(Fn &&
"Failed to create OpenMP runtime function");
677 auto *Fn = dyn_cast<llvm::Function>(RTLFn.
getCallee());
678 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
693 for (
auto Inst =
Block->getReverseIterator()->begin();
694 Inst !=
Block->getReverseIterator()->end();) {
695 if (
auto *
AllocaInst = dyn_cast_if_present<llvm::AllocaInst>(Inst)) {
719 ParallelRegionBlockSet.
clear();
721 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
740 ".omp_par", ArgsInZeroAddressSpace);
744 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
746 "Expected OpenMP outlining to be possible!");
748 for (
auto *V : OI.ExcludeArgsFromAggregate)
755 if (TargetCpuAttr.isStringAttribute())
758 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
759 if (TargetFeaturesAttr.isStringAttribute())
760 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
763 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
765 "OpenMP outlined functions should not return a value!");
777 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
784 "Expected instructions to add in the outlined region entry");
791 if (
I.isTerminator())
794 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
797 OI.EntryBB->moveBefore(&ArtificialEntry);
804 if (OI.PostOutlineCB)
805 OI.PostOutlineCB(*OutlinedFn);
836 errs() <<
"Error of kind: " << Kind
837 <<
" when emitting offload entries and metadata during "
838 "OMPIRBuilder finalization \n";
845 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
847 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
873 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
877 if (UsedArray.
empty())
884 GV->setSection(
"llvm.metadata");
893 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
901 unsigned Reserve2Flags) {
903 LocFlags |= OMP_IDENT_FLAG_KMPC;
911 ConstantInt::get(
Int32, Reserve2Flags),
912 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
919 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
920 if (
GV.getInitializer() == Initializer)
925 M, OpenMPIRBuilder::Ident,
940 SrcLocStrSize = LocStr.
size();
949 if (
GV.isConstant() &&
GV.hasInitializer() &&
950 GV.getInitializer() == Initializer)
961 unsigned Line,
unsigned Column,
967 Buffer.
append(FunctionName);
969 Buffer.
append(std::to_string(Line));
971 Buffer.
append(std::to_string(Column));
979 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
990 if (
DIFile *DIF = DIL->getFile())
991 if (std::optional<StringRef> Source = DIF->getSource())
997 DIL->getColumn(), SrcLocStrSize);
1009 "omp_global_thread_num");
1014 bool ForceSimpleCall,
bool CheckCancelFlag) {
1024 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1027 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1030 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1033 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1036 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1049 bool UseCancelBarrier =
1054 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1055 : OMPRTL___kmpc_barrier),
1058 if (UseCancelBarrier && CheckCancelFlag)
1068 omp::Directive CanceledDirective) {
1080 Value *CancelKind =
nullptr;
1081 switch (CanceledDirective) {
1082#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1083 case DirectiveEnum: \
1084 CancelKind = Builder.getInt32(Value); \
1086#include "llvm/Frontend/OpenMP/OMPKinds.def"
1098 if (CanceledDirective == OMPD_parallel) {
1102 omp::Directive::OMPD_unknown,
1116 UI->eraseFromParent();
1129 auto *KernelArgsPtr =
1142 NumThreads, HostPtr, KernelArgsPtr};
1170 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1174 Value *Return =
nullptr;
1194 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1195 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1211 emitBlock(OffloadContBlock, CurFn,
true);
1216 Value *CancelFlag, omp::Directive CanceledDirective,
1219 "Unexpected cancellation!");
1272 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1275 "Expected at least tid and bounded tid as arguments");
1276 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1279 assert(CI &&
"Expected call instruction to outlined function");
1280 CI->
getParent()->setName(
"omp_parallel");
1283 Type *PtrTy = OMPIRBuilder->VoidPtr;
1287 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.
saveIP();
1291 Value *Args = ArgsAlloca;
1299 for (
unsigned Idx = 0;
Idx < NumCapturedVars;
Idx++) {
1311 Value *Parallel51CallArgs[] = {
1315 NumThreads ? NumThreads : Builder.
getInt32(-1),
1318 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
1321 Builder.
getInt64(NumCapturedVars)};
1326 Builder.
CreateCall(RTLFn, Parallel51CallArgs);
1341 I->eraseFromParent();
1363 if (
auto *
F = dyn_cast<Function>(RTLFn.
getCallee())) {
1364 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1372 F->addMetadata(LLVMContext::MD_callback,
1381 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1384 "Expected at least tid and bounded tid as arguments");
1385 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1388 CI->
getParent()->setName(
"omp_parallel");
1392 Value *ForkCallArgs[] = {
1393 Ident, Builder.
getInt32(NumCapturedVars),
1394 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};
1397 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1406 auto PtrTy = OMPIRBuilder->VoidPtr;
1407 if (IfCondition && NumCapturedVars == 0) {
1411 if (IfCondition && RealArgs.
back()->getType() != PtrTy)
1429 I->eraseFromParent();
1437 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1464 if (ProcBind != OMP_PROC_BIND_default) {
1468 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1496 TIDAddrAlloca, PointerType ::get(
M.
getContext(), 0),
"tid.addr.ascast");
1501 "zero.addr.ascast");
1525 if (IP.getBlock()->end() == IP.getPoint()) {
1531 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1532 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1533 "Unexpected insertion point for finalization call!");
1569 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1572 assert(BodyGenCB &&
"Expected body generation callback!");
1574 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1577 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1583 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1585 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1586 ThreadID, ToBeDeletedVec);
1591 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1593 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1610 PRegOutlinedExitBB->
setName(
"omp.par.outlined.exit");
1611 Blocks.push_back(PRegOutlinedExitBB);
1622 ".omp_par", ArgsInZeroAddressSpace);
1627 Extractor.
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1633 if (
auto *
GV = dyn_cast_if_present<GlobalVariable>(
I))
1634 return GV->getValueType() == OpenMPIRBuilder::Ident;
1639 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1645 if (&V == TIDAddr || &V == ZeroAddr) {
1651 for (
Use &U : V.uses())
1652 if (
auto *UserI = dyn_cast<Instruction>(U.getUser()))
1653 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1663 if (!V.getType()->isPointerTy()) {
1682 Value *ReplacementValue =
nullptr;
1683 CallInst *CI = dyn_cast<CallInst>(&V);
1685 ReplacementValue = PrivTID;
1688 PrivCB(InnerAllocaIP,
Builder.
saveIP(), V, *Inner, ReplacementValue);
1696 assert(ReplacementValue &&
1697 "Expected copy/create callback to set replacement value!");
1698 if (ReplacementValue == &V)
1703 UPtr->set(ReplacementValue);
1722 for (
Value *Input : Inputs) {
1724 if (
Error Err = PrivHelper(*Input))
1728 for (
Value *Output : Outputs)
1732 "OpenMP outlining should not produce live-out values!");
1734 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1737 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1745 assert(FiniInfo.DK == OMPD_parallel &&
1746 "Unexpected finalization stack state!");
1751 if (
Error Err = FiniCB(PreFiniIP))
1757 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1758 UI->eraseFromParent();
1824 if (Dependencies.
empty())
1844 Type *DependInfo = OMPBuilder.DependInfo;
1847 Value *DepArray =
nullptr;
1853 DepArray = Builder.
CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1855 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1861 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1866 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1868 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1873 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1876 static_cast<unsigned int>(Dep.DepKind)),
1919 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1930 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1932 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1933 Mergeable, Priority, EventHandle, TaskAllocaBB,
1934 ToBeDeleted](
Function &OutlinedFn)
mutable {
1936 assert(OutlinedFn.getNumUses() == 1 &&
1937 "there must be a single user for the outlined function");
1938 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
1942 bool HasShareds = StaleCI->
arg_size() > 1;
1990 assert(ArgStructAlloca &&
1991 "Unable to find the alloca instruction corresponding to arguments "
1992 "for extracted function");
1995 assert(ArgStructType &&
"Unable to find struct type corresponding to "
1996 "arguments for extracted function");
2004 TaskAllocFn, {Ident, ThreadID,
Flags,
2005 TaskSize, SharedsSize,
2013 OMPRTL___kmpc_task_allow_completion_event);
2041 Constant *Zero = ConstantInt::get(Int32Ty, 0);
2050 TaskStructType, TaskGEP, {Zero, ConstantInt::get(Int32Ty, 4)});
2054 PriorityData, {Zero, Zero});
2058 Value *DepArray =
nullptr;
2059 if (Dependencies.
size()) {
2074 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2081 static_cast<unsigned int>(RTLDependInfoFields::Len));
2088 static_cast<unsigned int>(RTLDependInfoFields::Flags));
2091 static_cast<unsigned int>(Dep.DepKind)),
2122 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2128 if (Dependencies.
size()) {
2152 if (Dependencies.
size()) {
2173 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2177 I->eraseFromParent();
2226 if (IP.getBlock()->end() != IP.getPoint())
2237 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
2238 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2239 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2270 unsigned CaseNumber = 0;
2271 for (
auto SectionCB : SectionCBs) {
2289 Value *LB = ConstantInt::get(I32Ty, 0);
2290 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2291 Value *ST = ConstantInt::get(I32Ty, 1);
2293 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2298 applyStaticWorkshareLoop(Loc.
DL, *
LoopInfo, AllocaIP, !IsNowait);
2305 assert(FiniInfo.DK == OMPD_sections &&
2306 "Unexpected finalization stack state!");
2313 AfterIP = {FiniBB, FiniBB->
begin()};
2327 if (IP.getBlock()->end() != IP.getPoint())
2346 Directive OMPD = Directive::OMPD_sections;
2349 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2360Value *OpenMPIRBuilder::getGPUThreadID() {
2363 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2367Value *OpenMPIRBuilder::getGPUWarpSize() {
2372Value *OpenMPIRBuilder::getNVPTXWarpID() {
2377Value *OpenMPIRBuilder::getNVPTXLaneID() {
2379 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2380 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2385Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *
From,
2390 assert(FromSize > 0 &&
"From size must be greater than zero");
2391 assert(ToSize > 0 &&
"To size must be greater than zero");
2392 if (FromType == ToType)
2394 if (FromSize == ToSize)
2409Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2414 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2418 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2422 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2423 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2424 Value *WarpSizeCast =
2426 Value *ShuffleCall =
2428 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2431void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2447 Value *ElemPtr = DstAddr;
2449 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2461 if ((
Size / IntSize) > 1) {
2485 Value *Res = createRuntimeShuffleFunction(
2494 Value *LocalElemPtr =
2501 Value *Res = createRuntimeShuffleFunction(
2515void OpenMPIRBuilder::emitReductionListCopy(
2516 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2518 CopyOptionsTy CopyOptions) {
2521 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2525 for (
auto En :
enumerate(ReductionInfos)) {
2526 const ReductionInfo &RI = En.value();
2527 Value *SrcElementAddr =
nullptr;
2528 Value *DestElementAddr =
nullptr;
2529 Value *DestElementPtrAddr =
nullptr;
2531 bool ShuffleInElement =
false;
2534 bool UpdateDestListPtr =
false;
2538 ReductionArrayTy, SrcBase,
2539 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2545 ReductionArrayTy, DestBase,
2546 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2552 ".omp.reduction.element");
2555 DestElementAddr = DestAlloca;
2558 DestElementAddr->
getName() +
".ascast");
2560 ShuffleInElement =
true;
2561 UpdateDestListPtr =
true;
2573 if (ShuffleInElement) {
2574 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2575 RemoteLaneOffset, ReductionArrayTy);
2577 switch (RI.EvaluationKind) {
2586 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2588 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2590 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2592 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2595 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2597 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2619 if (UpdateDestListPtr) {
2622 DestElementAddr->
getName() +
".ascast");
2638 "_omp_reduction_inter_warp_copy_func", &
M);
2661 "__openmp_nvptx_data_transfer_temporary_storage";
2665 if (!TransferMedium) {
2674 Value *GPUThreadID = getGPUThreadID();
2676 Value *LaneID = getNVPTXLaneID();
2678 Value *WarpID = getNVPTXWarpID();
2687 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2691 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2694 NumWarpsAlloca->
getName() +
".ascast");
2705 for (
auto En :
enumerate(ReductionInfos)) {
2710 const ReductionInfo &RI = En.value();
2712 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2715 unsigned NumIters = RealTySize / TySize;
2718 Value *Cnt =
nullptr;
2719 Value *CntAddr =
nullptr;
2729 CntAddr->
getName() +
".ascast");
2749 omp::Directive::OMPD_unknown,
2753 return BarrierIP1.takeError();
2764 auto *RedListArrayTy =
2770 {ConstantInt::get(IndexTy, 0),
2771 ConstantInt::get(IndexTy, En.index())});
2797 omp::Directive::OMPD_unknown,
2801 return BarrierIP2.takeError();
2808 Value *NumWarpsVal =
2811 Value *IsActiveThread =
2822 Value *TargetElemPtrPtr =
2824 {ConstantInt::get(IndexTy, 0),
2825 ConstantInt::get(IndexTy, En.index())});
2826 Value *TargetElemPtrVal =
2828 Value *TargetElemPtr = TargetElemPtrVal;
2834 Value *SrcMediumValue =
2853 RealTySize %= TySize;
2863Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2869 {Builder.getPtrTy(), Builder.getInt16Ty(),
2870 Builder.getInt16Ty(), Builder.getInt16Ty()},
2874 "_omp_reduction_shuffle_and_reduce_func", &
M);
2895 Type *ReduceListArgType = ReduceListArg->
getType();
2899 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2901 LaneIDArg->
getName() +
".addr");
2903 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2905 AlgoVerArg->
getName() +
".addr");
2912 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2915 ReduceListAlloca, ReduceListArgType,
2916 ReduceListAlloca->
getName() +
".ascast");
2918 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2920 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2921 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2923 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2926 RemoteReductionListAlloca->
getName() +
".ascast");
2935 Value *RemoteLaneOffset =
2944 emitReductionListCopy(
2946 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
2977 Value *RemoteOffsetComp =
2994 ->addFnAttr(Attribute::NoUnwind);
3015 ReductionInfos, RemoteListAddrCast, ReduceList);
3028Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3035 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3039 "_omp_reduction_list_to_global_copy_func", &
M);
3056 BufferArg->
getName() +
".addr");
3063 BufferArgAlloca->
getName() +
".ascast");
3068 ReduceListArgAlloca->
getName() +
".ascast");
3074 Value *LocalReduceList =
3076 Value *BufferArgVal =
3081 for (
auto En :
enumerate(ReductionInfos)) {
3082 const ReductionInfo &RI = En.value();
3083 auto *RedListArrayTy =
3087 RedListArrayTy, LocalReduceList,
3088 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3096 ReductionsBufferTy, BufferVD, 0, En.index());
3098 switch (RI.EvaluationKind) {
3106 RI.ElementType, ElemPtr, 0, 0,
".realp");
3108 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3110 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3112 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3115 RI.ElementType, GlobVal, 0, 0,
".realp");
3117 RI.ElementType, GlobVal, 0, 1,
".imagp");
3138Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3145 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3149 "_omp_reduction_list_to_global_reduce_func", &
M);
3166 BufferArg->
getName() +
".addr");
3171 auto *RedListArrayTy =
3176 Value *LocalReduceList =
3181 BufferArgAlloca->
getName() +
".ascast");
3186 ReduceListArgAlloca->
getName() +
".ascast");
3189 LocalReduceList->
getName() +
".ascast");
3199 for (
auto En :
enumerate(ReductionInfos)) {
3201 RedListArrayTy, LocalReduceListAddrCast,
3202 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3207 ReductionsBufferTy, BufferVD, 0, En.index());
3215 ->addFnAttr(Attribute::NoUnwind);
3221Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3228 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3232 "_omp_reduction_global_to_list_copy_func", &
M);
3249 BufferArg->
getName() +
".addr");
3256 BufferArgAlloca->
getName() +
".ascast");
3261 ReduceListArgAlloca->
getName() +
".ascast");
3266 Value *LocalReduceList =
3272 for (
auto En :
enumerate(ReductionInfos)) {
3274 auto *RedListArrayTy =
3278 RedListArrayTy, LocalReduceList,
3279 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3286 ReductionsBufferTy, BufferVD, 0, En.index());
3329Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3336 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3340 "_omp_reduction_global_to_list_reduce_func", &
M);
3357 BufferArg->
getName() +
".addr");
3367 Value *LocalReduceList =
3372 BufferArgAlloca->
getName() +
".ascast");
3377 ReduceListArgAlloca->
getName() +
".ascast");
3380 LocalReduceList->
getName() +
".ascast");
3390 for (
auto En :
enumerate(ReductionInfos)) {
3392 RedListArrayTy, ReductionList,
3393 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3398 ReductionsBufferTy, BufferVD, 0, En.index());
3406 ->addFnAttr(Attribute::NoUnwind);
3412std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3413 std::string Suffix =
3415 return (
Name + Suffix).str();
3420 ReductionGenCBKind ReductionGenCBKind,
AttributeList FuncAttrs) {
3422 {Builder.getPtrTy(), Builder.getPtrTy()},
3424 std::string
Name = getReductionFuncName(ReducerName);
3436 Value *LHSArrayPtr =
nullptr;
3437 Value *RHSArrayPtr =
nullptr;
3448 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3450 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3460 for (
auto En :
enumerate(ReductionInfos)) {
3461 const ReductionInfo &RI = En.value();
3463 RedArrayTy, RHSArrayPtr,
3464 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3467 RHSI8Ptr, RI.PrivateVariable->getType(),
3468 RHSI8Ptr->
getName() +
".ascast");
3471 RedArrayTy, LHSArrayPtr,
3472 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3475 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3487 return AfterIP.takeError();
3489 return ReductionFunc;
3495 for (
auto En :
enumerate(ReductionInfos)) {
3496 unsigned Index = En.index();
3497 const ReductionInfo &RI = En.value();
3498 Value *LHSFixupPtr, *RHSFixupPtr;
3500 Builder.
saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3505 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3506 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3510 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3511 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3517 return ReductionFunc;
3525 assert(RI.Variable &&
"expected non-null variable");
3526 assert(RI.PrivateVariable &&
"expected non-null private variable");
3527 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3528 "expected non-null reduction generator callback");
3531 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3532 "expected variables and their private equivalents to have the same "
3535 assert(RI.Variable->getType()->isPointerTy() &&
3536 "expected variables to be pointers");
3543 bool IsNoWait,
bool IsTeamsReduction,
bool HasDistribute,
3545 unsigned ReductionBufNum,
Value *SrcLocInfo) {
3559 if (ReductionInfos.
size() == 0)
3574 if (!ReductionResult)
3576 Function *ReductionFunc = *ReductionResult;
3580 if (GridValue.has_value())
3598 Value *ReductionListAlloca =
3601 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3605 for (
auto En :
enumerate(ReductionInfos)) {
3608 RedArrayTy, ReductionList,
3609 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3616 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3618 emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs);
3626 unsigned MaxDataSize = 0;
3628 for (
auto En :
enumerate(ReductionInfos)) {
3630 if (
Size > MaxDataSize)
3632 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3634 Value *ReductionDataSize =
3636 if (!IsTeamsReduction) {
3637 Value *SarFuncCast =
3641 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3644 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3649 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3651 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3652 Function *LtGCFunc = emitListToGlobalCopyFunction(
3653 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3654 Function *LtGRFunc = emitListToGlobalReduceFunction(
3655 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3656 Function *GtLCFunc = emitGlobalToListCopyFunction(
3657 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3658 Function *GtLRFunc = emitGlobalToListReduceFunction(
3659 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3663 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3665 Value *Args3[] = {SrcLocInfo,
3666 KernelTeamsReductionPtr,
3678 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3695 for (
auto En :
enumerate(ReductionInfos)) {
3702 Value *LHSPtr, *RHSPtr;
3704 &LHSPtr, &RHSPtr, CurFunc));
3709 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3713 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3717 assert(
false &&
"Unhandled ReductionGenCBKind");
3733 ".omp.reduction.func", &M);
3744 assert(RI.Variable &&
"expected non-null variable");
3745 assert(RI.PrivateVariable &&
"expected non-null private variable");
3746 assert(RI.ReductionGen &&
"expected non-null reduction generator callback");
3747 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
3748 "expected variables and their private equivalents to have the same "
3750 assert(RI.Variable->getType()->isPointerTy() &&
3751 "expected variables to be pointers");
3764 unsigned NumReductions = ReductionInfos.
size();
3771 for (
auto En :
enumerate(ReductionInfos)) {
3772 unsigned Index = En.index();
3775 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3790 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3795 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3798 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3800 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3801 : RuntimeFunction::OMPRTL___kmpc_reduce);
3804 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3805 ReductionFunc, Lock},
3824 for (
auto En :
enumerate(ReductionInfos)) {
3830 if (!IsByRef[En.index()]) {
3832 "red.value." +
Twine(En.index()));
3834 Value *PrivateRedValue =
3836 "red.private.value." +
Twine(En.index()));
3847 if (!IsByRef[En.index()])
3851 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3852 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3860 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3884 for (
auto En :
enumerate(ReductionInfos)) {
3887 RedArrayTy, LHSArrayPtr, 0, En.index());
3892 RedArrayTy, RHSArrayPtr, 0, En.index());
3906 if (!IsByRef[En.index()])
3922 Directive OMPD = Directive::OMPD_master;
3927 Value *Args[] = {Ident, ThreadId};
3935 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3946 Directive OMPD = Directive::OMPD_masked;
3952 Value *ArgsEnd[] = {Ident, ThreadId};
3960 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3995 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4008 "omp_" +
Name +
".next",
true);
4019 CL->Header = Header;
4038 NextBB, NextBB,
Name);
4063 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4073 auto *IndVarTy = cast<IntegerType>(Start->getType());
4074 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4075 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4081 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
4109 Value *CountIfLooping;
4110 if (InclusiveStop) {
4120 "omp_" +
Name +
".tripcount");
4141 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4144 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
4150 InsertPointTy AllocaIP,
4151 bool NeedsBarrier) {
4152 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4154 "Require dedicated allocate IP");
4166 Type *IVTy =
IV->getType();
4186 Constant *One = ConstantInt::get(IVTy, 1);
4194 Constant *SchedulingType = ConstantInt::get(
4195 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStatic));
4200 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4201 PUpperBound, PStride, One,
Zero});
4206 CLI->setTripCount(TripCount);
4228 omp::Directive::OMPD_for,
false,
4231 return BarrierIP.takeError();
4241OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4243 InsertPointTy AllocaIP,
4246 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4247 assert(ChunkSize &&
"Chunk size is required");
4252 Type *IVTy =
IV->getType();
4254 "Max supported tripcount bitwidth is 64 bits");
4256 :
Type::getInt64Ty(Ctx);
4259 Constant *One = ConstantInt::get(InternalIVTy, 1);
4271 Value *PLowerBound =
4273 Value *PUpperBound =
4282 Value *CastedChunkSize =
4284 Value *CastedTripCount =
4287 Constant *SchedulingType = ConstantInt::get(
4288 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4302 SchedulingType, PLastIter,
4303 PLowerBound, PUpperBound,
4308 Value *FirstChunkStart =
4310 Value *FirstChunkStop =
4315 Value *NextChunkStride =
4320 Value *DispatchCounter;
4328 DispatchCounter = Counter;
4331 FirstChunkStart, CastedTripCount, NextChunkStride,
4355 Value *IsLastChunk =
4357 Value *CountUntilOrigTripCount =
4360 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4361 Value *BackcastedChunkTC =
4363 CLI->setTripCount(BackcastedChunkTC);
4368 Value *BackcastedDispatchCounter =
4385 return AfterIP.takeError();
4406 case WorksharingLoopType::ForStaticLoop:
4409 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4412 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4414 case WorksharingLoopType::DistributeStaticLoop:
4417 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4420 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4422 case WorksharingLoopType::DistributeForStaticLoop:
4425 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4428 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4431 if (Bitwidth != 32 && Bitwidth != 64) {
4453 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4454 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4459 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
4460 Builder.
restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4465 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4466 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4467 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4504 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
4512 "Expected unique undroppable user of outlined function");
4513 CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
4514 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
4516 "Expected outlined function call to be located in loop preheader");
4518 if (OutlinedFnCallInstruction->
arg_size() > 1)
4525 LoopBodyArg, ParallelTaskPtr, TripCount,
4528 for (
auto &ToBeDeletedItem : ToBeDeleted)
4529 ToBeDeletedItem->eraseFromParent();
4535 InsertPointTy AllocaIP,
4548 OI.OuterAllocaBB = AllocaIP.getBlock();
4553 "omp.prelatch",
true);
4573 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
4575 ParallelRegionBlockSet.
end());
4595 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
4604 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
4605 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
4611 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
4618 OI.PostOutlineCB = [=, ToBeDeletedVec =
4619 std::move(ToBeDeleted)](
Function &OutlinedFn) {
4621 ToBeDeletedVec, LoopType);
4629 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
4630 bool HasSimdModifier,
bool HasMonotonicModifier,
4631 bool HasNonmonotonicModifier,
bool HasOrderedClause,
4634 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType);
4636 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
4637 HasNonmonotonicModifier, HasOrderedClause);
4639 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
4640 OMPScheduleType::ModifierOrdered;
4641 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
4642 case OMPScheduleType::BaseStatic:
4643 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
4645 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4646 NeedsBarrier, ChunkSize);
4648 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier);
4650 case OMPScheduleType::BaseStaticChunked:
4652 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4653 NeedsBarrier, ChunkSize);
4655 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
4658 case OMPScheduleType::BaseRuntime:
4659 case OMPScheduleType::BaseAuto:
4660 case OMPScheduleType::BaseGreedy:
4661 case OMPScheduleType::BaseBalanced:
4662 case OMPScheduleType::BaseSteal:
4663 case OMPScheduleType::BaseGuidedSimd:
4664 case OMPScheduleType::BaseRuntimeSimd:
4666 "schedule type does not support user-defined chunk sizes");
4668 case OMPScheduleType::BaseDynamicChunked:
4669 case OMPScheduleType::BaseGuidedChunked:
4670 case OMPScheduleType::BaseGuidedIterativeChunked:
4671 case OMPScheduleType::BaseGuidedAnalyticalChunked:
4672 case OMPScheduleType::BaseStaticBalancedChunked:
4673 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
4674 NeedsBarrier, ChunkSize);
4690 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
4693 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
4706 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
4709 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
4721 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
4724 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
4730 InsertPointTy AllocaIP,
4732 bool NeedsBarrier,
Value *Chunk) {
4733 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4735 "Require dedicated allocate IP");
4737 "Require valid schedule type");
4739 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
4740 OMPScheduleType::ModifierOrdered;
4751 Type *IVTy =
IV->getType();
4769 Constant *One = ConstantInt::get(IVTy, 1);
4790 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4794 {SrcLoc, ThreadNum, SchedulingType, One,
4795 UpperBound, One, Chunk});
4805 PLowerBound, PUpperBound, PStride});
4806 Constant *Zero32 = ConstantInt::get(I32Type, 0);
4815 auto *PI = cast<PHINode>(Phi);
4816 PI->setIncomingBlock(0, OuterCond);
4817 PI->setIncomingValue(0, LowerBound);
4821 auto *Br = cast<BranchInst>(Term);
4822 Br->setSuccessor(0, OuterCond);
4830 auto *CI = cast<CmpInst>(Comp);
4831 CI->setOperand(1, UpperBound);
4834 auto *BI = cast<BranchInst>(Branch);
4835 assert(BI->getSuccessor(1) == Exit);
4836 BI->setSuccessor(1, OuterCond);
4850 omp::Directive::OMPD_for,
false,
4853 return BarrierIP.takeError();
4872 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
4873 for (
Use &U : BB->uses()) {
4874 auto *UseInst = dyn_cast<Instruction>(U.getUser());
4877 if (BBsToErase.count(UseInst->getParent()))
4884 while (BBsToErase.remove_if(HasRemainingUses)) {
4895 assert(
Loops.size() >= 1 &&
"At least one loop required");
4896 size_t NumLoops =
Loops.size();
4900 return Loops.front();
4912 Loop->collectControlBlocks(OldControlBBs);
4916 if (ComputeIP.
isSet())
4923 Value *CollapsedTripCount =
nullptr;
4926 "All loops to collapse must be valid canonical loops");
4927 Value *OrigTripCount = L->getTripCount();
4928 if (!CollapsedTripCount) {
4929 CollapsedTripCount = OrigTripCount;
4941 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
4949 Value *Leftover = Result->getIndVar();
4951 NewIndVars.
resize(NumLoops);
4952 for (
int i = NumLoops - 1; i >= 1; --i) {
4953 Value *OrigTripCount =
Loops[i]->getTripCount();
4956 NewIndVars[i] = NewIndVar;
4961 NewIndVars[0] = Leftover;
4970 BasicBlock *ContinueBlock = Result->getBody();
4972 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
4979 ContinueBlock =
nullptr;
4980 ContinuePred = NextSrc;
4987 for (
size_t i = 0; i < NumLoops - 1; ++i)
4988 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
4994 for (
size_t i = NumLoops - 1; i > 0; --i)
4995 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
4998 ContinueWith(Result->getLatch(),
nullptr);
5005 for (
size_t i = 0; i < NumLoops; ++i)
5006 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5020std::vector<CanonicalLoopInfo *>
5024 "Must pass as many tile sizes as there are loops");
5025 int NumLoops =
Loops.size();
5026 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5038 Loop->collectControlBlocks(OldControlBBs);
5046 assert(L->isValid() &&
"All input loops must be valid canonical loops");
5047 OrigTripCounts.
push_back(L->getTripCount());
5058 for (
int i = 0; i < NumLoops - 1; ++i) {
5071 for (
int i = 0; i < NumLoops; ++i) {
5073 Value *OrigTripCount = OrigTripCounts[i];
5086 Value *FloorTripOverflow =
5092 "omp_floor" +
Twine(i) +
".tripcount",
true);
5100 std::vector<CanonicalLoopInfo *> Result;
5101 Result.reserve(NumLoops * 2);
5114 auto EmbeddNewLoop =
5115 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5118 DL, TripCount,
F, InnerEnter, OutroInsertBefore,
Name);
5123 Enter = EmbeddedLoop->
getBody();
5125 OutroInsertBefore = EmbeddedLoop->
getLatch();
5126 return EmbeddedLoop;
5130 const Twine &NameBase) {
5133 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5134 Result.push_back(EmbeddedLoop);
5138 EmbeddNewLoops(FloorCount,
"floor");
5144 for (
int i = 0; i < NumLoops; ++i) {
5148 Value *FloorIsEpilogue =
5150 Value *TileTripCount =
5157 EmbeddNewLoops(TileCounts,
"tile");
5162 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5171 BodyEnter =
nullptr;
5172 BodyEntered = ExitBB;
5185 for (
int i = 0; i < NumLoops; ++i) {
5188 Value *OrigIndVar = OrigIndVars[i];
5216 if (Properties.
empty())
5239 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5243 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5251 if (
I.mayReadOrWriteMemory()) {
5255 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5277 const Twine &NamePrefix) {
5324 VMap[
Block] = NewBB;
5334 if (TargetTriple.
isX86()) {
5335 if (Features.
lookup(
"avx512f"))
5337 else if (Features.
lookup(
"avx"))
5341 if (TargetTriple.
isPPC())
5343 if (TargetTriple.
isWasm())
5350 Value *IfCond, OrderKind Order,
5369 if (AlignedVars.
size()) {
5371 for (
auto &AlignedItem : AlignedVars) {
5372 Value *AlignedPtr = AlignedItem.first;
5373 Value *Alignment = AlignedItem.second;
5374 Instruction *loadInst = dyn_cast<Instruction>(AlignedPtr);
5384 createIfVersion(CanonicalLoop, IfCond, VMap,
"simd");
5388 "Cannot find value which corresponds to original loop latch");
5389 assert(isa<BasicBlock>(MappedLatch) &&
5390 "Cannot cast mapped latch block value to BasicBlock");
5391 BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch);
5420 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5428 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5436 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
5438 if (Simdlen || Safelen) {
5442 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
5468static std::unique_ptr<TargetMachine>
5472 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
5473 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
5474 const std::string &
Triple = M->getTargetTriple();
5484 std::nullopt, OptLevel));
5508 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
5523 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
5528 nullptr, ORE,
static_cast<int>(OptLevel),
5549 <<
" Threshold=" << UP.
Threshold <<
"\n"
5552 <<
" PartialOptSizeThreshold="
5571 if (
auto *Load = dyn_cast<LoadInst>(&
I)) {
5572 Ptr = Load->getPointerOperand();
5573 }
else if (
auto *Store = dyn_cast<StoreInst>(&
I)) {
5574 Ptr = Store->getPointerOperand();
5578 Ptr =
Ptr->stripPointerCasts();
5580 if (
auto *Alloca = dyn_cast<AllocaInst>(
Ptr)) {
5581 if (Alloca->getParent() == &
F->getEntryBlock())
5601 int MaxTripCount = 0;
5602 bool MaxOrZero =
false;
5603 unsigned TripMultiple = 0;
5605 bool UseUpperBound =
false;
5607 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
5609 unsigned Factor = UP.
Count;
5610 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
5621 assert(Factor >= 0 &&
"Unroll factor must not be negative");
5637 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
5650 *UnrolledCLI =
Loop;
5655 "unrolling only makes sense with a factor of 2 or larger");
5657 Type *IndVarTy =
Loop->getIndVarType();
5664 std::vector<CanonicalLoopInfo *>
LoopNest =
5679 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
5682 (*UnrolledCLI)->assertOK();
5700 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
5719 if (!CPVars.
empty()) {
5724 Directive OMPD = Directive::OMPD_single;
5729 Value *Args[] = {Ident, ThreadId};
5738 if (
Error Err = FiniCB(IP))
5759 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
5766 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
5769 ConstantInt::get(
Int64, 0), CPVars[
I],
5772 }
else if (!IsNowait) {
5775 omp::Directive::OMPD_unknown,
false,
5790 Directive OMPD = Directive::OMPD_critical;
5795 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
5796 Value *Args[] = {Ident, ThreadId, LockVar};
5813 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5821 const Twine &
Name,
bool IsDependSource) {
5824 [](
Value *SV) {
return SV->
getType()->isIntegerTy(64); }) &&
5825 "OpenMP runtime requires depend vec with i64 type");
5838 for (
unsigned I = 0;
I < NumLoops; ++
I) {
5852 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
5870 Directive OMPD = Directive::OMPD_ordered;
5879 Value *Args[] = {Ident, ThreadId};
5889 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5895 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
5896 bool HasFinalize,
bool IsCancellable) {
5905 if (!isa_and_nonnull<BranchInst>(SplitPos))
5912 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
5923 "Unexpected control flow graph state!!");
5925 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
5927 return AfterIP.takeError();
5929 "Unexpected Control Flow State!");
5935 "Unexpected Insertion point location!");
5938 auto InsertBB = merged ? ExitPredBB : ExitBB;
5939 if (!isa_and_nonnull<BranchInst>(SplitPos))
5949 if (!Conditional || !EntryCall)
5969 UI->eraseFromParent();
5977 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
5985 "Unexpected finalization stack state!");
5988 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
5990 if (
Error Err = Fi.FiniCB(FinIP))
6039 if (isa_and_nonnull<BranchInst>(OMP_Entry->
getTerminator())) {
6041 "copyin.not.master.end");
6096 Value *DependenceAddress,
bool HaveNowaitClause) {
6104 if (Device ==
nullptr)
6106 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
6107 if (NumDependences ==
nullptr) {
6108 NumDependences = ConstantInt::get(
Int32, 0);
6112 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6114 Ident, ThreadId, InteropVar, InteropTypeVal,
6115 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6124 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6132 if (Device ==
nullptr)
6134 if (NumDependences ==
nullptr) {
6135 NumDependences = ConstantInt::get(
Int32, 0);
6139 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6141 Ident, ThreadId, InteropVar, Device,
6142 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6151 Value *NumDependences,
6152 Value *DependenceAddress,
6153 bool HaveNowaitClause) {
6160 if (Device ==
nullptr)
6162 if (NumDependences ==
nullptr) {
6163 NumDependences = ConstantInt::get(
Int32, 0);
6167 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6169 Ident, ThreadId, InteropVar, Device,
6170 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6200 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
6201 "expected num_threads and num_teams to be specified");
6220 const std::string DebugPrefix =
"_debug__";
6221 if (KernelName.
ends_with(DebugPrefix)) {
6222 KernelName = KernelName.
drop_back(DebugPrefix.length());
6229 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
6234 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
6235 if (MaxThreadsVal < 0)
6236 MaxThreadsVal = std::max(
6239 if (MaxThreadsVal > 0)
6250 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6253 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6254 Constant *DynamicEnvironmentInitializer =
6258 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6260 DL.getDefaultGlobalsAddressSpace());
6264 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6265 ? DynamicEnvironmentGV
6267 DynamicEnvironmentPtr);
6270 ConfigurationEnvironment, {
6271 UseGenericStateMachineVal,
6272 MayUseNestedParallelismVal,
6279 ReductionBufferLength,
6282 KernelEnvironment, {
6283 ConfigurationEnvironmentInitializer,
6287 std::string KernelEnvironmentName =
6288 (KernelName +
"_kernel_environment").str();
6291 KernelEnvironmentInitializer, KernelEnvironmentName,
6293 DL.getDefaultGlobalsAddressSpace());
6297 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6298 ? KernelEnvironmentGV
6300 KernelEnvironmentPtr);
6301 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6329 UI->eraseFromParent();
6337 int32_t TeamsReductionDataSize,
6338 int32_t TeamsReductionBufferLength) {
6343 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6347 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6353 const std::string DebugPrefix =
"_debug__";
6355 KernelName = KernelName.
drop_back(DebugPrefix.length());
6356 auto *KernelEnvironmentGV =
6358 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6359 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
6361 KernelEnvironmentInitializer,
6362 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6364 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6366 KernelEnvironmentGV->setInitializer(NewInitializer);
6371 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
6375 auto *KernelOp = dyn_cast<ConstantAsMetadata>(
Op->getOperand(0));
6376 if (!KernelOp || KernelOp->getValue() != &
Kernel)
6378 auto *Prop = dyn_cast<MDString>(
Op->getOperand(1));
6379 if (!Prop || Prop->getString() !=
Name)
6391 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->
getOperand(2));
6392 int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6395 OldVal->getValue()->getType(),
6396 Min ? std::min(OldLimit,
Value) : std::max(OldLimit,
Value))));
6405 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"nvvm.annotations");
6410std::pair<int32_t, int32_t>
6412 int32_t ThreadLimit =
6417 if (!Attr.isValid() || !Attr.isStringAttribute())
6418 return {0, ThreadLimit};
6421 if (!llvm::to_integer(UBStr, UB, 10))
6422 return {0, ThreadLimit};
6423 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6424 if (!llvm::to_integer(LBStr, LB, 10))
6430 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
6431 int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
6432 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6434 return {0, ThreadLimit};
6444 llvm::utostr(LB) +
"," + llvm::utostr(UB));
6451std::pair<int32_t, int32_t>
6458 int32_t LB, int32_t UB) {
6463 Kernel.
addFnAttr(
"amdgpu-max-num-workgroups", llvm::utostr(LB) +
",1,1");
6468void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
6477 else if (
T.isNVPTX())
6479 else if (
T.isSPIRV())
6487 assert(OutlinedFn &&
"The outlined function must exist if embedded");
6496Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
6502 "Named kernel already exists?");
6520 OutlinedFn = *CBResult;
6522 OutlinedFn =
nullptr;
6528 if (!IsOffloadEntry)
6531 std::string EntryFnIDName =
6533 ? std::string(EntryFnName)
6537 EntryFnName, EntryFnIDName);
6545 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
6546 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
6547 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
6549 EntryInfo, EntryAddr, OutlinedFnID,
6551 return OutlinedFnID;
6580 bool IsStandAlone = !BodyGenCB;
6606 SrcLocInfo, DeviceID,
6613 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
6617 if (
Info.HasNoWait) {
6627 if (
Info.HasNoWait) {
6631 emitBlock(OffloadContBlock, CurFn,
true);
6637 bool RequiresOuterTargetTask =
Info.HasNoWait;
6638 if (!RequiresOuterTargetTask)
6639 cantFail(TaskBodyCB(
nullptr,
nullptr,
6643 {},
Info.HasNoWait));
6646 omp::OMPRTL___tgt_target_data_begin_mapper);
6650 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
6651 if (isa<AllocaInst>(DeviceMap.second.second)) {
6699 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
6721 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
6737 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
6741 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
6753 bool IsGPUDistribute) {
6754 assert((IVSize == 32 || IVSize == 64) &&
6755 "IV size is not compatible with the omp runtime");
6757 if (IsGPUDistribute)
6759 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
6760 : omp::OMPRTL___kmpc_distribute_static_init_4u)
6761 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
6762 : omp::OMPRTL___kmpc_distribute_static_init_8u);
6764 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
6765 : omp::OMPRTL___kmpc_for_static_init_4u)
6766 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
6767 : omp::OMPRTL___kmpc_for_static_init_8u);
6774 assert((IVSize == 32 || IVSize == 64) &&
6775 "IV size is not compatible with the omp runtime");
6777 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
6778 : omp::OMPRTL___kmpc_dispatch_init_4u)
6779 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
6780 : omp::OMPRTL___kmpc_dispatch_init_8u);
6787 assert((IVSize == 32 || IVSize == 64) &&
6788 "IV size is not compatible with the omp runtime");
6790 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
6791 : omp::OMPRTL___kmpc_dispatch_next_4u)
6792 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
6793 : omp::OMPRTL___kmpc_dispatch_next_8u);
6800 assert((IVSize == 32 || IVSize == 64) &&
6801 "IV size is not compatible with the omp runtime");
6803 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
6804 : omp::OMPRTL___kmpc_dispatch_fini_4u)
6805 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
6806 : omp::OMPRTL___kmpc_dispatch_fini_8u);
6817 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
6826 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
6827 auto NewSP = Func->getSubprogram();
6831 if (NewVar && (arg == NewVar->
getArg()))
6843 auto UpdateDebugRecord = [&](
auto *DR) {
6846 for (
auto Loc : DR->location_ops()) {
6847 auto Iter = ValueReplacementMap.find(Loc);
6848 if (Iter != ValueReplacementMap.end()) {
6849 DR->replaceVariableLocationOp(Loc, std::get<0>(Iter->second));
6850 ArgNo = std::get<1>(Iter->second) + 1;
6853 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
6859 if (
auto *DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&
I))
6860 UpdateDebugRecord(DDI);
6863 UpdateDebugRecord(&DVR);
6868 Module *M = Func->getParent();
6871 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
6873 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
6874 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
6876 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(), Loc,
6896 for (
auto &Arg : Inputs)
6897 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
6901 for (
auto &Arg : Inputs)
6902 ParameterTypes.
push_back(Arg->getType());
6906 auto M = BB->getModule();
6917 if (TargetCpuAttr.isStringAttribute())
6918 Func->addFnAttr(TargetCpuAttr);
6920 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
6921 if (TargetFeaturesAttr.isStringAttribute())
6922 Func->addFnAttr(TargetFeaturesAttr);
6927 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
6943 DB.createSubroutineType(DB.getOrCreateTypeArray({}));
6945 DISubprogram::SPFlagOptimized |
6946 DISubprogram::SPFlagLocalToUnit;
6949 CU, FuncName, FuncName, SP->getFile(),
DL.getLine(), Ty,
6950 DL.getLine(), DINode::DIFlags::FlagArtificial, SPFlags);
6953 Func->setSubprogram(OutlinedSP);
6958 OutlinedSP,
DL.getInlinedAt()));
6981 splitBB(Builder,
true,
"outlined.body");
6996 auto AllocaIP = Builder.
saveIP();
7001 const auto &ArgRange =
7003 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7026 if (
auto *Const = dyn_cast<Constant>(Input))
7031 if (
auto *Instr = dyn_cast<Instruction>(
User))
7032 if (Instr->getFunction() == Func)
7033 Instr->replaceUsesOfWith(Input, InputCopy);
7039 for (
auto InArg :
zip(Inputs, ArgRange)) {
7040 Value *Input = std::get<0>(InArg);
7041 Argument &Arg = std::get<1>(InArg);
7042 Value *InputCopy =
nullptr;
7045 ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.
saveIP());
7049 ValueReplacementMap[Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7067 if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
7068 llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
7069 llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
7070 DeferredReplacement.
push_back(std::make_pair(Input, InputCopy));
7074 ReplaceValue(Input, InputCopy, Func);
7078 for (
auto Deferred : DeferredReplacement)
7079 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7082 ValueReplacementMap);
7121 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7122 Type *TaskTy = OMPBuilder.Task;
7127 ".omp_target_task_proxy_func",
7129 ProxyFn->getArg(0)->setName(
"thread.id");
7130 ProxyFn->getArg(1)->setName(
"task");
7136 bool HasShareds = StaleCI->
arg_size() > 1;
7142 "StaleCI with shareds should have exactly two arguments.");
7144 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->
getArgOperand(1));
7145 assert(ArgStructAlloca &&
7146 "Unable to find the alloca instruction corresponding to arguments "
7147 "for extracted function");
7148 auto *ArgStructType = cast<StructType>(ArgStructAlloca->getAllocatedType());
7151 Builder.
CreateAlloca(ArgStructType,
nullptr,
"structArg");
7152 Value *TaskT = ProxyFn->getArg(1);
7153 Value *ThreadId = ProxyFn->getArg(0);
7154 Value *SharedsSize =
7155 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7162 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7165 Builder.
CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
7183 EntryFnName, Inputs, CBFunc,
7188 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7296 TargetTaskAllocaBB->
begin());
7300 OI.
EntryBB = TargetTaskAllocaBB;
7306 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7310 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7314 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, HasNoWait,
7315 DeviceID](
Function &OutlinedFn)
mutable {
7317 "there must be a single user for the outlined function");
7320 bool HasShareds = StaleCI->
arg_size() > 1;
7324 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
7340 bool NeedsTargetTask = HasNoWait && DeviceID;
7345 OMPRTL___kmpc_omp_target_task_alloc);
7363 auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->
getArgOperand(1));
7364 assert(ArgStructAlloca &&
7365 "Unable to find the alloca instruction corresponding to arguments "
7366 "for extracted function");
7367 auto *ArgStructType =
7368 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
7369 assert(ArgStructType &&
"Unable to find struct type corresponding to "
7370 "arguments for extracted function");
7391 TaskSize, SharedsSize,
7394 if (NeedsTargetTask) {
7395 assert(DeviceID &&
"Expected non-empty device ID.");
7419 if (!NeedsTargetTask) {
7441 }
else if (DepArray) {
7460 I->eraseFromParent();
7478 DeviceAddrCB, CustomMapperCB);
7491 bool HasNoWait =
false) {
7502 bool HasDependencies = Dependencies.size() > 0;
7503 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
7520 if (OutlinedFnID && DeviceID)
7522 EmitTargetCallFallbackCB, KArgs,
7523 DeviceID, RTLoc, TargetTaskAllocaIP);
7531 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
7538 auto &&EmitTargetCallElse =
7544 if (RequiresOuterTargetTask) {
7550 Dependencies, HasNoWait);
7552 return EmitTargetCallFallbackCB(Builder.
saveIP());
7559 auto &&EmitTargetCallThen =
7576 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
7581 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
7598 Value *MaxThreadsClause =
7600 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
7603 for (
auto [TeamsVal, TargetVal] :
zip_equal(
7605 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
7606 Value *NumThreads = InitMaxThreadsClause(TargetVal);
7608 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
7609 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
7614 unsigned NumTargetItems =
Info.NumberOfPtrs;
7632 NumTeamsC, NumThreadsC,
7633 DynCGGroupMem, HasNoWait);
7640 if (RequiresOuterTargetTask)
7641 return OMPBuilder.
emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
7642 Dependencies, HasNoWait);
7645 EmitTargetCallFallbackCB, KArgs,
7646 DeviceID, RTLoc, AllocaIP);
7656 if (!OutlinedFnID) {
7668 EmitTargetCallElse, AllocaIP));
7692 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
7693 OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB))
7701 OutlinedFn, OutlinedFnID, Args, GenMapInfoCB, Dependencies,
7716 return OS.str().str();
7730 assert(Elem.second->getValueType() == Ty &&
7731 "OMP internal variable has different type than requested");
7747 GV->setAlignment(std::max(TypeAlign, PtrAlign));
7754Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
7755 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
7756 std::string
Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
7767 return SizePtrToInt;
7772 std::string VarName) {
7780 return MaptypesArrayGlobal;
7785 unsigned NumOperands,
7794 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
7798 ArrI64Ty,
nullptr,
".offload_sizes");
7809 int64_t DeviceID,
unsigned NumOperands) {
7815 Value *ArgsBaseGEP =
7817 {Builder.getInt32(0), Builder.getInt32(0)});
7820 {Builder.getInt32(0), Builder.getInt32(0)});
7821 Value *ArgSizesGEP =
7823 {Builder.getInt32(0), Builder.getInt32(0)});
7829 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
7836 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
7837 "expected region end call to runtime only when end call is separate");
7839 auto VoidPtrTy = UnqualPtrTy;
7840 auto VoidPtrPtrTy = UnqualPtrTy;
7842 auto Int64PtrTy = UnqualPtrTy;
7844 if (!
Info.NumberOfPtrs) {
7856 Info.RTArgs.BasePointersArray,
7867 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
7868 :
Info.RTArgs.MapTypesArray,
7874 if (!
Info.EmitDebug)
7883 if (!
Info.HasMapper)
7908 "struct.descriptor_dim");
7910 enum { OffsetFD = 0, CountFD, StrideFD };
7914 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
7917 if (NonContigInfo.
Dims[
I] == 1)
7924 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
7925 unsigned RevIdx = EE -
II - 1;
7928 {Builder.getInt64(0), Builder.getInt64(II)});
7932 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
7937 NonContigInfo.
Counts[L][RevIdx], CountLVal,
7942 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
7951 Info.RTArgs.PointersArray, 0,
I);
7958void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
7962 StringRef Prefix = IsInit ?
".init" :
".del";
7972 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7973 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
7983 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
7984 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8009 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8010 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8011 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8015 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8016 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8020 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8021 ArraySize, MapTypeArg, MapName};
8048 MapperFn->
addFnAttr(Attribute::NoInline);
8049 MapperFn->
addFnAttr(Attribute::NoUnwind);
8080 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8081 MapType, MapName, ElementSize, HeadBB,
8107 Value *OffloadingArgs[] = {MapperHandle};
8111 Value *ShiftedPreviousSize =
8115 for (
unsigned I = 0;
I <
Info.BasePointers.size(); ++
I) {
8118 Value *CurBeginArg =
8127 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8129 Value *MemberMapType =
8147 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8148 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8149 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8165 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8166 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8167 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8173 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8174 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8181 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8182 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8188 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8189 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8196 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8197 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8208 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8209 CurSizeArg, CurMapType, CurNameArg};
8211 if (CustomMapperCB && CustomMapperCB(
I, &ChildMapperFn)) {
8226 "omp.arraymap.next");
8235 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8236 MapType, MapName, ElementSize, DoneBB,
8254 Info.clearArrayInfo();
8257 if (
Info.NumberOfPtrs == 0)
8267 PointerArrayType,
nullptr,
".offload_baseptrs");
8270 PointerArrayType,
nullptr,
".offload_ptrs");
8272 PointerArrayType,
nullptr,
".offload_mappers");
8273 Info.RTArgs.MappersArray = MappersArray;
8280 ConstantInt::get(Int64Ty, 0));
8282 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
8283 if (
auto *CI = dyn_cast<Constant>(CombinedInfo.
Sizes[
I])) {
8284 if (!isa<ConstantExpr>(CI) && !isa<GlobalValue>(CI)) {
8285 if (IsNonContiguous &&
8286 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8288 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
8296 RuntimeSizes.
set(
I);
8299 if (RuntimeSizes.
all()) {
8302 SizeArrayType,
nullptr,
".offload_sizes");
8308 auto *SizesArrayGbl =
8313 if (!RuntimeSizes.
any()) {
8314 Info.RTArgs.SizesArray = SizesArrayGbl;
8320 SizeArrayType,
nullptr,
".offload_sizes");
8325 SizesArrayGbl, OffloadSizeAlign,
8330 Info.RTArgs.SizesArray = Buffer;
8338 for (
auto mapFlag : CombinedInfo.
Types)
8340 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8344 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
8349 auto *MapNamesArrayGbl =
8351 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
8352 Info.EmitDebug =
true;
8354 Info.RTArgs.MapNamesArray =
8356 Info.EmitDebug =
false;
8361 if (
Info.separateBeginEndCalls()) {
8362 bool EndMapTypesDiffer =
false;
8364 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8365 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
8366 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8367 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8368 EndMapTypesDiffer =
true;
8371 if (EndMapTypesDiffer) {
8373 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
8378 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
8386 if (
Info.requiresDevicePointerInfo()) {
8393 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
8395 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
8397 DeviceAddrCB(
I, BP);
8409 if (RuntimeSizes.
test(
I)) {
8423 if (
Value *CustomMFunc = CustomMapperCB(
I))
8427 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
8433 Info.NumberOfPtrs == 0)
8478 if (
auto *CI = dyn_cast<ConstantInt>(
Cond)) {
8479 auto CondConstant = CI->getSExtValue();
8511bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
8515 "Unexpected Atomic Ordering.");
8579 assert(
X.Var->getType()->isPointerTy() &&
8580 "OMP Atomic expects a pointer to target memory");
8581 Type *XElemTy =
X.ElemTy;
8584 "OMP atomic read expected a scalar type");
8586 Value *XRead =
nullptr;
8592 XRead = cast<Value>(XLD);
8605 XRead = AtomicLoadRes.first;
8620 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
8621 if (XRead->
getType() != V.Var->getType())
8634 assert(
X.Var->getType()->isPointerTy() &&
8635 "OMP Atomic expects a pointer to target memory");
8636 Type *XElemTy =
X.ElemTy;
8639 "OMP atomic write expected a scalar type");
8654 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
8667 Type *XTy =
X.Var->getType();
8669 "OMP Atomic expects a pointer to target memory");
8670 Type *XElemTy =
X.ElemTy;
8673 "OMP atomic update expected a scalar type");
8676 "OpenMP atomic does not support LT or GT operations");
8680 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
8681 X.IsVolatile, IsXBinopExpr);
8683 return AtomicResult.takeError();
8684 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
8689Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
8726 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr) {
8729 bool emitRMWOp =
false;
8737 emitRMWOp = XElemTy;
8740 emitRMWOp = (IsXBinopExpr && XElemTy);
8747 std::pair<Value *, Value *> Res;
8754 Res.second = Res.first;
8756 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
8769 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
8776 X->getName() +
".atomic.cont");
8780 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
8783 PHI->addIncoming(AtomicLoadRes.first, CurBB);
8788 Value *Upd = *CBResult;
8792 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
8793 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
8798 Res.first = OldExprVal;
8825 X->getName() +
".atomic.cont");
8829 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
8832 PHI->addIncoming(OldVal, CurBB);
8838 X->getName() +
".atomic.fltCast");
8841 X->getName() +
".atomic.ptrCast");
8848 Value *Upd = *CBResult;
8855 Result->setVolatile(VolatileX);
8861 Res.first = OldExprVal;
8881 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr) {
8886 Type *XTy =
X.Var->getType();
8888 "OMP Atomic expects a pointer to target memory");
8889 Type *XElemTy =
X.ElemTy;
8892 "OMP atomic capture expected a scalar type");
8894 "OpenMP atomic does not support LT or GT operations");
8901 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
8902 X.IsVolatile, IsXBinopExpr);
8905 Value *CapturedVal =
8906 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
8907 if (CapturedVal->
getType() != V.Var->getType())
8911 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
8923 IsPostfixUpdate, IsFailOnly, Failure);
8935 assert(
X.Var->getType()->isPointerTy() &&
8936 "OMP atomic expects a pointer to target memory");
8939 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
8940 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
8945 if (
Op == OMPAtomicCompareOp::EQ) {
8964 "OldValue and V must be of same type");
8965 if (IsPostfixUpdate) {
8983 CurBBTI,
X.Var->getName() +
".atomic.exit");
9003 Value *CapturedValue =
9011 assert(R.Var->getType()->isPointerTy() &&
9012 "r.var must be of pointer type");
9013 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9016 Value *ResultCast = R.IsSigned
9022 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9023 "Op should be either max or min at this point");
9024 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9064 Value *CapturedValue =
nullptr;
9065 if (IsPostfixUpdate) {
9066 CapturedValue = OldValue;
9098 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
9145 bool SubClausesPresent =
9146 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9149 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9150 "if lowerbound is non-null, then upperbound must also be non-null "
9151 "for bounds on num_teams");
9153 if (NumTeamsUpper ==
nullptr)
9156 if (NumTeamsLower ==
nullptr)
9157 NumTeamsLower = NumTeamsUpper;
9161 "argument to if clause must be an integer value");
9166 ConstantInt::get(IfExpr->
getType(), 0));
9175 if (ThreadLimit ==
nullptr)
9181 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9186 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9198 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9200 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9202 auto HostPostOutlineCB = [
this, Ident,
9203 ToBeDeleted](
Function &OutlinedFn)
mutable {
9208 "there must be a single user for the outlined function");
9213 "Outlined function must have two or three arguments only");
9215 bool HasShared = OutlinedFn.
arg_size() == 3;
9223 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9224 "outlined function.");
9231 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
9235 I->eraseFromParent();
9250 std::string VarName) {
9259 return MapNamesArrayGlobal;
9264void OpenMPIRBuilder::initializeTypes(
Module &M) {
9267#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
9268#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
9269 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
9270 VarName##PtrTy = PointerType::getUnqual(Ctx);
9271#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
9272 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
9273 VarName##Ptr = PointerType::getUnqual(Ctx);
9274#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
9275 T = StructType::getTypeByName(Ctx, StructName); \
9277 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
9279 VarName##Ptr = PointerType::getUnqual(Ctx);
9280#include "llvm/Frontend/OpenMP/OMPKinds.def"
9291 while (!Worklist.
empty()) {
9295 if (BlockSet.
insert(SuccBB).second)
9319 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
9338 auto &&GetMDInt = [
this](
unsigned V) {
9346 auto &&TargetRegionMetadataEmitter =
9347 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
9362 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
9363 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
9364 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
9365 GetMDInt(E.getOrder())};
9368 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
9377 auto &&DeviceGlobalVarMetadataEmitter =
9378 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
9388 Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
9389 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
9393 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
9400 DeviceGlobalVarMetadataEmitter);
9402 for (
const auto &E : OrderedEntries) {
9403 assert(E.first &&
"All ordered entries must exist!");
9404 if (
const auto *CE =
9405 dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>(
9407 if (!CE->getID() || !CE->getAddress()) {
9419 }
else if (
const auto *CE =
dyn_cast<
9430 if (!CE->getAddress()) {
9435 if (CE->getVarSize() == 0)
9441 "Declaret target link address is set.");
9444 if (!CE->getAddress()) {
9456 if (
auto *
GV = dyn_cast<GlobalValue>(CE->getAddress()))
9457 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
9465 Flags, CE->getLinkage(), CE->getVarName());
9468 Flags, CE->getLinkage());
9490 unsigned FileID,
unsigned Line,
unsigned Count) {
9493 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
9500 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
9503 EntryInfo.
Line, NewCount);
9510 auto FileIDInfo = CallBack();
9513 "getTargetEntryUniqueInfo, error message: " +
9519 std::get<1>(FileIDInfo));
9525 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
9527 !(Remain & 1); Remain = Remain >> 1)
9545 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
9547 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
9554 Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
9555 Flags |= MemberOfFlag;
9561 bool IsDeclaration,
bool IsExternallyVisible,
9563 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
9564 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
9565 std::function<
Constant *()> GlobalInitializer,
9581 if (!IsExternallyVisible)
9583 OS <<
"_decl_tgt_ref_ptr";
9592 auto *
GV = cast<GlobalVariable>(
Ptr);
9596 if (GlobalInitializer)
9597 GV->setInitializer(GlobalInitializer());
9603 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
9604 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
9605 GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(
Ptr));
9608 return cast<Constant>(
Ptr);
9617 bool IsDeclaration,
bool IsExternallyVisible,
9619 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
9620 std::vector<Triple> TargetTriple,
9621 std::function<
Constant *()> GlobalInitializer,
9638 VarName = MangledName;
9646 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
9662 auto *GvAddrRef = cast<GlobalVariable>(AddrRef);
9663 GvAddrRef->setConstant(
true);
9665 GvAddrRef->setInitializer(
Addr);
9666 GeneratedRefs.push_back(GvAddrRef);
9676 VarName = (
Addr) ?
Addr->getName() :
"";
9680 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
9681 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
9682 LlvmPtrTy, GlobalInitializer, VariableLinkage);
9683 VarName = (
Addr) ?
Addr->getName() :
"";
9704 auto &&GetMDInt = [MN](
unsigned Idx) {
9705 auto *V = cast<ConstantAsMetadata>(MN->getOperand(
Idx));
9706 return cast<ConstantInt>(V->getValue())->getZExtValue();
9709 auto &&GetMDString = [MN](
unsigned Idx) {
9710 auto *V = cast<MDString>(MN->getOperand(
Idx));
9711 return V->getString();
9714 switch (GetMDInt(0)) {
9742 if (HostFilePath.
empty())
9746 if (std::error_code Err = Buf.getError()) {
9748 "OpenMPIRBuilder: " +
9756 if (std::error_code Err =
M.getError()) {
9758 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
9770 return OffloadEntriesTargetRegion.empty() &&
9771 OffloadEntriesDeviceGlobalVar.empty();
9774unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
9776 auto It = OffloadEntriesTargetRegionCount.find(
9777 getTargetRegionEntryCountKey(EntryInfo));
9778 if (It == OffloadEntriesTargetRegionCount.end())
9783void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
9785 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
9786 EntryInfo.
Count + 1;
9792 OffloadEntriesTargetRegion[EntryInfo] =
9794 OMPTargetRegionEntryTargetRegion);
9795 ++OffloadingEntriesNum;
9801 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
9804 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
9810 if (!hasTargetRegionEntryInfo(EntryInfo)) {
9813 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
9814 Entry.setAddress(
Addr);
9816 Entry.setFlags(
Flags);
9819 hasTargetRegionEntryInfo(EntryInfo,
true))
9821 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
9822 "Target region entry already registered!");
9824 OffloadEntriesTargetRegion[EntryInfo] = Entry;
9825 ++OffloadingEntriesNum;
9827 incrementTargetRegionEntryInfoCount(EntryInfo);
9834 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
9836 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
9837 if (It == OffloadEntriesTargetRegion.end()) {
9841 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
9849 for (
const auto &It : OffloadEntriesTargetRegion) {
9850 Action(It.first, It.second);
9856 OffloadEntriesDeviceGlobalVar.try_emplace(
Name, Order,
Flags);
9857 ++OffloadingEntriesNum;
9865 if (!hasDeviceGlobalVarEntryInfo(VarName))
9867 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
9868 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
9869 if (Entry.getVarSize() == 0) {
9870 Entry.setVarSize(VarSize);
9871 Entry.setLinkage(Linkage);
9875 Entry.setVarSize(VarSize);
9876 Entry.setLinkage(Linkage);
9877 Entry.setAddress(
Addr);
9879 if (hasDeviceGlobalVarEntryInfo(VarName)) {
9880 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
9881 assert(Entry.isValid() && Entry.getFlags() ==
Flags &&
9882 "Entry not initialized!");
9883 if (Entry.getVarSize() == 0) {
9884 Entry.setVarSize(VarSize);
9885 Entry.setLinkage(Linkage);
9890 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
9894 OffloadEntriesDeviceGlobalVar.try_emplace(
9895 VarName, OffloadingEntriesNum,
Addr, VarSize,
Flags, Linkage,
"");
9896 ++OffloadingEntriesNum;
9903 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
9904 Action(E.getKey(), E.getValue());
9911void CanonicalLoopInfo::collectControlBlocks(
9918 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
9930void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
9934 assert(isa<CmpInst>(CmpI) &&
"First inst must compare IV with TripCount");
9942void CanonicalLoopInfo::mapIndVar(
9952 for (
Use &U : OldIV->
uses()) {
9953 auto *
User = dyn_cast<Instruction>(U.getUser());
9956 if (
User->getParent() == getCond())
9958 if (
User->getParent() == getLatch())
9964 Value *NewIV = Updater(OldIV);
9967 for (
Use *U : ReplacableUses)
9988 "Preheader must terminate with unconditional branch");
9990 "Preheader must jump to header");
9993 assert(isa<BranchInst>(Header->getTerminator()) &&
9994 "Header must terminate with unconditional branch");
9995 assert(Header->getSingleSuccessor() ==
Cond &&
9996 "Header must jump to exiting block");
9999 assert(
Cond->getSinglePredecessor() == Header &&
10000 "Exiting block only reachable from header");
10002 assert(isa<BranchInst>(
Cond->getTerminator()) &&
10003 "Exiting block must terminate with conditional branch");
10005 "Exiting block must have two successors");
10006 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(0) == Body &&
10007 "Exiting block's first successor jump to the body");
10008 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(1) == Exit &&
10009 "Exiting block's second successor must exit the loop");
10013 "Body only reachable from exiting block");
10018 "Latch must terminate with unconditional branch");
10026 assert(isa<BranchInst>(Exit->getTerminator()) &&
10027 "Exit block must terminate with unconditional branch");
10029 "Exit block must jump to after block");
10033 "After block only reachable from exit block");
10037 assert(IndVar &&
"Canonical induction variable not found?");
10039 "Induction variable must be an integer");
10041 "Induction variable must be a PHI in the loop header");
10042 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
10044 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->
isZero());
10045 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
10047 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
10049 assert(cast<BinaryOperator>(NextIndVar)->
getOpcode() == BinaryOperator::Add);
10050 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
10051 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
10054 Value *TripCount = getTripCount();
10055 assert(TripCount &&
"Loop trip count not found?");
10057 "Trip count and induction variable must have the same type");
10059 auto *CmpI = cast<CmpInst>(&
Cond->front());
10061 "Exit condition must be a signed less-than comparison");
10063 "Exit condition must compare the induction variable");
10065 "Exit condition must compare with the trip count");
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_EN...
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
BlockVerifier::State From
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI)
Create an entry point for a target task with the following.
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static MDNode * getNVPTXMDNode(Function &Kernel, StringRef Name)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static llvm::Value * emitImplicitCast(IRBuilder<> &Builder, llvm::Value *XRead, llvm::Value *V)
Emit an implicit cast to convert XRead to type of variable V.
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, SmallVector< llvm::OpenMPIRBuilder::DependData > Dependencies={}, bool HasNoWait=false)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, Type *ParallelTaskPtr, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass execution by the user.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned unsigned DefaultVal
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
A container for analyses that lazily runs them and caches their results.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
AttrBuilder & addAttribute(Attribute::AttrKind Val)
Add an attribute to the builder.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getFnAttrs() const
The function attributes are returned.
AttributeList addFnAttributes(LLVMContext &C, const AttrBuilder &B) const
Add function attribute to the list.
AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic block Old.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
reverse_iterator rbegin()
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
const Instruction & front() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic, or any pseudo operation if SkipPseudoOp is true.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
InstListType::reverse_iterator reverse_iterator
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives in, right before MovePos.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
const Instruction & back() const
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does not have a module.
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static ConstantInt * getFalse(LLVMContext &Context)
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DISubprogram * getSubprogram() const
Get the subprogram for this scope.
static DILocalScope * cloneScopeForSubprogram(DILocalScope &RootScope, DISubprogram &NewSP, LLVMContext &Ctx, DenseMap< const MDNode *, MDNode * > &Cache)
Traverses the scope chain rooted at RootScope until it hits a Subprogram, recreating the chain with "NewSP" instead.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
DISPFlags
Debug info subprogram flags.
Type array for a subprogram.
uint32_t getAlignInBits() const
StringRef getName() const
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
unsigned getDefaultGlobalsAddressSpace() const
Align getABIIntegerTypeAlignment(unsigned BitWidth) const
Returns the minimum ABI-required alignment for an integer type of the specified bitwidth.
unsigned getAllocaAddrSpace() const
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
unsigned getPointerSize(unsigned AS=0) const
Layout pointer size in bytes, rounded up to a whole number of bytes.
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
Value * CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS, const Twine &Name="")
Return the i64 difference between two pointer values, dividing out the size of the pointed-to objects...
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
UnreachableInst * CreateUnreachable()
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, Value *OffsetValue=nullptr)
Create an assume intrinsic call that represents an alignment assumption on the provided pointer.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
IntegerType * getIndexTy(const DataLayout &DL, unsigned AddrSpace)
Fetch the type of an integer that should be used to index GEP operations within AddressSpace.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name="")
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
InsertPoint saveIP() const
Returns the current insert point.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateFPCast(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
DebugLoc getCurrentDebugLocation() const
Get location information used by debugging information.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
LLVMContext & getContext() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateExactUDiv(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
void ClearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt16(uint16_t C)
Get a constant 16-bit value.
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Value * CreateIsNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg == 0.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Type * getVoidTy()
Fetch the type representing void.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
GlobalVariable * CreateGlobalString(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr, bool AddNull=true)
Make a new global variable with initializer type i8*.
Value * CreateFPToSI(Value *V, Type *DestTy, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not belong to a module.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
A Module instance is used to store all the information related to an LLVM module.
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
LLVMContext & getContext() const
Get the global data context.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
StringRef getName() const
Get a short "name" for the module.
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
iterator_range< global_iterator > globals()
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
void setGridValue(omp::GV G)
StringRef separator() const
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
StringRef firstSeparator() const
std::optional< bool > EmitLLVMUsedMetaInfo
Flag for specifying if LLVMUsed information should be emitted.
omp::GV getGridValue() const
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
void setHasRequiresDynamicAllocators(bool Value)
void setEmitLLVMUsed(bool Value=true)
bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
ReductionGenCBKind
Enum class for the ReductionGen callback type to be used.
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
void emitBranch(BasicBlock *Target)
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
Generate a target-task for the target construct.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
void emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, SmallVector< DependData > Dependencies={}, bool HasNowait=false)
Generator for '#omp target'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false, bool IsTeamsReduction=false, bool HasDistribute=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
Function * emitUserDefinedMapper(function_ref< MapInfosTy &(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, function_ref< bool(unsigned int, Function **)> CustomMapperCB=nullptr)
Emit the user-defined mapper function.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false)
Generator for '#omp reduction'.
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
BodyGenTy
Type of BodyGen to use for region codegen.
InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isVoidTy() const
Return true if this is 'void'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
unsigned getNumUses() const
This method computes the number of uses of this Value.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
iterator insertAfter(iterator where, pointer New)
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
void emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
CodeGenOptLevel
Code generation optimization level.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
DWARFExpression::Operation Op
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * DynCGGroupMem
The size of the dynamic shared memory.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static const Target * lookupTarget(StringRef Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
unsigned GV_Warp_Size
The default value of maximum number of threads in a worker warp.