64#define DEBUG_TYPE "openmp-ir-builder"
71 cl::desc(
"Use optimistic attributes describing "
72 "'as-if' properties of runtime calls."),
76 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
77 cl::desc(
"Factor for the unroll threshold to account for code "
78 "simplifications still taking place"),
89 if (!IP1.isSet() || !IP2.isSet())
91 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
96 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
97 case OMPScheduleType::UnorderedStaticChunked:
98 case OMPScheduleType::UnorderedStatic:
99 case OMPScheduleType::UnorderedDynamicChunked:
100 case OMPScheduleType::UnorderedGuidedChunked:
101 case OMPScheduleType::UnorderedRuntime:
102 case OMPScheduleType::UnorderedAuto:
103 case OMPScheduleType::UnorderedTrapezoidal:
104 case OMPScheduleType::UnorderedGreedy:
105 case OMPScheduleType::UnorderedBalanced:
106 case OMPScheduleType::UnorderedGuidedIterativeChunked:
107 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
108 case OMPScheduleType::UnorderedSteal:
109 case OMPScheduleType::UnorderedStaticBalancedChunked:
110 case OMPScheduleType::UnorderedGuidedSimd:
111 case OMPScheduleType::UnorderedRuntimeSimd:
112 case OMPScheduleType::OrderedStaticChunked:
113 case OMPScheduleType::OrderedStatic:
114 case OMPScheduleType::OrderedDynamicChunked:
115 case OMPScheduleType::OrderedGuidedChunked:
116 case OMPScheduleType::OrderedRuntime:
117 case OMPScheduleType::OrderedAuto:
118 case OMPScheduleType::OrderdTrapezoidal:
119 case OMPScheduleType::NomergeUnorderedStaticChunked:
120 case OMPScheduleType::NomergeUnorderedStatic:
121 case OMPScheduleType::NomergeUnorderedDynamicChunked:
122 case OMPScheduleType::NomergeUnorderedGuidedChunked:
123 case OMPScheduleType::NomergeUnorderedRuntime:
124 case OMPScheduleType::NomergeUnorderedAuto:
125 case OMPScheduleType::NomergeUnorderedTrapezoidal:
126 case OMPScheduleType::NomergeUnorderedGreedy:
127 case OMPScheduleType::NomergeUnorderedBalanced:
128 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
129 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
130 case OMPScheduleType::NomergeUnorderedSteal:
131 case OMPScheduleType::NomergeOrderedStaticChunked:
132 case OMPScheduleType::NomergeOrderedStatic:
133 case OMPScheduleType::NomergeOrderedDynamicChunked:
134 case OMPScheduleType::NomergeOrderedGuidedChunked:
135 case OMPScheduleType::NomergeOrderedRuntime:
136 case OMPScheduleType::NomergeOrderedAuto:
137 case OMPScheduleType::NomergeOrderedTrapezoidal:
145 SchedType & OMPScheduleType::MonotonicityMask;
146 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
158 Builder.restoreIP(IP);
168 Kernel->getFnAttribute(
"target-features").getValueAsString();
169 if (Features.
count(
"+wavefrontsize64"))
184 bool HasSimdModifier) {
186 switch (ClauseKind) {
187 case OMP_SCHEDULE_Default:
188 case OMP_SCHEDULE_Static:
189 return HasChunks ? OMPScheduleType::BaseStaticChunked
190 : OMPScheduleType::BaseStatic;
191 case OMP_SCHEDULE_Dynamic:
192 return OMPScheduleType::BaseDynamicChunked;
193 case OMP_SCHEDULE_Guided:
194 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
195 : OMPScheduleType::BaseGuidedChunked;
196 case OMP_SCHEDULE_Auto:
198 case OMP_SCHEDULE_Runtime:
199 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
200 : OMPScheduleType::BaseRuntime;
208 bool HasOrderedClause) {
209 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
210 OMPScheduleType::None &&
211 "Must not have ordering nor monotonicity flags already set");
214 ? OMPScheduleType::ModifierOrdered
215 : OMPScheduleType::ModifierUnordered;
216 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
219 if (OrderingScheduleType ==
220 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
221 return OMPScheduleType::OrderedGuidedChunked;
222 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
223 OMPScheduleType::ModifierOrdered))
224 return OMPScheduleType::OrderedRuntime;
226 return OrderingScheduleType;
232 bool HasSimdModifier,
bool HasMonotonic,
233 bool HasNonmonotonic,
bool HasOrderedClause) {
234 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
235 OMPScheduleType::None &&
236 "Must not have monotonicity flags already set");
237 assert((!HasMonotonic || !HasNonmonotonic) &&
238 "Monotonic and Nonmonotonic are contradicting each other");
241 return ScheduleType | OMPScheduleType::ModifierMonotonic;
242 }
else if (HasNonmonotonic) {
243 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
253 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
254 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
260 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
268 bool HasSimdModifier,
bool HasMonotonicModifier,
269 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
275 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
276 HasNonmonotonicModifier, HasOrderedClause);
291 assert(!Br->isConditional() &&
292 "BB's terminator must be an unconditional branch (or degenerate)");
295 Br->setSuccessor(0,
Target);
300 NewBr->setDebugLoc(
DL);
306 "Target BB must not have PHI nodes");
326 NewBr->setDebugLoc(
DL);
334 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
338 Builder.SetInsertPoint(Old);
342 Builder.SetCurrentDebugLocation(
DebugLoc);
351 spliceBB(IP, New, CreateBranch,
DL);
352 New->replaceSuccessorsPhiUsesWith(Old, New);
361 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
363 Builder.SetInsertPoint(Builder.GetInsertBlock());
366 Builder.SetCurrentDebugLocation(
DebugLoc);
375 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
377 Builder.SetInsertPoint(Builder.GetInsertBlock());
380 Builder.SetCurrentDebugLocation(
DebugLoc);
387 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
394 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
396 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
397 const Twine &Name =
"",
bool AsPtr =
true) {
398 Builder.restoreIP(OuterAllocaIP);
401 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
405 FakeVal = FakeValAddr;
408 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
413 Builder.restoreIP(InnerAllocaIP);
417 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
/// Bitflags describing which clauses of an OpenMP 'requires' directive have
/// been seen. The individual values are ORed together into
/// OpenMPIRBuilderConfig::RequiresFlags (see the config constructor below).
enum OpenMPOffloadingRequiresDirFlags {
  /// Flag state is undefined (nothing has been recorded yet).
  OMP_REQ_UNDEFINED = 0x000,
  /// No 'requires' clause was present.
  OMP_REQ_NONE = 0x001,
  /// 'reverse_offload' clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// 'unified_address' clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// 'unified_shared_memory' clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// 'dynamic_allocators' clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
};
451OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
452 : RequiresFlags(OMP_REQ_UNDEFINED) {}
454OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
455 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
456 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
457 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
458 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
459 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
460 RequiresFlags(OMP_REQ_UNDEFINED) {
461 if (HasRequiresReverseOffload)
462 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
463 if (HasRequiresUnifiedAddress)
464 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
465 if (HasRequiresUnifiedSharedMemory)
466 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
467 if (HasRequiresDynamicAllocators)
468 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
471bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
472 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
475bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
476 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
479bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
480 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
483bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
484 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
487int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
488 return hasRequiresFlags() ? RequiresFlags
489 :
static_cast<int64_t
>(OMP_REQ_NONE);
492void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
494 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
496 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
499void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
501 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
503 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
506void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
508 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
510 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
513void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
515 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
517 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
524void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
528 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
530 constexpr const size_t MaxDim = 3;
532 Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
534 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
537 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
538 Value *NumThreads3D =
539 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
541 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
543 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
545 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
547 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
549 ArgsVector = {Version,
551 KernelArgs.RTArgs.BasePointersArray,
552 KernelArgs.RTArgs.PointersArray,
553 KernelArgs.RTArgs.SizesArray,
554 KernelArgs.RTArgs.MapTypesArray,
555 KernelArgs.RTArgs.MapNamesArray,
556 KernelArgs.RTArgs.MappersArray,
557 KernelArgs.NumIterations,
561 KernelArgs.DynCGGroupMem};
569 auto FnAttrs =
Attrs.getFnAttrs();
570 auto RetAttrs =
Attrs.getRetAttrs();
572 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
577 bool Param =
true) ->
void {
578 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
579 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
580 if (HasSignExt || HasZeroExt) {
581 assert(AS.getNumAttributes() == 1 &&
582 "Currently not handling extension attr combined with others.");
584 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
587 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
594#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
595#include "llvm/Frontend/OpenMP/OMPKinds.def"
599#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
601 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
602 addAttrSet(RetAttrs, RetAttrSet, false); \
603 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
604 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
605 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
607#include "llvm/Frontend/OpenMP/OMPKinds.def"
621#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
623 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
625 Fn = M.getFunction(Str); \
627#include "llvm/Frontend/OpenMP/OMPKinds.def"
633#define OMP_RTL(Enum, Str, ...) \
635 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
637#include "llvm/Frontend/OpenMP/OMPKinds.def"
641 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
651 LLVMContext::MD_callback,
653 2, {-1, -1},
true)}));
659 addAttributes(FnID, *Fn);
666 assert(Fn &&
"Failed to create OpenMP runtime function");
674 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
678void OpenMPIRBuilder::initialize() { initializeTypes(M); }
689 for (
auto Inst =
Block->getReverseIterator()->begin();
690 Inst !=
Block->getReverseIterator()->end();) {
703void OpenMPIRBuilder::finalize(
Function *Fn) {
707 for (OutlineInfo &OI : OutlineInfos) {
710 if (Fn && OI.getFunction() != Fn) {
715 ParallelRegionBlockSet.
clear();
717 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
727 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
736 ".omp_par", ArgsInZeroAddressSpace);
740 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
741 assert(Extractor.isEligible() &&
742 "Expected OpenMP outlining to be possible!");
744 for (
auto *V : OI.ExcludeArgsFromAggregate)
745 Extractor.excludeArgFromAggregate(V);
747 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
751 if (TargetCpuAttr.isStringAttribute())
754 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
755 if (TargetFeaturesAttr.isStringAttribute())
756 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
759 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
761 "OpenMP outlined functions should not return a value!");
766 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
773 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
780 "Expected instructions to add in the outlined region entry");
782 End = ArtificialEntry.
rend();
787 if (
I.isTerminator()) {
789 if (OI.EntryBB->getTerminator())
790 OI.EntryBB->getTerminator()->adoptDbgRecords(
791 &ArtificialEntry,
I.getIterator(),
false);
795 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
798 OI.EntryBB->moveBefore(&ArtificialEntry);
805 if (OI.PostOutlineCB)
806 OI.PostOutlineCB(*OutlinedFn);
810 OutlineInfos = std::move(DeferredOutlines);
831 for (
Function *
F : ConstantAllocaRaiseCandidates)
834 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
835 [](EmitMetadataErrorKind Kind,
836 const TargetRegionEntryInfo &EntryInfo) ->
void {
837 errs() <<
"Error of kind: " << Kind
838 <<
" when emitting offload entries and metadata during "
839 "OMPIRBuilder finalization \n";
842 if (!OffloadInfoManager.empty())
843 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
845 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
846 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
847 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
848 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
854bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
856OpenMPIRBuilder::~OpenMPIRBuilder() {
857 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
865 ConstantInt::get(I32Ty,
Value), Name);
877 UsedArray.
resize(List.size());
878 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
882 if (UsedArray.
empty())
889 GV->setSection(
"llvm.metadata");
893OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
895 auto *Int8Ty = Builder.getInt8Ty();
898 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
906 unsigned Reserve2Flags) {
908 LocFlags |= OMP_IDENT_FLAG_KMPC;
911 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
916 ConstantInt::get(
Int32, Reserve2Flags),
917 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
919 size_t SrcLocStrArgIdx = 4;
920 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
924 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
931 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
932 if (
GV.getInitializer() == Initializer)
937 M, OpenMPIRBuilder::Ident,
940 M.getDataLayout().getDefaultGlobalsAddressSpace());
952 SrcLocStrSize = LocStr.
size();
953 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
961 if (
GV.isConstant() &&
GV.hasInitializer() &&
962 GV.getInitializer() == Initializer)
965 SrcLocStr = Builder.CreateGlobalString(
966 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
974 unsigned Line,
unsigned Column,
980 Buffer.
append(FunctionName);
982 Buffer.
append(std::to_string(Line));
984 Buffer.
append(std::to_string(Column));
987 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
991OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
992 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
993 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1001 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1003 if (
DIFile *DIF = DIL->getFile())
1004 if (std::optional<StringRef> Source = DIF->getSource())
1009 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1010 DIL->getColumn(), SrcLocStrSize);
1013Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1015 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1016 Loc.IP.getBlock()->getParent());
1019Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1020 return Builder.CreateCall(
1021 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1022 "omp_global_thread_num");
1025OpenMPIRBuilder::InsertPointOrErrorTy
1026OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1027 bool ForceSimpleCall,
bool CheckCancelFlag) {
1028 if (!updateToLocation(
Loc))
1037 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1040 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1043 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1046 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1049 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1054 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1056 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1057 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1062 bool UseCancelBarrier =
1063 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1066 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
1067 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1068 : OMPRTL___kmpc_barrier),
1071 if (UseCancelBarrier && CheckCancelFlag)
1072 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1075 return Builder.saveIP();
1078OpenMPIRBuilder::InsertPointOrErrorTy
1079OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1081 omp::Directive CanceledDirective) {
1082 if (!updateToLocation(
Loc))
1086 auto *UI = Builder.CreateUnreachable();
1091 Builder.SetInsertPoint(ThenTI);
1093 Value *CancelKind =
nullptr;
1094 switch (CanceledDirective) {
1095#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1096 case DirectiveEnum: \
1097 CancelKind = Builder.getInt32(Value); \
1099#include "llvm/Frontend/OpenMP/OMPKinds.def"
1105 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1106 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1107 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1109 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1110 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1111 if (CanceledDirective == OMPD_parallel) {
1113 Builder.restoreIP(IP);
1114 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1115 omp::Directive::OMPD_unknown,
1124 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1128 Builder.SetInsertPoint(UI->getParent());
1129 UI->eraseFromParent();
1131 return Builder.saveIP();
1134OpenMPIRBuilder::InsertPointOrErrorTy
1135OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1136 omp::Directive CanceledDirective) {
1137 if (!updateToLocation(
Loc))
1141 auto *UI = Builder.CreateUnreachable();
1142 Builder.SetInsertPoint(UI);
1144 Value *CancelKind =
nullptr;
1145 switch (CanceledDirective) {
1146#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1147 case DirectiveEnum: \
1148 CancelKind = Builder.getInt32(Value); \
1150#include "llvm/Frontend/OpenMP/OMPKinds.def"
1156 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1157 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1158 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1160 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1161 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1162 if (CanceledDirective == OMPD_parallel) {
1164 Builder.restoreIP(IP);
1165 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1166 omp::Directive::OMPD_unknown,
1175 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1179 Builder.SetInsertPoint(UI->getParent());
1180 UI->eraseFromParent();
1182 return Builder.saveIP();
1185OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1186 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1189 if (!updateToLocation(
Loc))
1192 Builder.restoreIP(AllocaIP);
1193 auto *KernelArgsPtr =
1194 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1195 updateToLocation(
Loc);
1199 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1200 Builder.CreateAlignedStore(
1202 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1206 NumThreads, HostPtr, KernelArgsPtr};
1208 Return = Builder.CreateCall(
1209 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1212 return Builder.saveIP();
1215OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1216 const LocationDescription &
Loc,
Value *OutlinedFnID,
1217 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1218 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1220 if (!updateToLocation(
Loc))
1233 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1237 Value *Return =
nullptr;
1241 getKernelArgsVector(Args, Builder, ArgsVector);
1256 Builder.restoreIP(emitTargetKernel(
1257 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1258 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1265 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1267 auto CurFn = Builder.GetInsertBlock()->getParent();
1268 emitBlock(OffloadFailedBlock, CurFn);
1269 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1271 return AfterIP.takeError();
1272 Builder.restoreIP(*AfterIP);
1273 emitBranch(OffloadContBlock);
1274 emitBlock(OffloadContBlock, CurFn,
true);
1275 return Builder.saveIP();
1278Error OpenMPIRBuilder::emitCancelationCheckImpl(
1279 Value *CancelFlag, omp::Directive CanceledDirective,
1280 FinalizeCallbackTy ExitCB) {
1281 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1282 "Unexpected cancellation!");
1287 if (Builder.GetInsertPoint() == BB->
end()) {
1293 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1295 Builder.SetInsertPoint(BB);
1301 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1302 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1307 Builder.SetInsertPoint(CancellationBlock);
1309 if (
Error Err = ExitCB(Builder.saveIP()))
1311 auto &FI = FinalizationStack.back();
1312 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1316 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1335 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1338 "Expected at least tid and bounded tid as arguments");
1339 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1342 assert(CI &&
"Expected call instruction to outlined function");
1343 CI->
getParent()->setName(
"omp_parallel");
1345 Builder.SetInsertPoint(CI);
1346 Type *PtrTy = OMPIRBuilder->VoidPtr;
1350 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1354 Value *Args = ArgsAlloca;
1358 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1359 Builder.restoreIP(CurrentIP);
1362 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1364 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1366 Builder.CreateStore(V, StoreAddress);
1370 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1371 : Builder.getInt32(1);
1374 Value *Parallel51CallArgs[] = {
1378 NumThreads ? NumThreads : Builder.getInt32(-1),
1379 Builder.getInt32(-1),
1383 Builder.getInt64(NumCapturedVars)};
1386 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1388 Builder.CreateCall(RTLFn, Parallel51CallArgs);
1391 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1394 Builder.SetInsertPoint(PrivTID);
1396 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1403 I->eraseFromParent();
1420 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1423 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1426 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1434 F->addMetadata(LLVMContext::MD_callback,
1443 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1446 "Expected at least tid and bounded tid as arguments");
1447 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1450 CI->
getParent()->setName(
"omp_parallel");
1451 Builder.SetInsertPoint(CI);
1454 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1458 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1460 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1467 auto PtrTy = OMPIRBuilder->VoidPtr;
1468 if (IfCondition && NumCapturedVars == 0) {
1473 Builder.CreateCall(RTLFn, RealArgs);
1476 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1479 Builder.SetInsertPoint(PrivTID);
1481 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1488 I->eraseFromParent();
1492OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1493 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1494 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1495 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1496 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1499 if (!updateToLocation(
Loc))
1503 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1504 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1505 Value *ThreadID = getOrCreateThreadID(Ident);
1511 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1515 if (NumThreads && !Config.isTargetDevice()) {
1518 Builder.CreateIntCast(NumThreads,
Int32,
false)};
1520 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1523 if (ProcBind != OMP_PROC_BIND_default) {
1527 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1529 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1532 BasicBlock *InsertBB = Builder.GetInsertBlock();
1537 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1545 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1546 Builder.restoreIP(NewOuter);
1547 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr");
1549 Builder.CreateAlloca(
Int32,
nullptr,
"zero.addr");
1552 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1555 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1559 PointerType ::get(M.getContext(), 0),
1560 "zero.addr.ascast");
1581 auto FiniCBWrapper = [&](InsertPointTy IP) {
1586 Builder.restoreIP(IP);
1588 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1592 "Unexpected insertion point for finalization call!");
1596 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1601 InsertPointTy InnerAllocaIP = Builder.saveIP();
1604 Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr.local");
1608 ToBeDeleted.
push_back(Builder.CreateLoad(
Int32, TIDAddr,
"tid.addr.use"));
1610 Builder.CreateLoad(
Int32, ZeroAddr,
"zero.addr.use");
1628 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1631 assert(BodyGenCB &&
"Expected body generation callback!");
1632 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1633 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1636 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1639 if (Config.isTargetDevice()) {
1641 OI.PostOutlineCB = [=, ToBeDeletedVec =
1642 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1644 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1645 ThreadID, ToBeDeletedVec);
1649 OI.PostOutlineCB = [=, ToBeDeletedVec =
1650 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1652 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1656 OI.OuterAllocaBB = OuterAllocaBlock;
1657 OI.EntryBB = PRegEntryBB;
1658 OI.ExitBB = PRegExitBB;
1662 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1673 ".omp_par", ArgsInZeroAddressSpace);
1678 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1680 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1685 return GV->getValueType() == OpenMPIRBuilder::Ident;
1690 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1693 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1696 if (&V == TIDAddr || &V == ZeroAddr) {
1697 OI.ExcludeArgsFromAggregate.push_back(&V);
1702 for (
Use &U : V.uses())
1704 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1714 if (!V.getType()->isPointerTy()) {
1718 Builder.restoreIP(OuterAllocaIP);
1720 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1724 Builder.SetInsertPoint(InsertBB,
1726 Builder.CreateStore(&V,
Ptr);
1729 Builder.restoreIP(InnerAllocaIP);
1730 Inner = Builder.CreateLoad(V.getType(),
Ptr);
1733 Value *ReplacementValue =
nullptr;
1736 ReplacementValue = PrivTID;
1738 InsertPointOrErrorTy AfterIP =
1739 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1741 return AfterIP.takeError();
1742 Builder.restoreIP(*AfterIP);
1744 InnerAllocaIP.getBlock(),
1745 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1747 assert(ReplacementValue &&
1748 "Expected copy/create callback to set replacement value!");
1749 if (ReplacementValue == &V)
1754 UPtr->set(ReplacementValue);
1779 for (
Value *Output : Outputs)
1782 assert(Outputs.empty() &&
1783 "OpenMP outlining should not produce live-out values!");
1785 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1787 for (
auto *BB : Blocks)
1794 auto FiniInfo = FinalizationStack.pop_back_val();
1796 assert(FiniInfo.DK == OMPD_parallel &&
1797 "Unexpected finalization stack state!");
1801 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1802 if (
Error Err = FiniCB(PreFiniIP))
1806 addOutlineInfo(std::move(OI));
1808 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1809 UI->eraseFromParent();
1814void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1817 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1818 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1820 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1823void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1824 if (!updateToLocation(
Loc))
1829void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1833 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1834 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1835 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1838 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1842void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1843 if (!updateToLocation(
Loc))
1845 emitTaskwaitImpl(
Loc);
1848void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1851 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1852 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1854 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1856 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1860void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1861 if (!updateToLocation(
Loc))
1863 emitTaskyieldImpl(
Loc);
1872 OpenMPIRBuilder &OMPBuilder,
1875 if (Dependencies.
empty())
1895 Type *DependInfo = OMPBuilder.DependInfo;
1896 Module &M = OMPBuilder.M;
1898 Value *DepArray =
nullptr;
1899 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1900 Builder.SetInsertPoint(
1901 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1904 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1906 Builder.restoreIP(OldIP);
1908 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1910 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1912 Value *Addr = Builder.CreateStructGEP(
1914 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1915 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1916 Builder.CreateStore(DepValPtr, Addr);
1919 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1920 Builder.CreateStore(
1921 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1924 Value *Flags = Builder.CreateStructGEP(
1926 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1927 Builder.CreateStore(
1928 ConstantInt::get(Builder.getInt8Ty(),
1929 static_cast<unsigned int>(Dep.DepKind)),
1935OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1936 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1937 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1941 if (!updateToLocation(
Loc))
1942 return InsertPointTy();
1945 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1946 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1963 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1964 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1966 splitBB(Builder,
true,
"task.alloca");
1968 InsertPointTy TaskAllocaIP =
1969 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1970 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1971 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1975 OI.EntryBB = TaskAllocaBB;
1976 OI.OuterAllocaBB = AllocaIP.getBlock();
1977 OI.ExitBB = TaskExitBB;
1982 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1984 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1985 Mergeable, Priority, EventHandle, TaskAllocaBB,
1986 ToBeDeleted](
Function &OutlinedFn)
mutable {
1989 "there must be a single user for the outlined function");
1994 bool HasShareds = StaleCI->
arg_size() > 1;
1995 Builder.SetInsertPoint(StaleCI);
2000 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2004 Value *ThreadID = getOrCreateThreadID(Ident);
2016 Value *Flags = Builder.getInt32(Tied);
2019 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2020 Flags = Builder.CreateOr(FinalFlag, Flags);
2024 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2026 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2032 Value *TaskSize = Builder.getInt64(
2033 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2038 Value *SharedsSize = Builder.getInt64(0);
2042 assert(ArgStructAlloca &&
2043 "Unable to find the alloca instruction corresponding to arguments "
2044 "for extracted function");
2047 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2048 "arguments for extracted function");
2050 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2055 CallInst *TaskData = Builder.CreateCall(
2056 TaskAllocFn, {Ident, ThreadID, Flags,
2057 TaskSize, SharedsSize,
2064 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2065 OMPRTL___kmpc_task_allow_completion_event);
2067 Builder.CreateCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2069 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2070 Builder.getPtrTy(0));
2071 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2072 Builder.CreateStore(EventVal, EventHandleAddr);
2078 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2079 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2097 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2100 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2101 Value *PriorityData = Builder.CreateInBoundsGEP(
2102 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2105 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2107 Builder.CreateStore(Priority, CmplrData);
2132 splitBB(Builder,
true,
"if.end");
2134 Builder.GetInsertPoint()->
getParent()->getTerminator();
2135 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2136 Builder.SetInsertPoint(IfTerminator);
2139 Builder.SetInsertPoint(ElseTI);
2141 if (Dependencies.size()) {
2143 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2146 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2147 ConstantInt::get(Builder.getInt32Ty(), 0),
2151 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2153 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2154 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2157 CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
2159 CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
2161 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2162 Builder.SetInsertPoint(ThenTI);
2165 if (Dependencies.size()) {
2167 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2170 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2171 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2176 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2177 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
2182 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2184 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2186 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2190 I->eraseFromParent();
2193 addOutlineInfo(std::move(OI));
2194 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2196 return Builder.saveIP();
2199OpenMPIRBuilder::InsertPointOrErrorTy
2200OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2201 InsertPointTy AllocaIP,
2202 BodyGenCallbackTy BodyGenCB) {
2203 if (!updateToLocation(
Loc))
2204 return InsertPointTy();
2207 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2208 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2209 Value *ThreadID = getOrCreateThreadID(Ident);
2213 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2214 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2216 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2217 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
2220 Builder.SetInsertPoint(TaskgroupExitBB);
2223 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2224 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2226 return Builder.saveIP();
2229OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2230 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2232 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2235 if (!updateToLocation(
Loc))
2241 auto FiniCBWrapper = [&](InsertPointTy IP) {
2250 CancellationBranches.
push_back(DummyBranch);
2254 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
2272 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2273 Builder.restoreIP(CodeGenIP);
2275 splitBBWithSuffix(Builder,
false,
".sections.after");
2279 unsigned CaseNumber = 0;
2280 for (
auto SectionCB : SectionCBs) {
2282 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2283 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2284 Builder.SetInsertPoint(CaseBB);
2286 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
2298 Value *LB = ConstantInt::get(I32Ty, 0);
2299 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2300 Value *
ST = ConstantInt::get(I32Ty, 1);
2302 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2306 InsertPointOrErrorTy WsloopIP =
2307 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2308 WorksharingLoopType::ForStaticLoop, !IsNowait);
2310 return WsloopIP.takeError();
2311 InsertPointTy AfterIP = *WsloopIP;
2314 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2317 auto FiniInfo = FinalizationStack.pop_back_val();
2318 assert(FiniInfo.DK == OMPD_sections &&
2319 "Unexpected finalization stack state!");
2320 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2321 Builder.restoreIP(AfterIP);
2323 splitBBWithSuffix(Builder,
true,
"sections.fini");
2324 if (
Error Err = CB(Builder.saveIP()))
2326 AfterIP = {FiniBB, FiniBB->
begin()};
2330 for (
BranchInst *DummyBranch : CancellationBranches) {
2338OpenMPIRBuilder::InsertPointOrErrorTy
2339OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2340 BodyGenCallbackTy BodyGenCB,
2341 FinalizeCallbackTy FiniCB) {
2342 if (!updateToLocation(
Loc))
2345 auto FiniCBWrapper = [&](InsertPointTy IP) {
2356 Builder.restoreIP(IP);
2357 auto *CaseBB =
Loc.IP.getBlock();
2361 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2365 Directive OMPD = Directive::OMPD_sections;
2368 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2376 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
2379Value *OpenMPIRBuilder::getGPUThreadID() {
2380 return Builder.CreateCall(
2381 getOrCreateRuntimeFunction(M,
2382 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2386Value *OpenMPIRBuilder::getGPUWarpSize() {
2387 return Builder.CreateCall(
2388 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
2391Value *OpenMPIRBuilder::getNVPTXWarpID() {
2392 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2393 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2396Value *OpenMPIRBuilder::getNVPTXLaneID() {
2397 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2398 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2399 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2400 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
2404Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2407 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2408 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2409 assert(FromSize > 0 &&
"From size must be greater than zero");
2410 assert(ToSize > 0 &&
"To size must be greater than zero");
2411 if (FromType == ToType)
2413 if (FromSize == ToSize)
2414 return Builder.CreateBitCast(From, ToType);
2416 return Builder.CreateIntCast(From, ToType,
true);
2417 InsertPointTy SaveIP = Builder.saveIP();
2418 Builder.restoreIP(AllocaIP);
2419 Value *CastItem = Builder.CreateAlloca(ToType);
2420 Builder.restoreIP(SaveIP);
2422 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2423 CastItem, Builder.getPtrTy(0));
2424 Builder.CreateStore(From, ValCastItem);
2425 return Builder.CreateLoad(ToType, CastItem);
2428Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2432 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2433 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2437 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2439 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2440 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2441 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2442 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2443 Value *WarpSizeCast =
2444 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2445 Value *ShuffleCall =
2446 Builder.CreateCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2447 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2450void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2453 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2464 Type *IndexTy = Builder.getIndexTy(
2465 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2466 Value *ElemPtr = DstAddr;
2468 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2472 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2473 Ptr, Builder.getPtrTy(0),
Ptr->getName() +
".ascast");
2475 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2476 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2477 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
2480 if ((
Size / IntSize) > 1) {
2481 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2482 SrcAddrGEP, Builder.getPtrTy());
2487 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2488 emitBlock(PreCondBB, CurFunc);
2490 Builder.CreatePHI(
Ptr->getType(), 2);
2493 Builder.CreatePHI(ElemPtr->
getType(), 2);
2497 Value *PtrDiff = Builder.CreatePtrDiff(
2498 Builder.getInt8Ty(), PtrEnd,
2499 Builder.CreatePointerBitCastOrAddrSpaceCast(
Ptr, Builder.getPtrTy()));
2500 Builder.CreateCondBr(
2501 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2503 emitBlock(ThenBB, CurFunc);
2504 Value *Res = createRuntimeShuffleFunction(
2506 Builder.CreateAlignedLoad(
2507 IntType,
Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2509 Builder.CreateAlignedStore(Res, ElemPtr,
2510 M.getDataLayout().getPrefTypeAlign(ElemType));
2512 Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2513 Value *LocalElemPtr =
2514 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2517 emitBranch(PreCondBB);
2518 emitBlock(ExitBB, CurFunc);
2520 Value *Res = createRuntimeShuffleFunction(
2521 AllocaIP, Builder.CreateLoad(IntType,
Ptr), IntType,
Offset);
2524 Res = Builder.CreateTrunc(Res, ElemType);
2525 Builder.CreateStore(Res, ElemPtr);
2526 Ptr = Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2528 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2534void OpenMPIRBuilder::emitReductionListCopy(
2535 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2537 CopyOptionsTy CopyOptions) {
2538 Type *IndexTy = Builder.getIndexTy(
2539 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2540 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2544 for (
auto En :
enumerate(ReductionInfos)) {
2545 const ReductionInfo &RI = En.value();
2546 Value *SrcElementAddr =
nullptr;
2547 Value *DestElementAddr =
nullptr;
2548 Value *DestElementPtrAddr =
nullptr;
2550 bool ShuffleInElement =
false;
2553 bool UpdateDestListPtr =
false;
2556 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2557 ReductionArrayTy, SrcBase,
2558 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2559 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2563 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2564 ReductionArrayTy, DestBase,
2565 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2567 case CopyAction::RemoteLaneToThread: {
2568 InsertPointTy CurIP = Builder.saveIP();
2569 Builder.restoreIP(AllocaIP);
2570 AllocaInst *DestAlloca = Builder.CreateAlloca(RI.ElementType,
nullptr,
2571 ".omp.reduction.element");
2573 M.getDataLayout().getPrefTypeAlign(RI.ElementType));
2574 DestElementAddr = DestAlloca;
2576 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2577 DestElementAddr->
getName() +
".ascast");
2578 Builder.restoreIP(CurIP);
2579 ShuffleInElement =
true;
2580 UpdateDestListPtr =
true;
2583 case CopyAction::ThreadCopy: {
2585 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2592 if (ShuffleInElement) {
2593 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2594 RemoteLaneOffset, ReductionArrayTy);
2596 switch (RI.EvaluationKind) {
2597 case EvalKind::Scalar: {
2598 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2600 Builder.CreateStore(Elem, DestElementAddr);
2603 case EvalKind::Complex: {
2604 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2605 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2606 Value *SrcReal = Builder.CreateLoad(
2607 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2608 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2609 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2610 Value *SrcImg = Builder.CreateLoad(
2611 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2613 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2614 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2615 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2616 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2617 Builder.CreateStore(SrcReal, DestRealPtr);
2618 Builder.CreateStore(SrcImg, DestImgPtr);
2621 case EvalKind::Aggregate: {
2622 Value *SizeVal = Builder.getInt64(
2623 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2624 Builder.CreateMemCpy(
2625 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2626 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2638 if (UpdateDestListPtr) {
2639 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2640 DestElementAddr, Builder.getPtrTy(),
2641 DestElementAddr->
getName() +
".ascast");
2642 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
2649 AttributeList FuncAttrs) {
2650 InsertPointTy SavedIP = Builder.saveIP();
2653 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2657 "_omp_reduction_inter_warp_copy_func", &M);
2662 Builder.SetInsertPoint(EntryBB);
2680 "__openmp_nvptx_data_transfer_temporary_storage";
2681 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2682 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2684 if (!TransferMedium) {
2693 Value *GPUThreadID = getGPUThreadID();
2695 Value *LaneID = getNVPTXLaneID();
2697 Value *WarpID = getNVPTXWarpID();
2699 InsertPointTy AllocaIP =
2700 InsertPointTy(Builder.GetInsertBlock(),
2701 Builder.GetInsertBlock()->getFirstInsertionPt());
2704 Builder.restoreIP(AllocaIP);
2705 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2706 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2708 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2709 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2710 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2711 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2712 NumWarpsAlloca, Builder.getPtrTy(0),
2713 NumWarpsAlloca->
getName() +
".ascast");
2714 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2715 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2717 InsertPointTy CodeGenIP =
2719 Builder.restoreIP(CodeGenIP);
2722 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
2724 for (
auto En :
enumerate(ReductionInfos)) {
2729 const ReductionInfo &RI = En.value();
2730 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(RI.ElementType);
2731 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2734 unsigned NumIters = RealTySize / TySize;
2737 Value *Cnt =
nullptr;
2738 Value *CntAddr =
nullptr;
2742 CodeGenIP = Builder.saveIP();
2743 Builder.restoreIP(AllocaIP);
2745 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2747 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2748 CntAddr->
getName() +
".ascast");
2749 Builder.restoreIP(CodeGenIP);
2756 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2757 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2759 Value *
Cmp = Builder.CreateICmpULT(
2760 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2761 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2762 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2766 InsertPointOrErrorTy BarrierIP1 =
2767 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2768 omp::Directive::OMPD_unknown,
2772 return BarrierIP1.takeError();
2778 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2779 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2780 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2783 auto *RedListArrayTy =
2785 Type *IndexTy = Builder.getIndexTy(
2786 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2788 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2789 {ConstantInt::get(IndexTy, 0),
2790 ConstantInt::get(IndexTy, En.index())});
2792 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2794 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2798 Value *MediumPtr = Builder.CreateInBoundsGEP(
2799 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2802 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2804 Builder.CreateStore(Elem, MediumPtr,
2806 Builder.CreateBr(MergeBB);
2809 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2810 Builder.CreateBr(MergeBB);
2813 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2814 InsertPointOrErrorTy BarrierIP2 =
2815 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2816 omp::Directive::OMPD_unknown,
2820 return BarrierIP2.takeError();
2827 Value *NumWarpsVal =
2828 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2830 Value *IsActiveThread =
2831 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2832 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2834 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2838 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2839 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2841 Value *TargetElemPtrPtr =
2842 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2843 {ConstantInt::get(IndexTy, 0),
2844 ConstantInt::get(IndexTy, En.index())});
2845 Value *TargetElemPtrVal =
2846 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2847 Value *TargetElemPtr = TargetElemPtrVal;
2850 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2853 Value *SrcMediumValue =
2854 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2855 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2856 Builder.CreateBr(W0MergeBB);
2858 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2859 Builder.CreateBr(W0MergeBB);
2861 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2864 Cnt = Builder.CreateNSWAdd(
2865 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2866 Builder.CreateStore(Cnt, CntAddr,
false);
2868 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2869 emitBranch(PrecondBB);
2870 emitBlock(ExitBB, CurFn);
2872 RealTySize %= TySize;
2876 Builder.CreateRetVoid();
2877 Builder.restoreIP(SavedIP);
2882Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2884 AttributeList FuncAttrs) {
2888 {Builder.getPtrTy(), Builder.getInt16Ty(),
2889 Builder.getInt16Ty(), Builder.getInt16Ty()},
2893 "_omp_reduction_shuffle_and_reduce_func", &M);
2903 Builder.SetInsertPoint(EntryBB);
2914 Type *ReduceListArgType = ReduceListArg->
getType();
2916 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
2917 Value *ReduceListAlloca = Builder.CreateAlloca(
2918 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2919 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2920 LaneIDArg->
getName() +
".addr");
2921 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
2922 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2923 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2924 AlgoVerArg->
getName() +
".addr");
2930 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
2931 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2933 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2934 ReduceListAlloca, ReduceListArgType,
2935 ReduceListAlloca->
getName() +
".ascast");
2936 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2937 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2938 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2939 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2940 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2941 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2942 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2943 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2944 RemoteReductionListAlloca, Builder.getPtrTy(),
2945 RemoteReductionListAlloca->
getName() +
".ascast");
2947 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2948 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
2949 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
2950 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
2952 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
2953 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
2954 Value *RemoteLaneOffset =
2955 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
2956 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
2963 emitReductionListCopy(
2964 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
2965 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
2988 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
2989 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
2990 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
2991 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
2992 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
2993 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
2994 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
2995 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
2996 Value *RemoteOffsetComp =
2997 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
2998 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
2999 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3000 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3006 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3007 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3008 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3009 ReduceList, Builder.getPtrTy());
3010 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3011 RemoteListAddrCast, Builder.getPtrTy());
3012 Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3013 ->addFnAttr(Attribute::NoUnwind);
3014 Builder.CreateBr(MergeBB);
3016 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3017 Builder.CreateBr(MergeBB);
3019 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3023 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3024 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3025 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3030 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3032 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3033 emitReductionListCopy(AllocaIP, CopyAction::ThreadCopy, RedListArrayTy,
3034 ReductionInfos, RemoteListAddrCast, ReduceList);
3035 Builder.CreateBr(CpyMergeBB);
3037 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3038 Builder.CreateBr(CpyMergeBB);
3040 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3042 Builder.CreateRetVoid();
3047Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3049 AttributeList FuncAttrs) {
3050 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3053 Builder.getVoidTy(),
3054 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3058 "_omp_reduction_list_to_global_copy_func", &M);
3065 Builder.SetInsertPoint(EntryBlock);
3074 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3075 BufferArg->
getName() +
".addr");
3076 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3078 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3079 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3080 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3081 BufferArgAlloca, Builder.getPtrTy(),
3082 BufferArgAlloca->
getName() +
".ascast");
3083 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3084 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3085 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3086 ReduceListArgAlloca, Builder.getPtrTy(),
3087 ReduceListArgAlloca->
getName() +
".ascast");
3089 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3090 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3091 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3093 Value *LocalReduceList =
3094 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3095 Value *BufferArgVal =
3096 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3097 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3098 Type *IndexTy = Builder.getIndexTy(
3099 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3100 for (
auto En :
enumerate(ReductionInfos)) {
3101 const ReductionInfo &RI = En.value();
3102 auto *RedListArrayTy =
3105 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3106 RedListArrayTy, LocalReduceList,
3107 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3109 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3113 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3114 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3115 ReductionsBufferTy, BufferVD, 0, En.index());
3117 switch (RI.EvaluationKind) {
3118 case EvalKind::Scalar: {
3119 Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3120 Builder.CreateStore(TargetElement, GlobVal);
3123 case EvalKind::Complex: {
3124 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3125 RI.ElementType, ElemPtr, 0, 0,
".realp");
3126 Value *SrcReal = Builder.CreateLoad(
3127 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3128 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3129 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3130 Value *SrcImg = Builder.CreateLoad(
3131 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3133 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3134 RI.ElementType, GlobVal, 0, 0,
".realp");
3135 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3136 RI.ElementType, GlobVal, 0, 1,
".imagp");
3137 Builder.CreateStore(SrcReal, DestRealPtr);
3138 Builder.CreateStore(SrcImg, DestImgPtr);
3141 case EvalKind::Aggregate: {
3143 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3144 Builder.CreateMemCpy(
3145 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3146 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3152 Builder.CreateRetVoid();
3153 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_list_to_global_reduce_func"(ptr Buffer, i32 Idx,
// ptr ReduceList): builds a local reduce-list whose entries point into the
// Idx-th element of the globalized reduction buffer, then calls the reduction
// function to combine the private ReduceList into that buffer element.
// NOTE(review): this listing is an extracted view with interior lines elided
// (declarations of BufferArg/IdxArg/ReduceListArg/EntryBlock/ReduceFn and the
// function creation are not visible here); comments describe only the visible code.
3157Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3159 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
// Remember the caller's insertion point; it is restored at the end.
3160 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3163 Builder.getVoidTy(),
3164 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3168 "_omp_reduction_list_to_global_reduce_func", &M);
3175 Builder.SetInsertPoint(EntryBlock);
// Shadow allocas for the three incoming arguments (buffer ptr, index, list ptr).
3184 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3185 BufferArg->
getName() +
".addr");
3186 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3188 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3189 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3190 auto *RedListArrayTy =
// Local array that will hold one pointer per reduction variable.
3195 Value *LocalReduceList =
3196 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
// Cast all allocas to the generic address space before storing/loading.
3198 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3199 BufferArgAlloca, Builder.getPtrTy(),
3200 BufferArgAlloca->
getName() +
".ascast");
3201 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3202 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3203 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3204 ReduceListArgAlloca, Builder.getPtrTy(),
3205 ReduceListArgAlloca->
getName() +
".ascast");
3206 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3207 LocalReduceList, Builder.getPtrTy(),
3208 LocalReduceList->
getName() +
".ascast");
// Spill the incoming arguments into their shadow slots.
3210 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3211 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3212 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3214 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3215 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3216 Type *IndexTy = Builder.getIndexTy(
3217 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// For each reduction, point the local reduce-list entry at the matching
// field of the Idx-th buffer element.
3218 for (
auto En :
enumerate(ReductionInfos)) {
3219 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3220 RedListArrayTy, LocalReduceListAddrCast,
3221 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3223 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3225 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3226 ReductionsBufferTy, BufferVD, 0, En.index());
3227 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Reduce: ReduceFn(buffer-backed list, private reduce list).
3232 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3233 Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3234 ->addFnAttr(Attribute::NoUnwind);
3235 Builder.CreateRetVoid();
3236 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_global_to_list_copy_func"(ptr Buffer, i32 Idx,
// ptr ReduceList): copies each reduction element from the Idx-th slot of the
// globalized buffer back into the private reduce-list, handling scalar,
// complex (real/imag pair) and aggregate (memcpy) element kinds.
// NOTE(review): extracted listing with interior lines elided (BufferArg/
// IdxArg/ReduceListArg/EntryBlock declarations, case terminators and the
// return are not visible); comments describe only the visible code.
3240Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3242 AttributeList FuncAttrs) {
// Preserve the caller's insertion point; restored at the end.
3243 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3246 Builder.getVoidTy(),
3247 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3251 "_omp_reduction_global_to_list_copy_func", &M);
3258 Builder.SetInsertPoint(EntryBlock);
// Shadow allocas for the three incoming arguments.
3267 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3268 BufferArg->
getName() +
".addr");
3269 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3271 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3272 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
// Generic address-space casts for the argument slots.
3273 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3274 BufferArgAlloca, Builder.getPtrTy(),
3275 BufferArgAlloca->
getName() +
".ascast");
3276 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3277 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3278 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3279 ReduceListArgAlloca, Builder.getPtrTy(),
3280 ReduceListArgAlloca->
getName() +
".ascast");
3281 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3282 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3283 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
// Reload argument values for use below.
3285 Value *LocalReduceList =
3286 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3287 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3288 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3289 Type *IndexTy = Builder.getIndexTy(
3290 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Copy each reduction element from buffer[Idx] into the reduce-list slot.
3291 for (
auto En :
enumerate(ReductionInfos)) {
3292 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3293 auto *RedListArrayTy =
3296 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3297 RedListArrayTy, LocalReduceList,
3298 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3300 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3303 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3304 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3305 ReductionsBufferTy, BufferVD, 0, En.index());
// Copy strategy depends on how the element is evaluated.
3307 switch (RI.EvaluationKind) {
3308 case EvalKind::Scalar: {
3309 Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
3310 Builder.CreateStore(TargetElement, ElemPtr);
// Complex: copy real and imaginary struct fields separately.
3313 case EvalKind::Complex: {
3314 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3315 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3316 Value *SrcReal = Builder.CreateLoad(
3317 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3318 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3319 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3320 Value *SrcImg = Builder.CreateLoad(
3321 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3323 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3324 RI.ElementType, ElemPtr, 0, 0,
".realp");
3325 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3326 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3327 Builder.CreateStore(SrcReal, DestRealPtr);
3328 Builder.CreateStore(SrcImg, DestImgPtr);
// Aggregate: bulk byte copy with the type's preferred alignment.
3331 case EvalKind::Aggregate: {
3333 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3334 Builder.CreateMemCpy(
3335 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3336 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3343 Builder.CreateRetVoid();
3344 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_global_to_list_reduce_func"(ptr Buffer, i32 Idx,
// ptr ReduceList): mirror image of emitListToGlobalReduceFunction — it builds
// a local list of pointers into buffer[Idx] and reduces it into the private
// ReduceList (note the swapped argument order in the final ReduceFn call).
// NOTE(review): extracted listing with interior lines elided (BufferArg/
// IdxArg/ReduceListArg/EntryBlock/RedListArrayTy/ReduceFn declarations are
// not visible here); comments describe only the visible code.
3348Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3350 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
// Preserve the caller's insertion point; restored at the end.
3351 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3354 Builder.getVoidTy(),
3355 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3359 "_omp_reduction_global_to_list_reduce_func", &M);
3366 Builder.SetInsertPoint(EntryBlock);
// Shadow allocas for the three incoming arguments.
3375 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3376 BufferArg->
getName() +
".addr");
3377 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3379 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3380 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
// Local pointer array, one slot per reduction variable.
3386 Value *LocalReduceList =
3387 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
// Generic address-space casts before the stores/loads below.
3389 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3390 BufferArgAlloca, Builder.getPtrTy(),
3391 BufferArgAlloca->
getName() +
".ascast");
3392 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3393 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3394 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3395 ReduceListArgAlloca, Builder.getPtrTy(),
3396 ReduceListArgAlloca->
getName() +
".ascast");
3397 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3398 LocalReduceList, Builder.getPtrTy(),
3399 LocalReduceList->
getName() +
".ascast");
3401 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3402 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3403 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3405 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3406 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3407 Type *IndexTy = Builder.getIndexTy(
3408 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Point each local list entry at the matching field of buffer[Idx].
3409 for (
auto En :
enumerate(ReductionInfos)) {
3410 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3411 RedListArrayTy, ReductionList,
3412 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3415 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3416 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3417 ReductionsBufferTy, BufferVD, 0, En.index());
3418 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Reduce: ReduceFn(private list, buffer-backed list) — order is the
// reverse of the list-to-global variant.
3423 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3424 Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
3425 ->addFnAttr(Attribute::NoUnwind);
3426 Builder.CreateRetVoid();
3427 Builder.restoreIP(OldIP);
// Returns the name used for the per-construct reduction function:
// the given base name followed by a platform-specific suffix built from the
// components {"omp", "reduction", "reduction_func"}. The exact suffix
// formatting is delegated to createPlatformSpecificName (not visible here).
3431std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3432 std::string Suffix =
3433 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
3434 return (Name + Suffix).
str();
// Body of createReductionFunction (its signature line is elided in this
// extracted listing): builds a function taking two pointer arguments — arrays
// of element pointers for the LHS (reduced-into) and RHS (private) values —
// and, for every ReductionInfo, loads the element pointers and either invokes
// RI.ReductionGen inline or records fixup pointers for the Clang callback.
// NOTE(review): several declarations (EntryBB, RedArrayTy, Arg0/Arg1,
// ReductionFunc, Reduced, LHSPtrs/RHSPtrs) come from elided lines.
3439 ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
3441 {Builder.getPtrTy(), Builder.getPtrTy()},
3443 std::string
Name = getReductionFuncName(ReducerName);
3451 Builder.SetInsertPoint(EntryBB);
3455 Value *LHSArrayPtr =
nullptr;
3456 Value *RHSArrayPtr =
nullptr;
// Spill the two pointer arguments into allocas, then reload through
// generic address-space casts.
3463 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3465 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3466 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3467 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3468 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3469 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3470 Builder.CreateStore(Arg0, LHSAddrCast);
3471 Builder.CreateStore(Arg1, RHSAddrCast);
3472 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3473 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3476 Type *IndexTy = Builder.getIndexTy(
3477 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Per reduction: fetch the i8* element pointers out of both arrays and
// cast them to the variable's pointer type.
3479 for (
auto En :
enumerate(ReductionInfos)) {
3480 const ReductionInfo &RI = En.value();
3481 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3482 RedArrayTy, RHSArrayPtr,
3483 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3484 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3485 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3486 RHSI8Ptr, RI.PrivateVariable->getType(),
3487 RHSI8Ptr->
getName() +
".ascast");
3489 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3490 RedArrayTy, LHSArrayPtr,
3491 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3492 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3493 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3494 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
// Clang mode defers codegen to a later fixup pass (see loop below).
3496 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
// MLIR/generic mode: load both values and call the reduction
// generator inline, storing the combined value back through LHSPtr.
3500 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3501 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3503 InsertPointOrErrorTy AfterIP =
3504 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3506 return AfterIP.takeError();
3507 if (!Builder.GetInsertBlock())
3508 return ReductionFunc;
3509 Builder.CreateStore(Reduced, LHSPtr);
// Clang mode: run the Clang callback per reduction and rewrite the
// placeholder uses it reports back to the real element pointers.
3513 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3514 for (
auto En :
enumerate(ReductionInfos)) {
3515 unsigned Index = En.index();
3516 const ReductionInfo &RI = En.value();
3517 Value *LHSFixupPtr, *RHSFixupPtr;
3518 Builder.restoreIP(RI.ReductionGenClang(
3519 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3524 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3529 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3535 Builder.CreateRetVoid();
3536 return ReductionFunc;
// Debug-only sanity checks over the ReductionInfo list (the enclosing helper's
// signature is elided in this extracted listing): each entry must carry a
// variable, a private copy of matching pointer type, and at least one of the
// two reduction-generator callbacks.
3542 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3544 assert(RI.Variable &&
"expected non-null variable");
3545 assert(RI.PrivateVariable &&
"expected non-null private variable");
3546 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3547 "expected non-null reduction generator callback");
3550 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3551 "expected variables and their private equivalents to have the same "
3554 assert(RI.Variable->getType()->isPointerTy() &&
3555 "expected variables to be pointers");
// GPU lowering for OpenMP reductions: packs the private reduction variables
// into a pointer array, emits the shuffle/inter-warp helper functions, and
// calls __kmpc_nvptx_parallel_reduce_nowait_v2 (parallel) or
// __kmpc_nvptx_teams_reduce_nowait_v2 (teams, via globalized buffer helpers).
// The thread that wins the runtime reduction (Res == 1) then folds the
// partial results into the original variables.
// NOTE(review): extracted listing with interior lines elided (InsertBlock,
// PtrTy, RedArrayTy, SarFunc/WcFunc, Res, ThenBB/ExitBB, LHS/RHS and others
// are declared on lines not visible here).
3559OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
3560 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3562 bool IsNoWait,
bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind,
3563 std::optional<omp::GV> GridValue,
unsigned ReductionBufNum,
3564 Value *SrcLocInfo) {
3565 if (!updateToLocation(
Loc))
3566 return InsertPointTy();
3567 Builder.restoreIP(CodeGenIP);
// Build the source-location ident used by all runtime calls below.
3574 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3575 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Nothing to do without reductions.
3578 if (ReductionInfos.
size() == 0)
3579 return Builder.saveIP();
3582 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3588 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
// Attributes for the emitted helper functions; OptimizeNone is dropped
// so the helpers can be optimized.
3592 AttributeList FuncAttrs;
3593 AttrBuilder AttrBldr(Ctx);
3595 AttrBldr.addAttribute(Attr);
3596 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3597 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
3599 CodeGenIP = Builder.saveIP();
3601 createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
3602 ReductionInfos, ReductionGenCBKind, FuncAttrs);
3603 if (!ReductionResult)
3605 Function *ReductionFunc = *ReductionResult;
3606 Builder.restoreIP(CodeGenIP);
3609 if (GridValue.has_value())
3610 Config.setGridValue(GridValue.value());
// Allocate the reduce-list (one pointer per reduction) in the alloca
// block, then resume code generation at the saved point.
3625 CodeGenIP = Builder.saveIP();
3626 Builder.restoreIP(AllocaIP);
3627 Value *ReductionListAlloca =
3628 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3629 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3630 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3631 Builder.restoreIP(CodeGenIP);
3632 Type *IndexTy = Builder.getIndexTy(
3633 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Fill the reduce-list with pointers to the private copies.
3634 for (
auto En :
enumerate(ReductionInfos)) {
3635 const ReductionInfo &RI = En.value();
3636 Value *ElemPtr = Builder.CreateInBoundsGEP(
3637 RedArrayTy, ReductionList,
3638 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3640 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3641 Builder.CreateStore(CastElem, ElemPtr);
// Emit the shuffle-and-reduce and inter-warp copy helpers used by the
// device runtime.
3643 CodeGenIP = Builder.saveIP();
3645 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3647 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs);
3651 Builder.restoreIP(CodeGenIP);
3653 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
// Compute the largest element size; total transfer size is
// MaxDataSize * number of reductions.
3655 unsigned MaxDataSize = 0;
3657 for (
auto En :
enumerate(ReductionInfos)) {
3658 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3659 if (
Size > MaxDataSize)
3661 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3663 Value *ReductionDataSize =
3664 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
// Parallel (non-teams) reduction: single runtime call.
3665 if (!IsTeamsReduction) {
3666 Value *SarFuncCast =
3667 Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy);
3669 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy);
3670 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3672 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3673 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3674 Res = Builder.CreateCall(Pv2Ptr, Args);
// Teams reduction: globalize through a fixed buffer and emit the four
// list<->global copy/reduce helper functions.
3676 CodeGenIP = Builder.saveIP();
3678 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3679 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3680 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3681 Function *LtGCFunc = emitListToGlobalCopyFunction(
3682 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3683 Function *LtGRFunc = emitListToGlobalReduceFunction(
3684 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3685 Function *GtLCFunc = emitGlobalToListCopyFunction(
3686 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3687 Function *GtLRFunc = emitGlobalToListReduceFunction(
3688 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3689 Builder.restoreIP(CodeGenIP);
3691 Value *KernelTeamsReductionPtr = Builder.CreateCall(
3692 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3694 Value *Args3[] = {SrcLocInfo,
3695 KernelTeamsReductionPtr,
3696 Builder.getInt32(ReductionBufNum),
3706 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3707 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3708 Res = Builder.CreateCall(TeamsReduceFn, Args3);
// Only the winning thread (runtime returned 1) performs the final
// accumulation into the original variables.
3714 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3715 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3721 emitBlock(ThenBB, CurFunc);
3724 for (
auto En :
enumerate(ReductionInfos)) {
3725 const ReductionInfo &RI = En.value();
3728 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3730 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3731 Value *LHSPtr, *RHSPtr;
3732 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3733 &LHSPtr, &RHSPtr, CurFunc));
3746 Value *LHSValue = Builder.CreateLoad(RI.ElementType,
LHS,
"final.lhs");
3747 Value *RHSValue = Builder.CreateLoad(RI.ElementType,
RHS,
"final.rhs");
3749 InsertPointOrErrorTy AfterIP =
3750 RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
3752 return AfterIP.takeError();
3753 Builder.CreateStore(Reduced,
LHS,
false);
3756 emitBlock(ExitBB, CurFunc);
3757 if (ContinuationBlock) {
3758 Builder.CreateBr(ContinuationBlock);
3759 Builder.SetInsertPoint(ContinuationBlock);
3761 Config.setEmitLLVMUsed();
3763 return Builder.saveIP();
// Body-population of the host ".omp.reduction.func" (the enclosing helper's
// header and the function-creation lines are elided in this extracted
// listing): fills the reduction function with per-variable loads from the
// LHS/RHS pointer arrays, invokes each ReductionGen callback, and stores the
// combined value back unless the variable is reduced by-reference.
// NOTE(review): Arg0/Arg1, LHSAlloca/RHSAlloca, RedArrayTy, Reduced,
// ReductionFunc and IsByRef come from elided lines; the two assignment paths
// below appear to be alternative branches whose condition is also elided.
3772 ".omp.reduction.func", &M);
3782 Builder.SetInsertPoint(ReductionFuncBlock);
3783 Value *LHSArrayPtr =
nullptr;
3784 Value *RHSArrayPtr =
nullptr;
// Path 1: spill the two pointer arguments and reload via address-space
// casts (no name suffixes here, unlike the GPU variant).
3795 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3797 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3798 Value *LHSAddrCast =
3799 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3800 Value *RHSAddrCast =
3801 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3802 Builder.CreateStore(Arg0, LHSAddrCast);
3803 Builder.CreateStore(Arg1, RHSAddrCast);
3804 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3805 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
// Path 2: use the function arguments directly.
3807 LHSArrayPtr = ReductionFunc->
getArg(0);
3808 RHSArrayPtr = ReductionFunc->
getArg(1);
3811 unsigned NumReductions = ReductionInfos.
size();
// Combine each RHS (private) value into the matching LHS slot.
3814 for (
auto En :
enumerate(ReductionInfos)) {
3815 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3816 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3817 RedArrayTy, LHSArrayPtr, 0, En.index());
3818 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3819 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3820 LHSI8Ptr, RI.Variable->
getType());
3821 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3822 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3823 RedArrayTy, RHSArrayPtr, 0, En.index());
3824 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3825 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3826 RHSI8Ptr, RI.PrivateVariable->
getType());
3827 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3829 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3830 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3832 return AfterIP.takeError();
3834 Builder.restoreIP(*AfterIP);
3836 if (!Builder.GetInsertBlock())
// By-ref reductions update through the pointer inside ReductionGen, so
// only by-value reductions store the result here.
3840 if (!IsByRef[En.index()])
3841 Builder.CreateStore(Reduced, LHSPtr);
3843 Builder.CreateRetVoid();
// Host lowering for OpenMP reductions (delegates to createReductionsGPU when
// targeting a device — the guard condition is elided in this listing). Packs
// the private variables into "red.array", calls __kmpc_reduce[_nowait], and
// branches on the returned kind: 1 = non-atomic path (load/reduce/store per
// variable, then __kmpc_end_reduce[_nowait]), 2 = atomic path (per-variable
// AtomicReductionGen, only legal when every entry has one and none is
// by-ref).
// NOTE(review): extracted listing with interior lines elided (RedArrayTy,
// InsertBlock, ReduceCall/Switch, block creation, ValueType, Reduced, DL and
// the ident-flag selection are declared on lines not visible here).
3847OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
3848 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3850 bool IsNoWait,
bool IsTeamsReduction) {
3853 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
3854 IsNoWait, IsTeamsReduction);
3858 if (!updateToLocation(
Loc))
3859 return InsertPointTy();
// Fast path: no reductions requested.
3861 if (ReductionInfos.
size() == 0)
3862 return Builder.saveIP();
3871 unsigned NumReductions = ReductionInfos.
size();
// Allocate the pointer array in the alloca block, fill it at the
// current insertion block.
3873 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
3874 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
3876 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3878 for (
auto En :
enumerate(ReductionInfos)) {
3879 unsigned Index = En.index();
3880 const ReductionInfo &RI = En.value();
3881 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
3882 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3883 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
3888 Type *IndexTy = Builder.getIndexTy(
3889 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3890 Function *
Func = Builder.GetInsertBlock()->getParent();
3893 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
// The atomic path is only offered to the runtime if every reduction
// has an atomic generator.
3894 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
3895 return RI.AtomicReductionGen;
3897 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
3899 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3901 Value *ThreadId = getOrCreateThreadID(Ident);
3902 Constant *NumVariables = Builder.getInt32(NumReductions);
3904 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3905 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
3907 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3908 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
3909 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3910 : RuntimeFunction::OMPRTL___kmpc_reduce);
3912 Builder.CreateCall(ReduceFunc,
3913 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3914 ReductionFunc, Lock},
// Dispatch on the runtime's answer: 1 -> non-atomic, 2 -> atomic,
// anything else -> continuation.
3925 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
3926 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
3927 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
// Non-atomic reduction: combine each private value into the original.
3932 Builder.SetInsertPoint(NonAtomicRedBlock);
3933 for (
auto En :
enumerate(ReductionInfos)) {
3934 const ReductionInfo &RI = En.value();
// By-ref variables are passed as pointers; by-value ones are loaded.
3938 Value *RedValue = RI.Variable;
3939 if (!IsByRef[En.index()]) {
3940 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3941 "red.value." +
Twine(En.index()));
3943 Value *PrivateRedValue =
3944 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
3945 "red.private.value." +
Twine(En.index()));
3947 InsertPointOrErrorTy AfterIP =
3948 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3950 return AfterIP.takeError();
3951 Builder.restoreIP(*AfterIP);
3953 if (!Builder.GetInsertBlock())
3954 return InsertPointTy();
3956 if (!IsByRef[En.index()])
3957 Builder.CreateStore(Reduced, RI.Variable);
3959 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
3960 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3961 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3962 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
3963 Builder.CreateBr(ContinuationBlock);
// Atomic reduction: valid only when all entries have an atomic
// generator and none is by-ref; otherwise the block is unreachable.
3968 Builder.SetInsertPoint(AtomicRedBlock);
3969 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3970 for (
const ReductionInfo &RI : ReductionInfos) {
3971 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
3972 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
3974 return AfterIP.takeError();
3975 Builder.restoreIP(*AfterIP);
3976 if (!Builder.GetInsertBlock())
3977 return InsertPointTy();
3979 Builder.CreateBr(ContinuationBlock);
3981 Builder.CreateUnreachable();
3992 if (!Builder.GetInsertBlock())
3993 return InsertPointTy();
3995 Builder.SetInsertPoint(ContinuationBlock);
3996 return Builder.saveIP();
// Generates an OpenMP `master` construct: guards the body with
// __kmpc_master / __kmpc_end_master and emits it as an inlined region via
// EmitOMPInlinedRegion (trailing arguments of that call are elided in this
// extracted listing, as is the declaration of Args).
3999OpenMPIRBuilder::InsertPointOrErrorTy
4000OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4001 BodyGenCallbackTy BodyGenCB,
4002 FinalizeCallbackTy FiniCB) {
4003 if (!updateToLocation(
Loc))
4006 Directive OMPD = Directive::OMPD_master;
// Ident + thread id are the common arguments to both runtime calls.
4008 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4009 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4010 Value *ThreadId = getOrCreateThreadID(Ident);
4013 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4014 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4016 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4017 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
4019 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Generates an OpenMP `masked` construct: guards the body with
// __kmpc_masked / __kmpc_end_masked. The entry call takes the filter value
// (the declaration of Args, which presumably includes it, is elided in this
// extracted listing); the exit call takes only {Ident, ThreadId}.
4023OpenMPIRBuilder::InsertPointOrErrorTy
4024OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4025 BodyGenCallbackTy BodyGenCB,
4027 if (!updateToLocation(
Loc))
4030 Directive OMPD = Directive::OMPD_masked;
4032 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4033 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4034 Value *ThreadId = getOrCreateThreadID(Ident);
4036 Value *ArgsEnd[] = {Ident, ThreadId};
4038 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4039 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4041 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4042 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
4044 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4054 Call->setDoesNotThrow();
// Generates the code at an OpenMP `scan` directive inside a scan loop.
// The construct is lowered as two loop passes: in the first (input) pass the
// scan variables are written into per-variable temporary buffers indexed by
// the loop IV; in the second pass the already-reduced buffer values are read
// back into the scan variables. Which side of the scan point executes is
// selected by IsInclusive vs. which pass is running.
// NOTE(review): extracted listing — IV and CmpI are defined on elided lines,
// and error handling after the calls is not visible.
4066OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4067 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4069 bool IsInclusive, ScanInfo *ScanRedInfo) {
// Buffers are created lazily on the first (input) pass.
4070 if (ScanRedInfo->OMPFirstScanLoop) {
4071 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4072 ScanVarsType, ScanRedInfo);
4076 if (!updateToLocation(
Loc))
// Input pass: store each scan variable into its buffer at offset IV.
4081 if (ScanRedInfo->OMPFirstScanLoop) {
4083 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4084 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4085 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4086 Type *DestTy = ScanVarsType[i];
4087 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4088 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4090 Builder.CreateStore(Src, Val);
4093 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4094 emitBlock(ScanRedInfo->OMPScanDispatch,
4095 Builder.GetInsertBlock()->getParent());
// Scan pass: load the reduced buffer values back into the variables.
4097 if (!ScanRedInfo->OMPFirstScanLoop) {
4098 IV = ScanRedInfo->IV;
4101 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4102 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4103 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4104 Type *DestTy = ScanVarsType[i];
4106 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4107 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4108 Builder.CreateStore(Src, ScanVars[i]);
// Select before/after block order depending on pass and inclusivity.
4114 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4115 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4116 ScanRedInfo->OMPAfterScanBlock);
4118 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4119 ScanRedInfo->OMPBeforeScanBlock);
4121 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4122 Builder.GetInsertBlock()->getParent());
4123 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4124 return Builder.saveIP();
// Allocates the per-variable scan buffers: a "vla" pointer slot per scan
// variable (in the alloca block), then — inside a masked region executed by
// a single thread — mallocs Span+1 elements per variable and stores the
// allocation into the corresponding slot. A barrier afterwards makes the
// buffers visible to the whole team.
// NOTE(review): extracted listing — BuffPtr/AllocSpan/Allocsize/IntPtrTy,
// FiniCB/FilterVal and the surrounding error checks come from elided lines.
4127Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4131 Builder.restoreIP(AllocaIP);
// One pointer slot per scan variable, keyed by the variable in
// ScanBuffPtrs.
4133 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4135 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4136 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
// Masked body: allocate Span+1 elements per variable on the heap.
4140 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4141 InsertPointTy CodeGenIP) ->
Error {
4142 Builder.restoreIP(CodeGenIP);
4144 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4145 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4149 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4150 AllocSpan,
nullptr,
"arr");
4151 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
// Emit the masked allocation at the scan-init block's terminator.
4159 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4161 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4162 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4165 return AfterIP.takeError();
4166 Builder.restoreIP(*AfterIP);
4167 BasicBlock *InputBB = Builder.GetInsertBlock();
// Barrier: all threads must see the freshly allocated buffers.
4169 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4170 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4172 return AfterIP.takeError();
4173 Builder.restoreIP(*AfterIP);
// Finalization after the scan loops: inside a masked region, copies the
// final (Span-indexed) buffer value of each reduction back into the original
// variable and frees the heap buffer, then emits a team barrier.
// NOTE(review): extracted listing — FiniCB/FilterVal and the error checks
// between calls come from elided lines.
4178Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4180 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4181 InsertPointTy CodeGenIP) ->
Error {
4182 Builder.restoreIP(CodeGenIP);
4183 for (ReductionInfo RedInfo : ReductionInfos) {
4184 Value *PrivateVar = RedInfo.PrivateVariable;
4185 Value *OrigVar = RedInfo.Variable;
4186 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4187 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
// The element at index Span holds the final scan result.
4189 Type *SrcTy = RedInfo.ElementType;
4190 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4192 Value *Src = Builder.CreateLoad(SrcTy, Val);
4194 Builder.CreateStore(Src, OrigVar);
// Release the scratch buffer allocated in the decls phase.
4195 Builder.CreateFree(Buff);
// Insert before the finish block's terminator if it already has one.
4203 if (ScanRedInfo->OMPScanFinish->getTerminator())
4204 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4206 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4209 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4210 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4213 return AfterIP.takeError();
4214 Builder.restoreIP(*AfterIP);
4215 BasicBlock *InputBB = Builder.GetInsertBlock();
4217 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4218 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4220 return AfterIP.takeError();
4221 Builder.restoreIP(*AfterIP);
// Emits the in-place prefix-scan reduction over the scan buffers, performed
// by a single thread inside a masked region. The visible structure is a
// doubly-nested loop: the outer loop iterates ceil(log2(Span)) times with a
// stride Pow2K that doubles each iteration; the inner loop walks i from the
// top down and combines buff[i+1] with buff[i+1-Pow2K] via ReductionGen.
// A barrier follows, then the finals are written back.
// NOTE(review): extracted listing — InputBB/LoopBB/InnerLoopBB/InnerExitBB/
// ExitBB/CurFn, NMin1, LogVal, NextIVal, Cmp, FiniCB/FilterVal and several
// error checks come from elided lines.
4225OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4226 const LocationDescription &
Loc,
4228 ScanInfo *ScanRedInfo) {
4230 if (!updateToLocation(
Loc))
4232 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4233 InsertPointTy CodeGenIP) ->
Error {
4234 Builder.restoreIP(CodeGenIP);
4240 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
// Compute the outer trip count from Span (via double-precision
// conversion; the intervening math intrinsic calls are elided here).
4242 Builder.GetInsertBlock()->getModule(),
4246 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4249 Builder.GetInsertBlock()->getModule(),
4252 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4255 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4256 Builder.SetInsertPoint(InputBB);
4257 Builder.CreateBr(LoopBB);
// Outer loop: Counter counts rounds, Pow2K is the current stride.
4258 emitBlock(LoopBB, CurFn);
4259 Builder.SetInsertPoint(LoopBB);
4261 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4263 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4264 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4266 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
4274 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4275 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
// Inner loop: combine element pairs separated by Pow2K.
4276 emitBlock(InnerLoopBB, CurFn);
4277 Builder.SetInsertPoint(InnerLoopBB);
4278 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4280 for (ReductionInfo RedInfo : ReductionInfos) {
4281 Value *ReductionVal = RedInfo.PrivateVariable;
4282 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4283 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4284 Type *DestTy = RedInfo.ElementType;
4285 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4287 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4288 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4290 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4291 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4292 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
4294 InsertPointOrErrorTy AfterIP =
4295 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4297 return AfterIP.takeError();
// Result is accumulated in place at the higher index.
4298 Builder.CreateStore(Result, LHSPtr);
4301 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4302 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4303 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4304 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4305 emitBlock(InnerExitBB, CurFn);
// Advance outer round: Counter++ and Pow2K <<= 1 (nuw).
4307 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
4310 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4311 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4313 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
// Run the whole scan reduction under `masked`, then synchronize and
// emit the final write-back of results.
4323 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4324 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4327 return AfterIP.takeError();
4328 Builder.restoreIP(*AfterIP);
4329 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4332 return AfterIP.takeError();
4333 Builder.restoreIP(*AfterIP);
4334 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
// Drives the two-pass lowering of a scan-based directive: runs the
// caller-supplied input-phase loop generator with OMPFirstScanLoop set, then
// the scan-phase loop generator with it cleared. (Error propagation between
// the two calls is on elided lines of this extracted listing.)
4341Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4344 ScanInfo *ScanRedInfo) {
// Pass 1: the input loop populates the scan buffers.
4352 ScanRedInfo->OMPFirstScanLoop =
true;
4353 Error Err = InputLoopGen();
// Pass 2: the scan loop consumes the reduced buffer values.
4363 ScanRedInfo->OMPFirstScanLoop =
false;
4364 Error Err = ScanLoopGen(Builder.saveIP());
// Creates the four helper basic blocks used when lowering a scan construct
// (dispatch, after-scan, before-scan, loop-exit) in the current function.
// The BasicBlock::Create calls themselves are on elided lines of this
// extracted listing.
4371void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4372 Function *
Fun = Builder.GetInsertBlock()->getParent();
4373 ScanRedInfo->OMPScanDispatch =
4375 ScanRedInfo->OMPAfterScanBlock =
4377 ScanRedInfo->OMPBeforeScanBlock =
4379 ScanRedInfo->OMPScanLoopExit =
// Builds the CFG skeleton of a canonical loop: preheader -> header (with the
// zero-initialized induction PHI) -> cond (IV < TripCount) -> body -> latch
// (IV+1, nuw) -> back to header, with exit -> after on the false edge.
// Returns a CanonicalLoopInfo describing the created blocks.
// NOTE(review): extracted listing — the parameter list, block creation and
// most CL field assignments are on elided lines.
4382CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4406 Builder.SetCurrentDebugLocation(
DL);
// Preheader unconditionally enters the header.
4408 Builder.SetInsertPoint(Preheader);
4409 Builder.CreateBr(Header);
// Header: induction variable PHI starting at 0.
4411 Builder.SetInsertPoint(Header);
4412 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4413 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4414 Builder.CreateBr(
Cond);
// Cond: continue while IV < TripCount (unsigned compare).
4416 Builder.SetInsertPoint(
Cond);
4418 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4419 Builder.CreateCondBr(Cmp, Body, Exit);
4421 Builder.SetInsertPoint(Body);
4422 Builder.CreateBr(Latch);
// Latch: increment (nuw) and loop back to the header.
4424 Builder.SetInsertPoint(Latch);
4425 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4426 "omp_" + Name +
".next",
true);
4427 Builder.CreateBr(Header);
4430 Builder.SetInsertPoint(Exit);
4431 Builder.CreateBr(After);
// Record the new loop in the builder-owned list and fill in its blocks.
4434 LoopInfos.emplace_front();
4435 CanonicalLoopInfo *CL = &LoopInfos.front();
4437 CL->Header = Header;
// Creates a canonical loop for the given trip count: builds the skeleton via
// createLoopSkeleton, splices it into the current insertion point, and runs
// the body-generation callback at the loop body with the induction variable.
// NOTE(review): extracted listing — the return-type line, BB/After/NextBB
// and the return statement are on elided lines.
4449OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4450 LoopBodyGenCallbackTy BodyGenCB,
4455 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4456 NextBB, NextBB, Name);
// If a location was given, branch from it into the loop's preheader.
4460 if (updateToLocation(
Loc)) {
4464 spliceBB(Builder, After,
false);
4465 Builder.CreateBr(CL->getPreheader());
// Let the caller populate the loop body.
4470 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4480 ScanInfos.emplace_front();
4481 ScanInfo *
Result = &ScanInfos.front();
4486OpenMPIRBuilder::createCanonicalScanLoops(
4487 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4488 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4489 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
4490 LocationDescription ComputeLoc =
4491 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4492 updateToLocation(ComputeLoc);
4496 Value *TripCount = calculateCanonicalLoopTripCount(
4497 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4498 ScanRedInfo->Span = TripCount;
4499 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4500 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
4502 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4503 Builder.restoreIP(CodeGenIP);
4504 ScanRedInfo->IV =
IV;
4505 createScanBBs(ScanRedInfo);
4506 BasicBlock *InputBlock = Builder.GetInsertBlock();
4510 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4511 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4512 Builder.GetInsertBlock()->getParent());
4513 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4514 emitBlock(ScanRedInfo->OMPScanLoopExit,
4515 Builder.GetInsertBlock()->getParent());
4516 Builder.CreateBr(ContinueBlock);
4517 Builder.SetInsertPoint(
4518 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4519 return BodyGenCB(Builder.saveIP(),
IV);
4522 const auto &&InputLoopGen = [&]() ->
Error {
4524 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4525 ComputeIP, Name,
true, ScanRedInfo);
4529 Builder.restoreIP((*LoopInfo)->getAfterIP());
4532 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4534 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4535 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4539 Builder.restoreIP((*LoopInfo)->getAfterIP());
4540 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4543 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
4549Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4551 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
4561 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4562 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4564 updateToLocation(
Loc);
4581 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4582 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4583 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4584 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4585 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
4586 ZeroCmp = Builder.CreateICmp(
4589 Span = Builder.CreateSub(Stop, Start,
"",
true);
4590 ZeroCmp = Builder.CreateICmp(
4594 Value *CountIfLooping;
4595 if (InclusiveStop) {
4596 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
4599 Value *CountIfTwo = Builder.CreateAdd(
4600 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4602 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
4605 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4606 "omp_" + Name +
".tripcount");
4610 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4611 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4612 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4613 ScanInfo *ScanRedInfo) {
4614 LocationDescription ComputeLoc =
4615 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4617 Value *TripCount = calculateCanonicalLoopTripCount(
4618 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4620 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4621 Builder.restoreIP(CodeGenIP);
4622 Value *Span = Builder.CreateMul(
IV, Step);
4623 Value *IndVar = Builder.CreateAdd(Span, Start);
4625 ScanRedInfo->IV = IndVar;
4626 return BodyGenCB(Builder.saveIP(), IndVar);
4628 LocationDescription LoopLoc =
4631 : LocationDescription(Builder.saveIP(),
4632 Builder.getCurrentDebugLocation());
4633 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
4642 OpenMPIRBuilder &OMPBuilder) {
4643 unsigned Bitwidth = Ty->getIntegerBitWidth();
4645 return OMPBuilder.getOrCreateRuntimeFunction(
4646 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4648 return OMPBuilder.getOrCreateRuntimeFunction(
4649 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4658 OpenMPIRBuilder &OMPBuilder) {
4659 unsigned Bitwidth = Ty->getIntegerBitWidth();
4661 return OMPBuilder.getOrCreateRuntimeFunction(
4662 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4664 return OMPBuilder.getOrCreateRuntimeFunction(
4665 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
4669OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4670 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4672 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4674 "Require dedicated allocate IP");
4677 Builder.restoreIP(CLI->getPreheaderIP());
4678 Builder.SetCurrentDebugLocation(
DL);
4681 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4682 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4686 Type *IVTy =
IV->getType();
4688 LoopType == WorksharingLoopType::DistributeForStaticLoop
4692 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4695 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4698 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4699 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
4700 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
4701 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
4702 CLI->setLastIter(PLastIter);
4708 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4710 Constant *One = ConstantInt::get(IVTy, 1);
4711 Builder.CreateStore(Zero, PLowerBound);
4712 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4713 Builder.CreateStore(UpperBound, PUpperBound);
4714 Builder.CreateStore(One, PStride);
4716 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4719 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4720 ? OMPScheduleType::OrderedDistribute
4723 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4728 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4729 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4730 Value *PDistUpperBound =
4731 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4732 Args.push_back(PDistUpperBound);
4735 Builder.CreateCall(StaticInit, Args);
4736 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4737 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4738 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4739 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4740 CLI->setTripCount(TripCount);
4747 Builder.SetInsertPoint(CLI->getBody(),
4748 CLI->getBody()->getFirstInsertionPt());
4749 Builder.SetCurrentDebugLocation(
DL);
4750 return Builder.CreateAdd(OldIV, LowerBound);
4754 Builder.SetInsertPoint(CLI->getExit(),
4755 CLI->getExit()->getTerminator()->getIterator());
4756 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4760 InsertPointOrErrorTy BarrierIP =
4761 createBarrier(LocationDescription(Builder.saveIP(),
DL),
4762 omp::Directive::OMPD_for,
false,
4765 return BarrierIP.takeError();
4768 InsertPointTy AfterIP = CLI->getAfterIP();
4774OpenMPIRBuilder::InsertPointOrErrorTy
4775OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4776 CanonicalLoopInfo *CLI,
4777 InsertPointTy AllocaIP,
4780 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4781 assert(ChunkSize &&
"Chunk size is required");
4783 LLVMContext &Ctx = CLI->getFunction()->getContext();
4785 Value *OrigTripCount = CLI->getTripCount();
4786 Type *IVTy =
IV->getType();
4788 "Max supported tripcount bitwidth is 64 bits");
4790 :
Type::getInt64Ty(Ctx);
4793 Constant *One = ConstantInt::get(InternalIVTy, 1);
4799 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4802 Builder.restoreIP(AllocaIP);
4803 Builder.SetCurrentDebugLocation(
DL);
4804 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4805 Value *PLowerBound =
4806 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
4807 Value *PUpperBound =
4808 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
4809 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
4810 CLI->setLastIter(PLastIter);
4813 Builder.restoreIP(CLI->getPreheaderIP());
4814 Builder.SetCurrentDebugLocation(
DL);
4817 Value *CastedChunkSize =
4818 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
4819 Value *CastedTripCount =
4820 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
4822 Constant *SchedulingType = ConstantInt::get(
4823 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4824 Builder.CreateStore(Zero, PLowerBound);
4825 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
4826 Builder.CreateStore(OrigUpperBound, PUpperBound);
4827 Builder.CreateStore(One, PStride);
4832 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4833 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4834 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4835 Builder.CreateCall(StaticInit,
4837 SchedulingType, PLastIter,
4838 PLowerBound, PUpperBound,
4843 Value *FirstChunkStart =
4844 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
4845 Value *FirstChunkStop =
4846 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
4847 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
4849 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
4850 Value *NextChunkStride =
4851 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
4854 BasicBlock *DispatchEnter = splitBB(Builder,
true);
4855 Value *DispatchCounter;
4860 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
4861 {Builder.saveIP(),
DL},
4862 [&](InsertPointTy BodyIP,
Value *Counter) {
4863 DispatchCounter = Counter;
4866 FirstChunkStart, CastedTripCount, NextChunkStride,
4872 BasicBlock *DispatchBody = DispatchCLI->getBody();
4873 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4874 BasicBlock *DispatchExit = DispatchCLI->getExit();
4875 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4876 DispatchCLI->invalidate();
4884 Builder.restoreIP(CLI->getPreheaderIP());
4885 Builder.SetCurrentDebugLocation(
DL);
4888 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4889 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
4890 Value *IsLastChunk =
4891 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
4892 Value *CountUntilOrigTripCount =
4893 Builder.CreateSub(CastedTripCount, DispatchCounter);
4894 Value *ChunkTripCount = Builder.CreateSelect(
4895 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4896 Value *BackcastedChunkTC =
4897 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
4898 CLI->setTripCount(BackcastedChunkTC);
4903 Value *BackcastedDispatchCounter =
4904 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
4906 Builder.restoreIP(CLI->getBodyIP());
4907 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
4912 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4916 InsertPointOrErrorTy AfterIP =
4917 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
4920 return AfterIP.takeError();
4938 unsigned Bitwidth = Ty->getIntegerBitWidth();
4939 Module &M = OMPBuilder->M;
4941 case WorksharingLoopType::ForStaticLoop:
4943 return OMPBuilder->getOrCreateRuntimeFunction(
4944 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4946 return OMPBuilder->getOrCreateRuntimeFunction(
4947 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4949 case WorksharingLoopType::DistributeStaticLoop:
4951 return OMPBuilder->getOrCreateRuntimeFunction(
4952 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4954 return OMPBuilder->getOrCreateRuntimeFunction(
4955 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4957 case WorksharingLoopType::DistributeForStaticLoop:
4959 return OMPBuilder->getOrCreateRuntimeFunction(
4960 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4962 return OMPBuilder->getOrCreateRuntimeFunction(
4963 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4966 if (Bitwidth != 32 && Bitwidth != 64) {
4980 Module &M = OMPBuilder->M;
4989 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4990 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4991 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
4992 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4993 Builder.CreateCall(RTLFn, RealArgs);
4996 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
4997 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
4998 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4999 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
5002 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5003 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5004 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5005 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5007 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5009 Builder.CreateCall(RTLFn, RealArgs);
5013 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5018 Value *TripCount = CLI->getTripCount();
5024 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5025 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5030 Builder.restoreIP({Preheader, Preheader->
end()});
5033 Builder.CreateBr(CLI->getExit());
5036 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5039 CleanUpInfo.EntryBB = CLI->getHeader();
5040 CleanUpInfo.ExitBB = CLI->getExit();
5041 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5049 "Expected unique undroppable user of outlined function");
5051 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5053 "Expected outlined function call to be located in loop preheader");
5055 if (OutlinedFnCallInstruction->
arg_size() > 1)
5062 LoopBodyArg, TripCount, OutlinedFn);
5064 for (
auto &ToBeDeletedItem : ToBeDeleted)
5065 ToBeDeletedItem->eraseFromParent();
5069OpenMPIRBuilder::InsertPointTy
5070OpenMPIRBuilder::applyWorkshareLoopTarget(
DebugLoc DL, CanonicalLoopInfo *CLI,
5071 InsertPointTy AllocaIP,
5074 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5075 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5078 OI.OuterAllocaBB = CLI->getPreheader();
5084 OI.OuterAllocaBB = AllocaIP.getBlock();
5087 OI.EntryBB = CLI->getBody();
5088 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5089 "omp.prelatch",
true);
5092 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5096 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5098 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5109 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5120 CLI->getPreheader(),
5129 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5135 CLI->getIndVar()->user_end());
5138 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5139 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5145 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5152 OI.PostOutlineCB = [=, ToBeDeletedVec =
5153 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5157 addOutlineInfo(std::move(OI));
5158 return CLI->getAfterIP();
5161OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5162 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5163 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5164 bool HasSimdModifier,
bool HasMonotonicModifier,
5165 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5167 if (Config.isTargetDevice())
5168 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType);
5170 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5171 HasNonmonotonicModifier, HasOrderedClause);
5173 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5174 OMPScheduleType::ModifierOrdered;
5175 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5176 case OMPScheduleType::BaseStatic:
5177 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
5179 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5180 NeedsBarrier, ChunkSize);
5182 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5184 case OMPScheduleType::BaseStaticChunked:
5186 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5187 NeedsBarrier, ChunkSize);
5189 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
5192 case OMPScheduleType::BaseRuntime:
5193 case OMPScheduleType::BaseAuto:
5194 case OMPScheduleType::BaseGreedy:
5195 case OMPScheduleType::BaseBalanced:
5196 case OMPScheduleType::BaseSteal:
5197 case OMPScheduleType::BaseGuidedSimd:
5198 case OMPScheduleType::BaseRuntimeSimd:
5200 "schedule type does not support user-defined chunk sizes");
5202 case OMPScheduleType::BaseDynamicChunked:
5203 case OMPScheduleType::BaseGuidedChunked:
5204 case OMPScheduleType::BaseGuidedIterativeChunked:
5205 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5206 case OMPScheduleType::BaseStaticBalancedChunked:
5207 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5208 NeedsBarrier, ChunkSize);
5221 unsigned Bitwidth = Ty->getIntegerBitWidth();
5223 return OMPBuilder.getOrCreateRuntimeFunction(
5224 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5226 return OMPBuilder.getOrCreateRuntimeFunction(
5227 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5237 unsigned Bitwidth = Ty->getIntegerBitWidth();
5239 return OMPBuilder.getOrCreateRuntimeFunction(
5240 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5242 return OMPBuilder.getOrCreateRuntimeFunction(
5243 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5252 unsigned Bitwidth = Ty->getIntegerBitWidth();
5254 return OMPBuilder.getOrCreateRuntimeFunction(
5255 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5257 return OMPBuilder.getOrCreateRuntimeFunction(
5258 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5262OpenMPIRBuilder::InsertPointOrErrorTy
5263OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
5264 InsertPointTy AllocaIP,
5266 bool NeedsBarrier,
Value *Chunk) {
5267 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5269 "Require dedicated allocate IP");
5271 "Require valid schedule type");
5273 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5274 OMPScheduleType::ModifierOrdered;
5277 Builder.SetCurrentDebugLocation(
DL);
5280 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5281 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5285 Type *IVTy =
IV->getType();
5290 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5292 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5293 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5294 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5295 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5296 CLI->setLastIter(PLastIter);
5304 Constant *One = ConstantInt::get(IVTy, 1);
5305 Builder.CreateStore(One, PLowerBound);
5306 Value *UpperBound = CLI->getTripCount();
5307 Builder.CreateStore(UpperBound, PUpperBound);
5308 Builder.CreateStore(One, PStride);
5314 InsertPointTy AfterIP = CLI->getAfterIP();
5322 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5325 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5328 Builder.CreateCall(DynamicInit,
5329 {SrcLoc, ThreadNum, SchedulingType, One,
5330 UpperBound, One, Chunk});
5339 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
5340 PLowerBound, PUpperBound, PStride});
5341 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5344 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
5345 Builder.CreateCondBr(MoreWork, Header, Exit);
5351 PI->setIncomingBlock(0, OuterCond);
5352 PI->setIncomingValue(0, LowerBound);
5357 Br->setSuccessor(0, OuterCond);
5362 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
5363 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
5370 assert(BI->getSuccessor(1) == Exit);
5371 BI->setSuccessor(1, OuterCond);
5375 Builder.SetInsertPoint(&Latch->
back());
5377 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
5382 Builder.SetInsertPoint(&
Exit->back());
5383 InsertPointOrErrorTy BarrierIP =
5384 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5385 omp::Directive::OMPD_for,
false,
5388 return BarrierIP.takeError();
5407 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5412 if (BBsToErase.
count(UseInst->getParent()))
5419 while (BBsToErase.
remove_if(HasRemainingUses)) {
5429 InsertPointTy ComputeIP) {
5430 assert(
Loops.size() >= 1 &&
"At least one loop required");
5431 size_t NumLoops =
Loops.size();
5435 return Loops.front();
5437 CanonicalLoopInfo *Outermost =
Loops.front();
5438 CanonicalLoopInfo *Innermost =
Loops.back();
5439 BasicBlock *OrigPreheader = Outermost->getPreheader();
5440 BasicBlock *OrigAfter = Outermost->getAfter();
5447 Loop->collectControlBlocks(OldControlBBs);
5450 Builder.SetCurrentDebugLocation(
DL);
5451 if (ComputeIP.isSet())
5452 Builder.restoreIP(ComputeIP);
5454 Builder.restoreIP(Outermost->getPreheaderIP());
5458 Value *CollapsedTripCount =
nullptr;
5459 for (CanonicalLoopInfo *L :
Loops) {
5461 "All loops to collapse must be valid canonical loops");
5462 Value *OrigTripCount =
L->getTripCount();
5463 if (!CollapsedTripCount) {
5464 CollapsedTripCount = OrigTripCount;
5469 CollapsedTripCount = Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5473 CanonicalLoopInfo *
Result =
5474 createLoopSkeleton(
DL, CollapsedTripCount,
F,
5475 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5481 Builder.restoreIP(
Result->getBodyIP());
5485 NewIndVars.
resize(NumLoops);
5486 for (
int i = NumLoops - 1; i >= 1; --i) {
5487 Value *OrigTripCount =
Loops[i]->getTripCount();
5489 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5490 NewIndVars[i] = NewIndVar;
5492 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5495 NewIndVars[0] = Leftover;
5506 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5513 ContinueBlock =
nullptr;
5514 ContinuePred = NextSrc;
5521 for (
size_t i = 0; i < NumLoops - 1; ++i)
5522 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5525 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5528 for (
size_t i = NumLoops - 1; i > 0; --i)
5529 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5532 ContinueWith(
Result->getLatch(),
nullptr);
5539 for (
size_t i = 0; i < NumLoops; ++i)
5540 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5545 for (CanonicalLoopInfo *L :
Loops)
5554std::vector<CanonicalLoopInfo *>
5558 "Must pass as many tile sizes as there are loops");
5559 int NumLoops =
Loops.size();
5560 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5562 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5563 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5564 Function *
F = OutermostLoop->getBody()->getParent();
5565 BasicBlock *InnerEnter = InnermostLoop->getBody();
5566 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5572 Loop->collectControlBlocks(OldControlBBs);
5579 for (CanonicalLoopInfo *L :
Loops) {
5580 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5592 for (
int i = 0; i < NumLoops - 1; ++i) {
5593 CanonicalLoopInfo *Surrounding =
Loops[i];
5596 BasicBlock *EnterBB = Surrounding->getBody();
5602 Builder.SetCurrentDebugLocation(
DL);
5603 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5605 for (
int i = 0; i < NumLoops; ++i) {
5607 Value *OrigTripCount = OrigTripCounts[i];
5610 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5611 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5620 Value *FloorTripOverflow =
5621 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5623 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5624 Value *FloorTripCount =
5625 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5626 "omp_floor" +
Twine(i) +
".tripcount",
true);
5629 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5635 std::vector<CanonicalLoopInfo *>
Result;
5636 Result.reserve(NumLoops * 2);
5640 BasicBlock *Enter = OutermostLoop->getPreheader();
5647 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5649 auto EmbeddNewLoop =
5650 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5652 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5653 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
5658 Enter = EmbeddedLoop->getBody();
5659 Continue = EmbeddedLoop->getLatch();
5660 OutroInsertBefore = EmbeddedLoop->getLatch();
5661 return EmbeddedLoop;
5665 const Twine &NameBase) {
5667 CanonicalLoopInfo *EmbeddedLoop =
5668 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5669 Result.push_back(EmbeddedLoop);
5673 EmbeddNewLoops(FloorCount,
"floor");
5677 Builder.SetInsertPoint(Enter->getTerminator());
5679 for (
int i = 0; i < NumLoops; ++i) {
5680 CanonicalLoopInfo *FloorLoop =
Result[i];
5683 Value *FloorIsEpilogue =
5684 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5685 Value *TileTripCount =
5686 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
5692 EmbeddNewLoops(TileCounts,
"tile");
5697 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5706 BodyEnter =
nullptr;
5707 BodyEntered = ExitBB;
5719 Builder.restoreIP(
Result.back()->getBodyIP());
5720 for (
int i = 0; i < NumLoops; ++i) {
5721 CanonicalLoopInfo *FloorLoop =
Result[i];
5722 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
5723 Value *OrigIndVar = OrigIndVars[i];
5727 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
5729 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
5736 for (CanonicalLoopInfo *L :
Loops)
5740 for (CanonicalLoopInfo *GenL : Result)
5751 if (Properties.
empty())
5774 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5778 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5786 if (
I.mayReadOrWriteMemory()) {
5790 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5795void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
5802void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
5810void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
5813 const Twine &NamePrefix) {
5814 Function *
F = CanonicalLoop->getFunction();
5836 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
5842 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
5844 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
5847 Builder.SetInsertPoint(SplitBeforeIt);
5849 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
5852 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
5855 Builder.SetInsertPoint(ElseBlock);
5861 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
5863 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
5869 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
5871 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
5878 if (
Block == ThenBlock)
5879 NewBB->
setName(NamePrefix +
".if.else");
5882 VMap[
Block] = NewBB;
5886 Builder.CreateBr(NewBlocks.
front());
5890 L->getLoopLatch()->splitBasicBlock(
5891 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
5895 L->addBasicBlockToLoop(ThenBlock, LI);
5899OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
5901 if (TargetTriple.
isX86()) {
5902 if (Features.
lookup(
"avx512f"))
5904 else if (Features.
lookup(
"avx"))
5908 if (TargetTriple.
isPPC())
5910 if (TargetTriple.
isWasm())
5915void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
5917 Value *IfCond, OrderKind Order,
5921 Function *
F = CanonicalLoop->getFunction();
5936 if (AlignedVars.
size()) {
5937 InsertPointTy IP = Builder.saveIP();
5938 for (
auto &AlignedItem : AlignedVars) {
5939 Value *AlignedPtr = AlignedItem.first;
5940 Value *Alignment = AlignedItem.second;
5943 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
5946 Builder.restoreIP(IP);
5951 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
5961 if (
Block == CanonicalLoop->getCond() ||
5962 Block == CanonicalLoop->getHeader())
5964 Reachable.insert(
Block);
5974 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5982 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5998 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6000 if (Simdlen || Safelen) {
6004 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6030static std::unique_ptr<TargetMachine>
6034 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6035 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6046 std::nullopt, OptLevel));
6070 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6071 FAM.registerPass([&]() {
return TIRA; });
6085 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6090 nullptr, ORE,
static_cast<int>(OptLevel),
6111 <<
" Threshold=" << UP.
Threshold <<
"\n"
6114 <<
" PartialOptSizeThreshold="
6134 Ptr = Load->getPointerOperand();
6136 Ptr = Store->getPointerOperand();
6140 Ptr =
Ptr->stripPointerCasts();
6143 if (Alloca->getParent() == &
F->getEntryBlock())
6163 int MaxTripCount = 0;
6164 bool MaxOrZero =
false;
6165 unsigned TripMultiple = 0;
6167 bool UseUpperBound =
false;
6169 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6171 unsigned Factor = UP.
Count;
6172 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6180void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6182 CanonicalLoopInfo **UnrolledCLI) {
6183 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6199 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6212 *UnrolledCLI =
Loop;
6217 "unrolling only makes sense with a factor of 2 or larger");
6219 Type *IndVarTy =
Loop->getIndVarType();
6226 std::vector<CanonicalLoopInfo *>
LoopNest =
6227 tileLoops(
DL, {
Loop}, {FactorVal});
6230 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6241 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6244 (*UnrolledCLI)->assertOK();
6248OpenMPIRBuilder::InsertPointTy
6249OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
6252 if (!updateToLocation(
Loc))
6256 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6257 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6258 Value *ThreadId = getOrCreateThreadID(Ident);
6260 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6262 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6264 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6265 Builder.CreateCall(Fn, Args);
6267 return Builder.saveIP();
6270OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6271 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6275 if (!updateToLocation(
Loc))
6281 if (!CPVars.
empty()) {
6283 Builder.CreateStore(Builder.getInt32(0), DidIt);
6286 Directive OMPD = Directive::OMPD_single;
6288 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6289 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6290 Value *ThreadId = getOrCreateThreadID(Ident);
6293 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6294 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6296 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6297 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6299 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6300 if (
Error Err = FiniCB(IP))
6307 Builder.CreateStore(Builder.getInt32(1), DidIt);
6320 InsertPointOrErrorTy AfterIP =
6321 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6325 return AfterIP.takeError();
6328 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
6330 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
6331 ConstantInt::get(
Int64, 0), CPVars[
I],
6334 }
else if (!IsNowait) {
6335 InsertPointOrErrorTy AfterIP =
6336 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
6337 omp::Directive::OMPD_unknown,
false,
6340 return AfterIP.takeError();
6342 return Builder.saveIP();
6345OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6346 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6347 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
6349 if (!updateToLocation(
Loc))
6352 Directive OMPD = Directive::OMPD_critical;
6354 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6355 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6356 Value *ThreadId = getOrCreateThreadID(Ident);
6357 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6358 Value *
Args[] = {Ident, ThreadId, LockVar};
6364 EnterArgs.push_back(HintInst);
6365 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6367 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6369 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
6372 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6373 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6375 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6379OpenMPIRBuilder::InsertPointTy
6380OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
6381 InsertPointTy AllocaIP,
unsigned NumLoops,
6383 const Twine &Name,
bool IsDependSource) {
6387 "OpenMP runtime requires depend vec with i64 type");
6389 if (!updateToLocation(
Loc))
6394 Builder.restoreIP(AllocaIP);
6395 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
6397 updateToLocation(
Loc);
6400 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6401 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6402 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
6403 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
6407 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6408 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6411 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6412 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6413 Value *ThreadId = getOrCreateThreadID(Ident);
6414 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6418 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6420 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6421 Builder.CreateCall(RTLFn, Args);
6423 return Builder.saveIP();
6426OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6427 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6428 FinalizeCallbackTy FiniCB,
bool IsThreads) {
6429 if (!updateToLocation(
Loc))
6432 Directive OMPD = Directive::OMPD_ordered;
6438 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6439 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6440 Value *ThreadId = getOrCreateThreadID(Ident);
6443 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6444 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6447 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6448 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6451 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6455OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6457 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6458 bool HasFinalize,
bool IsCancellable) {
6461 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6465 BasicBlock *EntryBB = Builder.GetInsertBlock();
6474 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6477 if (
Error Err = BodyGenCB( InsertPointTy(),
6485 "Unexpected control flow graph state!!");
6486 InsertPointOrErrorTy AfterIP =
6487 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6489 return AfterIP.takeError();
6491 "Unexpected Control Flow State!");
6497 "Unexpected Insertion point location!");
6500 auto InsertBB = merged ? ExitPredBB : ExitBB;
6503 Builder.SetInsertPoint(InsertBB);
6505 return Builder.saveIP();
6508OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
6511 if (!Conditional || !EntryCall)
6512 return Builder.saveIP();
6514 BasicBlock *EntryBB = Builder.GetInsertBlock();
6515 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6527 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
6529 Builder.SetInsertPoint(UI);
6530 Builder.Insert(EntryBBTI);
6531 UI->eraseFromParent();
6538OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6539 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6542 Builder.restoreIP(FinIP);
6546 assert(!FinalizationStack.empty() &&
6547 "Unexpected finalization stack state!");
6549 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6550 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6552 if (
Error Err = Fi.FiniCB(FinIP))
6559 Builder.SetInsertPoint(FiniBBTI);
6563 return Builder.saveIP();
6567 Builder.Insert(ExitCall);
6573OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6574 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
6603 "copyin.not.master.end");
6610 Builder.SetInsertPoint(OMP_Entry);
6611 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6612 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6613 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6614 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
6616 Builder.SetInsertPoint(CopyBegin);
6618 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6620 return Builder.saveIP();
6623CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
6627 updateToLocation(
Loc);
6630 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6631 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6632 Value *ThreadId = getOrCreateThreadID(Ident);
6635 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6637 return Builder.CreateCall(Fn, Args, Name);
6640CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
6644 updateToLocation(
Loc);
6647 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6648 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6649 Value *ThreadId = getOrCreateThreadID(Ident);
6651 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6652 return Builder.CreateCall(Fn, Args, Name);
6655CallInst *OpenMPIRBuilder::createOMPInteropInit(
6656 const LocationDescription &
Loc,
Value *InteropVar,
6658 Value *DependenceAddress,
bool HaveNowaitClause) {
6660 updateToLocation(
Loc);
6663 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6664 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6665 Value *ThreadId = getOrCreateThreadID(Ident);
6666 if (Device ==
nullptr)
6668 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
6669 if (NumDependences ==
nullptr) {
6670 NumDependences = ConstantInt::get(
Int32, 0);
6674 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6676 Ident, ThreadId, InteropVar, InteropTypeVal,
6677 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6679 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6681 return Builder.CreateCall(Fn, Args);
6684CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6685 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
6686 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6688 updateToLocation(
Loc);
6691 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6692 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6693 Value *ThreadId = getOrCreateThreadID(Ident);
6694 if (Device ==
nullptr)
6696 if (NumDependences ==
nullptr) {
6697 NumDependences = ConstantInt::get(
Int32, 0);
6701 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6703 Ident, ThreadId, InteropVar,
Device,
6704 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6706 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6708 return Builder.CreateCall(Fn, Args);
6711CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
6713 Value *NumDependences,
6714 Value *DependenceAddress,
6715 bool HaveNowaitClause) {
6717 updateToLocation(
Loc);
6719 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6720 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6721 Value *ThreadId = getOrCreateThreadID(Ident);
6722 if (Device ==
nullptr)
6724 if (NumDependences ==
nullptr) {
6725 NumDependences = ConstantInt::get(
Int32, 0);
6729 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6731 Ident, ThreadId, InteropVar,
Device,
6732 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6734 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6736 return Builder.CreateCall(Fn, Args);
6739CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6743 updateToLocation(
Loc);
6746 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6747 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6748 Value *ThreadId = getOrCreateThreadID(Ident);
6750 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
6754 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6756 return Builder.CreateCall(Fn, Args);
6759OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6760 const LocationDescription &
Loc,
6761 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6763 "expected num_threads and num_teams to be specified");
6765 if (!updateToLocation(
Loc))
6769 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6770 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6782 const std::string DebugPrefix =
"_debug__";
6783 if (KernelName.
ends_with(DebugPrefix)) {
6784 KernelName = KernelName.
drop_back(DebugPrefix.length());
6785 Kernel = M.getFunction(KernelName);
6791 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
6796 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
6797 if (MaxThreadsVal < 0)
6798 MaxThreadsVal = std::max(
6801 if (MaxThreadsVal > 0)
6802 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
6813 Function *Fn = getOrCreateRuntimeFunctionPtr(
6814 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6817 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6818 Constant *DynamicEnvironmentInitializer =
6822 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6824 DL.getDefaultGlobalsAddressSpace());
6828 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6829 ? DynamicEnvironmentGV
6831 DynamicEnvironmentPtr);
6834 ConfigurationEnvironment, {
6835 UseGenericStateMachineVal,
6836 MayUseNestedParallelismVal,
6843 ReductionBufferLength,
6846 KernelEnvironment, {
6847 ConfigurationEnvironmentInitializer,
6851 std::string KernelEnvironmentName =
6852 (KernelName +
"_kernel_environment").str();
6855 KernelEnvironmentInitializer, KernelEnvironmentName,
6857 DL.getDefaultGlobalsAddressSpace());
6861 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6862 ? KernelEnvironmentGV
6864 KernelEnvironmentPtr);
6865 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6867 KernelLaunchEnvironment =
6868 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
6869 ? KernelLaunchEnvironment
6870 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
6871 KernelLaunchEnvParamTy);
6873 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
6875 Value *ExecUserCode = Builder.CreateICmpEQ(
6885 auto *UI = Builder.CreateUnreachable();
6891 Builder.SetInsertPoint(WorkerExitBB);
6892 Builder.CreateRetVoid();
6895 Builder.SetInsertPoint(CheckBBTI);
6896 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6899 UI->eraseFromParent();
6906void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
6907 int32_t TeamsReductionDataSize,
6908 int32_t TeamsReductionBufferLength) {
6909 if (!updateToLocation(
Loc))
6912 Function *Fn = getOrCreateRuntimeFunctionPtr(
6913 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6915 Builder.CreateCall(Fn, {});
6917 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6923 const std::string DebugPrefix =
"_debug__";
6925 KernelName = KernelName.
drop_back(DebugPrefix.length());
6926 auto *KernelEnvironmentGV =
6927 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
6928 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6929 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
6931 KernelEnvironmentInitializer,
6932 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6934 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6941 if (
Kernel.hasFnAttribute(Name)) {
6942 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
6948std::pair<int32_t, int32_t>
6950 int32_t ThreadLimit =
6951 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
6954 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
6955 if (!Attr.isValid() || !Attr.isStringAttribute())
6956 return {0, ThreadLimit};
6957 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
6960 return {0, ThreadLimit};
6961 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6967 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
6968 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
6969 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6971 return {0, ThreadLimit};
6974void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
6977 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
6980 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
6988std::pair<int32_t, int32_t>
6991 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
6995 int32_t LB, int32_t UB) {
7002 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7005void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7007 if (Config.isTargetDevice()) {
7014 else if (
T.isNVPTX())
7016 else if (
T.isSPIRV())
7023 if (Config.isTargetDevice()) {
7024 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7033Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7038 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7039 "Named kernel already exists?");
7045Error OpenMPIRBuilder::emitTargetRegionFunction(
7046 TargetRegionEntryInfo &EntryInfo,
7047 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7051 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7053 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7057 OutlinedFn = *CBResult;
7059 OutlinedFn =
nullptr;
7065 if (!IsOffloadEntry)
7068 std::string EntryFnIDName =
7069 Config.isTargetDevice()
7070 ? std::string(EntryFnName)
7071 : createPlatformSpecificName({EntryFnName,
"region_id"});
7073 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7074 EntryFnName, EntryFnIDName);
7078Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7079 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7082 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7083 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7084 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7085 OffloadInfoManager.registerTargetRegionEntryInfo(
7086 EntryInfo, EntryAddr, OutlinedFnID,
7087 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7088 return OutlinedFnID;
7091OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7092 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7093 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7094 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7096 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7097 BodyGenTy BodyGenType)>
7100 if (!updateToLocation(
Loc))
7101 return InsertPointTy();
7103 Builder.restoreIP(CodeGenIP);
7105 if (Config.IsTargetDevice.value_or(
false)) {
7107 InsertPointOrErrorTy AfterIP =
7108 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7110 return AfterIP.takeError();
7111 Builder.restoreIP(*AfterIP);
7113 return Builder.saveIP();
7116 bool IsStandAlone = !BodyGenCB;
7117 MapInfosTy *MapInfo;
7121 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7122 InsertPointTy CodeGenIP) ->
Error {
7123 MapInfo = &GenMapInfoCB(Builder.saveIP());
7124 if (
Error Err = emitOffloadingArrays(
7125 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7126 true, DeviceAddrCB))
7129 TargetDataRTArgs RTArgs;
7130 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7133 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7138 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7139 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7143 SrcLocInfo, DeviceID,
7144 PointerNum, RTArgs.BasePointersArray,
7145 RTArgs.PointersArray, RTArgs.SizesArray,
7146 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7147 RTArgs.MappersArray};
7150 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7154 if (
Info.HasNoWait) {
7161 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7164 if (
Info.HasNoWait) {
7168 emitBlock(OffloadContBlock, CurFn,
true);
7169 Builder.restoreIP(Builder.saveIP());
7174 bool RequiresOuterTargetTask =
Info.HasNoWait;
7175 if (!RequiresOuterTargetTask)
7176 cantFail(TaskBodyCB(
nullptr,
nullptr,
7179 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7180 {}, RTArgs,
Info.HasNoWait));
7182 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7183 omp::OMPRTL___tgt_target_data_begin_mapper);
7185 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
7187 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7190 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7191 Builder.CreateStore(LI, DeviceMap.second.second);
7198 InsertPointOrErrorTy AfterIP =
7199 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7201 return AfterIP.takeError();
7202 Builder.restoreIP(*AfterIP);
7210 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7211 InsertPointTy CodeGenIP) ->
Error {
7212 InsertPointOrErrorTy AfterIP =
7213 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7215 return AfterIP.takeError();
7216 Builder.restoreIP(*AfterIP);
7221 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7222 TargetDataRTArgs RTArgs;
7223 Info.EmitDebug = !MapInfo->Names.empty();
7224 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7227 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7232 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7233 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7236 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7237 PointerNum, RTArgs.BasePointersArray,
7238 RTArgs.PointersArray, RTArgs.SizesArray,
7239 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7240 RTArgs.MappersArray};
7242 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7244 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
7250 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7258 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7259 return BeginThenGen(AllocaIP, Builder.saveIP());
7267 InsertPointOrErrorTy AfterIP =
7268 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7270 return AfterIP.takeError();
7274 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7275 return EndThenGen(AllocaIP, Builder.saveIP());
7278 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7279 return BeginThenGen(AllocaIP, Builder.saveIP());
7285 return Builder.saveIP();
7289OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7290 bool IsGPUDistribute) {
7291 assert((IVSize == 32 || IVSize == 64) &&
7292 "IV size is not compatible with the omp runtime");
7294 if (IsGPUDistribute)
7296 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7297 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7298 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7299 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7301 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7302 : omp::OMPRTL___kmpc_for_static_init_4u)
7303 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7304 : omp::OMPRTL___kmpc_for_static_init_8u);
7306 return getOrCreateRuntimeFunction(M, Name);
7309FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
7311 assert((IVSize == 32 || IVSize == 64) &&
7312 "IV size is not compatible with the omp runtime");
7314 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7315 : omp::OMPRTL___kmpc_dispatch_init_4u)
7316 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7317 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7319 return getOrCreateRuntimeFunction(M, Name);
7322FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
7324 assert((IVSize == 32 || IVSize == 64) &&
7325 "IV size is not compatible with the omp runtime");
7327 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7328 : omp::OMPRTL___kmpc_dispatch_next_4u)
7329 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7330 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7332 return getOrCreateRuntimeFunction(M, Name);
7335FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
7337 assert((IVSize == 32 || IVSize == 64) &&
7338 "IV size is not compatible with the omp runtime");
7340 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7341 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7342 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7343 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7345 return getOrCreateRuntimeFunction(M, Name);
7349 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7354 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7362 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7366 if (NewVar && (arg == NewVar->
getArg()))
7376 auto UpdateDebugRecord = [&](
auto *DR) {
7379 for (
auto Loc : DR->location_ops()) {
7380 auto Iter = ValueReplacementMap.find(
Loc);
7381 if (Iter != ValueReplacementMap.end()) {
7382 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7383 ArgNo = std::get<1>(Iter->second) + 1;
7387 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7394 "Unexpected debug intrinsic");
7396 UpdateDebugRecord(&DVR);
7399 if (OMPBuilder.Config.isTargetDevice()) {
7401 Module *M = Func->getParent();
7404 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7406 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7407 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7409 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7416 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7418 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7419 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7421 if (OMPBuilder.Config.isTargetDevice()) {
7429 for (
auto &Arg : Inputs)
7434 for (
auto &Arg : Inputs)
7438 auto BB = Builder.GetInsertBlock();
7450 if (TargetCpuAttr.isStringAttribute())
7451 Func->addFnAttr(TargetCpuAttr);
7453 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7454 if (TargetFeaturesAttr.isStringAttribute())
7455 Func->addFnAttr(TargetFeaturesAttr);
7457 if (OMPBuilder.Config.isTargetDevice()) {
7459 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7460 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7471 Builder.SetInsertPoint(EntryBB);
7474 if (OMPBuilder.Config.isTargetDevice())
7475 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7477 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7482 if (OMPBuilder.Config.isTargetDevice())
7483 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7487 splitBB(Builder,
true,
"outlined.body");
7488 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7490 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7492 return AfterIP.takeError();
7493 Builder.restoreIP(*AfterIP);
7494 if (OMPBuilder.Config.isTargetDevice())
7495 OMPBuilder.createTargetDeinit(Builder);
7498 Builder.CreateRetVoid();
7502 auto AllocaIP = Builder.saveIP();
7507 const auto &ArgRange =
7508 OMPBuilder.Config.isTargetDevice()
7509 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7542 if (Instr->getFunction() == Func)
7543 Instr->replaceUsesOfWith(
Input, InputCopy);
7549 for (
auto InArg :
zip(Inputs, ArgRange)) {
7551 Argument &Arg = std::get<1>(InArg);
7552 Value *InputCopy =
nullptr;
7554 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7555 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7557 return AfterIP.takeError();
7558 Builder.restoreIP(*AfterIP);
7559 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7578 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7585 ReplaceValue(
Input, InputCopy, Func);
7589 for (
auto Deferred : DeferredReplacement)
7590 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7593 ValueReplacementMap);
7601 Value *TaskWithPrivates,
7602 Type *TaskWithPrivatesTy) {
7604 Type *TaskTy = OMPIRBuilder.Task;
7607 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7608 Value *Shareds = TaskT;
7618 if (TaskWithPrivatesTy != TaskTy)
7619 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7636 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7641 assert((!NumOffloadingArrays || PrivatesTy) &&
7642 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7645 Module &M = OMPBuilder.M;
7669 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7675 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7676 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7682 ".omp_target_task_proxy_func",
7683 Builder.GetInsertBlock()->getModule());
7684 Value *ThreadId = ProxyFn->getArg(0);
7685 Value *TaskWithPrivates = ProxyFn->getArg(1);
7686 ThreadId->
setName(
"thread.id");
7687 TaskWithPrivates->
setName(
"task");
7689 bool HasShareds = SharedArgsOperandNo > 0;
7690 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7693 Builder.SetInsertPoint(EntryBB);
7699 if (HasOffloadingArrays) {
7700 assert(TaskTy != TaskWithPrivatesTy &&
7701 "If there are offloading arrays to pass to the target"
7702 "TaskTy cannot be the same as TaskWithPrivatesTy");
7705 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7706 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7708 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7712 auto *ArgStructAlloca =
7714 assert(ArgStructAlloca &&
7715 "Unable to find the alloca instruction corresponding to arguments "
7716 "for extracted function");
7720 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
7722 Value *SharedsSize =
7723 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7726 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7728 Builder.CreateMemCpy(
7729 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7731 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7733 Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7734 Builder.CreateRetVoid();
7740 return GEP->getSourceElementType();
7742 return Alloca->getAllocatedType();
7765 if (OffloadingArraysToPrivatize.
empty())
7766 return OMPIRBuilder.Task;
7769 for (
Value *V : OffloadingArraysToPrivatize) {
7770 assert(V->getType()->isPointerTy() &&
7771 "Expected pointer to array to privatize. Got a non-pointer value "
7774 assert(ArrayTy &&
"ArrayType cannot be nullptr");
7780 "struct.task_with_privates");
7783 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
7784 TargetRegionEntryInfo &EntryInfo,
7785 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7788 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7789 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7791 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
7794 EntryFnName, Inputs, CBFunc,
7798 return OMPBuilder.emitTargetRegionFunction(
7799 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7803OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7804 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
7805 OpenMPIRBuilder::InsertPointTy AllocaIP,
7807 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
7931 splitBB(Builder,
true,
"target.task.body");
7933 splitBB(Builder,
true,
"target.task.alloca");
7935 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
7936 TargetTaskAllocaBB->
begin());
7937 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
7940 OI.EntryBB = TargetTaskAllocaBB;
7941 OI.OuterAllocaBB = AllocaIP.getBlock();
7946 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7949 Builder.restoreIP(TargetTaskBodyIP);
7950 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7964 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
7968 bool NeedsTargetTask = HasNoWait && DeviceID;
7969 if (NeedsTargetTask) {
7971 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
7972 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
7973 RTArgs.SizesArray}) {
7975 OffloadingArraysToPrivatize.
push_back(V);
7976 OI.ExcludeArgsFromAggregate.push_back(V);
7980 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
7981 DeviceID, OffloadingArraysToPrivatize](
7984 "there must be a single user for the outlined function");
7998 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
7999 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8001 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8002 "Wrong number of arguments for StaleCI when shareds are present");
8003 int SharedArgOperandNo =
8004 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8010 if (!OffloadingArraysToPrivatize.
empty())
8015 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8016 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8018 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8021 Builder.SetInsertPoint(StaleCI);
8026 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8027 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8036 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8037 : getOrCreateRuntimeFunctionPtr(
8038 OMPRTL___kmpc_omp_target_task_alloc);
8042 Value *ThreadID = getOrCreateThreadID(Ident);
8049 Value *TaskSize = Builder.getInt64(
8050 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8055 Value *SharedsSize = Builder.getInt64(0);
8057 auto *ArgStructAlloca =
8059 assert(ArgStructAlloca &&
8060 "Unable to find the alloca instruction corresponding to arguments "
8061 "for extracted function");
8062 auto *ArgStructType =
8064 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8065 "arguments for extracted function");
8067 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8076 Value *Flags = Builder.getInt32(0);
8086 TaskSize, SharedsSize,
8089 if (NeedsTargetTask) {
8090 assert(DeviceID &&
"Expected non-empty device ID.");
8094 TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs);
8100 *
this, Builder, TaskData, TaskWithPrivatesTy);
8101 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8104 if (!OffloadingArraysToPrivatize.
empty()) {
8106 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8107 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8108 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8115 "ElementType should match ArrayType");
8118 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8119 Builder.CreateMemCpy(
8120 Dst, Alignment, PtrToPrivatize, Alignment,
8121 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8135 if (!NeedsTargetTask) {
8138 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8142 Builder.getInt32(Dependencies.size()),
8144 ConstantInt::get(Builder.getInt32Ty(), 0),
8150 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8152 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8153 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8154 CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
8156 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8157 }
else if (DepArray) {
8162 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8165 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8166 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8170 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8171 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
8176 I->eraseFromParent();
8178 addOutlineInfo(std::move(OI));
8181 << *(Builder.GetInsertBlock()) <<
"\n");
8183 << *(Builder.GetInsertBlock()->getParent()->getParent())
8185 return Builder.saveIP();
8188Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8189 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8190 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8191 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8194 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8195 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8197 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8203 OpenMPIRBuilder::InsertPointTy AllocaIP,
8204 OpenMPIRBuilder::TargetDataInfo &
Info,
8205 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8206 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8209 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8210 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8216 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8217 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8218 Builder.restoreIP(IP);
8219 Builder.CreateCall(OutlinedFn, Args);
8220 return Builder.saveIP();
8223 bool HasDependencies = Dependencies.
size() > 0;
8224 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8226 OpenMPIRBuilder::TargetKernelArgs KArgs;
8233 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8241 if (OutlinedFnID && DeviceID)
8242 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8243 EmitTargetCallFallbackCB, KArgs,
8244 DeviceID, RTLoc, TargetTaskAllocaIP);
8252 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8255 OMPBuilder.Builder.restoreIP(AfterIP);
8259 auto &&EmitTargetCallElse =
8260 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8261 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8264 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8265 if (RequiresOuterTargetTask) {
8269 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8270 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8272 Dependencies, EmptyRTArgs, HasNoWait);
8274 return EmitTargetCallFallbackCB(Builder.saveIP());
8277 Builder.restoreIP(AfterIP);
8281 auto &&EmitTargetCallThen =
8282 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8283 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8284 Info.HasNoWait = HasNoWait;
8285 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8286 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8287 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8288 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8295 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8296 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8301 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8303 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8307 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8310 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8318 Value *MaxThreadsClause =
8319 RuntimeAttrs.TeamsThreadLimit.size() == 1
8320 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8323 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8324 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8325 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8326 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8328 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8329 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8331 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8334 unsigned NumTargetItems =
Info.NumberOfPtrs;
8338 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8339 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8342 Value *TripCount = RuntimeAttrs.LoopTripCount
8343 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8344 Builder.getInt64Ty(),
8346 : Builder.getInt64(0);
8349 Value *DynCGGroupMem = Builder.getInt32(0);
8351 KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount,
8352 NumTeamsC, NumThreadsC,
8353 DynCGGroupMem, HasNoWait);
8357 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8360 if (RequiresOuterTargetTask)
8361 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8362 Dependencies, KArgs.RTArgs,
8365 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8366 EmitTargetCallFallbackCB, KArgs,
8367 DeviceID, RTLoc, AllocaIP);
8370 Builder.restoreIP(AfterIP);
8377 if (!OutlinedFnID) {
8378 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8384 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8388 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8389 EmitTargetCallElse, AllocaIP));
8392OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8393 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8394 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8395 TargetRegionEntryInfo &EntryInfo,
8396 const TargetKernelDefaultAttrs &DefaultAttrs,
8397 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8399 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8400 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8401 CustomMapperCallbackTy CustomMapperCB,
8404 if (!updateToLocation(
Loc))
8405 return InsertPointTy();
8407 Builder.restoreIP(CodeGenIP);
8415 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8416 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8422 if (!Config.isTargetDevice())
8424 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8425 CustomMapperCB, Dependencies, HasNowait);
8426 return Builder.saveIP();
8439 return OS.
str().str();
8444 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8445 Config.separator());
8449OpenMPIRBuilder::getOrCreateInternalVariable(
Type *Ty,
const StringRef &Name,
8451 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8453 assert(Elem.second->getValueType() == Ty &&
8454 "OMP internal variable has different type than requested");
8470 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8477Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8478 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8479 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8480 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8483Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8488 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8490 return SizePtrToInt;
8495 std::string VarName) {
8499 M, MaptypesArrayInit->
getType(),
8503 return MaptypesArrayGlobal;
8506void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8507 InsertPointTy AllocaIP,
8508 unsigned NumOperands,
8509 struct MapperAllocas &MapperAllocas) {
8510 if (!updateToLocation(
Loc))
8515 Builder.restoreIP(AllocaIP);
8517 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8521 ArrI64Ty,
nullptr,
".offload_sizes");
8522 updateToLocation(
Loc);
8523 MapperAllocas.ArgsBase = ArgsBase;
8524 MapperAllocas.Args =
Args;
8525 MapperAllocas.ArgSizes = ArgSizes;
8528void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8531 struct MapperAllocas &MapperAllocas,
8532 int64_t DeviceID,
unsigned NumOperands) {
8533 if (!updateToLocation(
Loc))
8538 Value *ArgsBaseGEP =
8539 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8540 {Builder.getInt32(0), Builder.getInt32(0)});
8542 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8543 {Builder.getInt32(0), Builder.getInt32(0)});
8544 Value *ArgSizesGEP =
8545 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8546 {Builder.getInt32(0), Builder.getInt32(0)});
8549 Builder.CreateCall(MapperFunc,
8550 {SrcLocInfo, Builder.getInt64(DeviceID),
8551 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
8552 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
8555void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8556 TargetDataRTArgs &RTArgs,
8557 TargetDataInfo &
Info,
8559 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8560 "expected region end call to runtime only when end call is separate");
8562 auto VoidPtrTy = UnqualPtrTy;
8563 auto VoidPtrPtrTy = UnqualPtrTy;
8565 auto Int64PtrTy = UnqualPtrTy;
8567 if (!
Info.NumberOfPtrs) {
8577 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8579 Info.RTArgs.BasePointersArray,
8581 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8585 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8588 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8590 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8591 :
Info.RTArgs.MapTypesArray,
8597 if (!
Info.EmitDebug)
8600 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8606 if (!
Info.HasMapper)
8609 RTArgs.MappersArray =
8610 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
// Emit a per-entry array of "struct.descriptor_dim" records ({offset, count,
// stride} per dimension) for each non-contiguous map entry, and store the
// array's address into slot I of Info.RTArgs.PointersArray so the offloading
// runtime can reconstruct the strided transfer.
// NOTE(review): this listing was extracted with lines dropped (the embedded
// original line numbers are non-contiguous), so several statements — e.g. the
// DimTy/ArrayTy definitions around 8619-8631 and the loop-exit bookkeeping —
// are absent here.
8613void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
 8614 InsertPointTy CodeGenIP,
 8615 MapInfosTy &CombinedInfo,
 8616 TargetDataInfo &
Info) {
 8617 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
 8618 CombinedInfo.NonContigInfo;
 8631 "struct.descriptor_dim");
// Field indices into struct.descriptor_dim: {offset, count, stride}.
 8633 enum { OffsetFD = 0, CountFD, StrideFD };
 8637 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
// Dims[I] == 1: entry carries no real dimension info — presumably skipped
// via a `continue` on the (missing) following line; confirm against the
// full source.
 8640 if (NonContigInfo.Dims[
I] == 1)
// Allocate the "dims" descriptor array at the function-entry alloca point,
// then switch back to the codegen point to fill it.
 8642 Builder.restoreIP(AllocaIP);
 8645 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
 8646 Builder.restoreIP(CodeGenIP);
// Populate one descriptor_dim per dimension; dimensions are written in
// reverse order via RevIdx = EE - II - 1.
 8647 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
 8648 unsigned RevIdx = EE -
II - 1;
 8649 Value *DimsLVal = Builder.CreateInBoundsGEP(
 8651 {Builder.getInt64(0), Builder.getInt64(II)});
 8653 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
 8654 Builder.CreateAlignedStore(
 8655 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
 8656 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
 8658 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
 8659 Builder.CreateAlignedStore(
 8660 NonContigInfo.Counts[L][RevIdx], CountLVal,
 8661 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
// NOTE(review): the stride store below takes its alignment from
// CountLVal->getType() rather than StrideLVal->getType(). Both StructGEPs
// yield the same pointer type, so the computed alignment is identical and
// this is benign — but it reads like a copy-paste slip from the Count store
// above; consider switching to StrideLVal for symmetry.
 8663 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
 8664 Builder.CreateAlignedStore(
 8665 NonContigInfo.Strides[L][RevIdx], StrideLVal,
 8666 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
// Publish the descriptor: cast the "dims" alloca to a generic pointer and
// store it into slot I of the offloading PointersArray.
 8669 Builder.restoreIP(CodeGenIP);
 8670 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
 8671 DimsAddr, Builder.getPtrTy());
 8672 Value *
P = Builder.CreateConstInBoundsGEP2_32(
 8674 Info.RTArgs.PointersArray, 0,
I);
 8675 Builder.CreateAlignedStore(
 8676 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
8681void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8689 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
8691 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8692 Value *DeleteBit = Builder.CreateAnd(
8695 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8696 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8701 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8703 Value *PtrAndObjBit = Builder.CreateAnd(
8706 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8707 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8708 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8709 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8710 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8711 DeleteCond = Builder.CreateIsNull(
8713 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8716 DeleteCond = Builder.CreateIsNotNull(
8718 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8720 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8721 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8723 emitBlock(BodyBB, MapperFn);
8726 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
8729 Value *MapTypeArg = Builder.CreateAnd(
8732 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8733 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8734 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8735 MapTypeArg = Builder.CreateOr(
8738 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8739 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8743 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8744 ArraySize, MapTypeArg, MapName};
8746 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8754 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
8770 MapperFn->
addFnAttr(Attribute::NoInline);
8771 MapperFn->
addFnAttr(Attribute::NoUnwind);
8781 auto SavedIP = Builder.saveIP();
8782 Builder.SetInsertPoint(EntryBB);
8794 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
8795 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
8796 Value *PtrBegin = BeginIn;
8797 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
8802 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8803 MapType, MapName, ElementSize, HeadBB,
8809 emitBlock(HeadBB, MapperFn);
8814 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
8815 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8818 emitBlock(BodyBB, MapperFn);
8821 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
8825 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
8827 return Info.takeError();
8831 Value *OffloadingArgs[] = {MapperHandle};
8832 Value *PreviousSize = Builder.CreateCall(
8833 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
8835 Value *ShiftedPreviousSize =
8836 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
8839 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
8848 Value *OriMapType = Builder.getInt64(
8849 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8851 Value *MemberMapType =
8852 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8866 Value *LeftToFrom = Builder.CreateAnd(
8869 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8870 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8871 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8880 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
8881 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
8883 emitBlock(AllocBB, MapperFn);
8884 Value *AllocMapType = Builder.CreateAnd(
8887 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8888 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8889 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8890 Builder.CreateBr(EndBB);
8891 emitBlock(AllocElseBB, MapperFn);
8892 Value *IsTo = Builder.CreateICmpEQ(
8895 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8896 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8897 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
8899 emitBlock(ToBB, MapperFn);
8900 Value *ToMapType = Builder.CreateAnd(
8903 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8904 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8905 Builder.CreateBr(EndBB);
8906 emitBlock(ToElseBB, MapperFn);
8907 Value *IsFrom = Builder.CreateICmpEQ(
8910 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8911 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8912 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
8914 emitBlock(FromBB, MapperFn);
8915 Value *FromMapType = Builder.CreateAnd(
8918 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8919 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8921 emitBlock(EndBB, MapperFn);
8924 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
8930 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8931 CurSizeArg, CurMapType, CurNameArg};
8933 auto ChildMapperFn = CustomMapperCB(
I);
8935 return ChildMapperFn.takeError();
8936 if (*ChildMapperFn) {
8938 Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow();
8943 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8950 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
8951 "omp.arraymap.next");
8953 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
8955 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
8957 emitBlock(ExitBB, MapperFn);
8960 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8961 MapType, MapName, ElementSize, DoneBB,
8965 emitBlock(DoneBB, MapperFn,
true);
8967 Builder.CreateRetVoid();
8968 Builder.restoreIP(SavedIP);
8972Error OpenMPIRBuilder::emitOffloadingArrays(
8973 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
8974 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
8975 bool IsNonContiguous,
8979 Info.clearArrayInfo();
8980 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8982 if (
Info.NumberOfPtrs == 0)
8985 Builder.restoreIP(AllocaIP);
8991 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
8992 PointerArrayType,
nullptr,
".offload_baseptrs");
8994 Info.RTArgs.PointersArray = Builder.CreateAlloca(
8995 PointerArrayType,
nullptr,
".offload_ptrs");
8996 AllocaInst *MappersArray = Builder.CreateAlloca(
8997 PointerArrayType,
nullptr,
".offload_mappers");
8998 Info.RTArgs.MappersArray = MappersArray;
9005 ConstantInt::get(Int64Ty, 0));
9007 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9010 if (IsNonContiguous &&
9011 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9012 CombinedInfo.Types[
I] &
9013 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9015 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9021 RuntimeSizes.set(
I);
9024 if (RuntimeSizes.all()) {
9026 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9027 SizeArrayType,
nullptr,
".offload_sizes");
9032 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9033 auto *SizesArrayGbl =
9038 if (!RuntimeSizes.any()) {
9039 Info.RTArgs.SizesArray = SizesArrayGbl;
9041 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9042 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9045 SizeArrayType,
nullptr,
".offload_sizes");
9048 Builder.CreateMemCpy(
9049 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9050 SizesArrayGbl, OffloadSizeAlign,
9055 Info.RTArgs.SizesArray = Buffer;
9063 for (
auto mapFlag : CombinedInfo.Types)
9065 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9067 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9068 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9069 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9072 if (!CombinedInfo.Names.empty()) {
9073 auto *MapNamesArrayGbl = createOffloadMapnames(
9074 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9075 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9076 Info.EmitDebug =
true;
9078 Info.RTArgs.MapNamesArray =
9080 Info.EmitDebug =
false;
9085 if (
Info.separateBeginEndCalls()) {
9086 bool EndMapTypesDiffer =
false;
9088 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9089 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9090 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9091 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9092 EndMapTypesDiffer =
true;
9095 if (EndMapTypesDiffer) {
9096 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9097 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9102 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9103 Value *BPVal = CombinedInfo.BasePointers[
I];
9104 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9107 Builder.CreateAlignedStore(BPVal, BP,
9108 M.getDataLayout().getPrefTypeAlign(PtrTy));
9110 if (
Info.requiresDevicePointerInfo()) {
9111 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9112 CodeGenIP = Builder.saveIP();
9113 Builder.restoreIP(AllocaIP);
9114 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9115 Builder.restoreIP(CodeGenIP);
9117 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9118 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9119 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9121 DeviceAddrCB(
I, BP);
9125 Value *PVal = CombinedInfo.Pointers[
I];
9126 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9130 Builder.CreateAlignedStore(PVal,
P,
9131 M.getDataLayout().getPrefTypeAlign(PtrTy));
9133 if (RuntimeSizes.test(
I)) {
9134 Value *S = Builder.CreateConstInBoundsGEP2_32(
9138 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9141 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9144 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9147 auto CustomMFunc = CustomMapperCB(
I);
9149 return CustomMFunc.takeError();
9151 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9153 Value *MAddr = Builder.CreateInBoundsGEP(
9155 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9156 Builder.CreateAlignedStore(
9157 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9160 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9161 Info.NumberOfPtrs == 0)
9163 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9168 BasicBlock *CurBB = Builder.GetInsertBlock();
9175 Builder.CreateBr(
Target);
9178 Builder.ClearInsertionPoint();
9183 BasicBlock *CurBB = Builder.GetInsertBlock();
9199 Builder.SetInsertPoint(BB);
9202Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9203 BodyGenCallbackTy ElseGen,
9204 InsertPointTy AllocaIP) {
9208 auto CondConstant = CI->getSExtValue();
9210 return ThenGen(AllocaIP, Builder.saveIP());
9212 return ElseGen(AllocaIP, Builder.saveIP());
9222 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9224 emitBlock(ThenBlock, CurFn);
9225 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9227 emitBranch(ContBlock);
9230 emitBlock(ElseBlock, CurFn);
9231 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9234 emitBranch(ContBlock);
9236 emitBlock(ContBlock, CurFn,
true);
9240bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9244 "Unexpected Atomic Ordering.");
9301OpenMPIRBuilder::InsertPointTy
9302OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9303 AtomicOpValue &
X, AtomicOpValue &V,
9305 if (!updateToLocation(
Loc))
9308 assert(
X.Var->getType()->isPointerTy() &&
9309 "OMP Atomic expects a pointer to target memory");
9310 Type *XElemTy =
X.ElemTy;
9313 "OMP atomic read expected a scalar type");
9315 Value *XRead =
nullptr;
9319 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9325 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9330 OpenMPIRBuilder::AtomicInfo atomicInfo(
9331 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9332 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9333 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9334 XRead = AtomicLoadRes.first;
9341 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9344 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9346 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9349 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9350 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9351 return Builder.saveIP();
9354OpenMPIRBuilder::InsertPointTy
9355OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9356 AtomicOpValue &
X,
Value *Expr,
9358 if (!updateToLocation(
Loc))
9361 assert(
X.Var->getType()->isPointerTy() &&
9362 "OMP Atomic expects a pointer to target memory");
9363 Type *XElemTy =
X.ElemTy;
9366 "OMP atomic write expected a scalar type");
9369 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9372 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9376 OpenMPIRBuilder::AtomicInfo atomicInfo(
9377 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9378 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9379 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9386 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9387 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9391 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9392 return Builder.saveIP();
9395OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9396 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9398 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9399 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9401 if (!updateToLocation(
Loc))
9405 Type *XTy =
X.Var->getType();
9407 "OMP Atomic expects a pointer to target memory");
9408 Type *XElemTy =
X.ElemTy;
9411 "OMP atomic update expected a scalar type");
9414 "OpenMP atomic does not support LT or GT operations");
9418 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9419 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9421 return AtomicResult.takeError();
9422 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9423 return Builder.saveIP();
9427Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9431 return Builder.CreateAdd(Src1, Src2);
9433 return Builder.CreateSub(Src1, Src2);
9435 return Builder.CreateAnd(Src1, Src2);
9437 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9439 return Builder.CreateOr(Src1, Src2);
9441 return Builder.CreateXor(Src1, Src2);
9466 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9467 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9470 bool emitRMWOp =
false;
9478 emitRMWOp = XElemTy;
9481 emitRMWOp = (IsXBinopExpr && XElemTy);
9488 std::pair<Value *, Value *> Res;
9493 if (IsIgnoreDenormalMode)
9494 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9496 if (!IsFineGrainedMemory)
9497 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9499 if (!IsRemoteMemory)
9503 Res.first = RMWInst;
9508 Res.second = Res.first;
9510 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9514 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9520 OpenMPIRBuilder::AtomicInfo atomicInfo(
9521 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9522 OldVal->
getAlign(),
true , AllocaIP,
X);
9523 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9524 BasicBlock *CurBB = Builder.GetInsertBlock();
9526 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9530 X->getName() +
".atomic.cont");
9532 Builder.restoreIP(AllocaIP);
9533 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9534 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9535 Builder.SetInsertPoint(ContBB);
9537 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9542 Value *Upd = *CBResult;
9543 Builder.CreateStore(Upd, NewAtomicAddr);
9546 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9547 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9549 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9550 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9552 Res.first = OldExprVal;
9558 Builder.SetInsertPoint(ExitBB);
9560 Builder.SetInsertPoint(ExitTI);
9566 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9573 BasicBlock *CurBB = Builder.GetInsertBlock();
9575 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9579 X->getName() +
".atomic.cont");
9581 Builder.restoreIP(AllocaIP);
9582 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9583 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9584 Builder.SetInsertPoint(ContBB);
9586 PHI->addIncoming(OldVal, CurBB);
9591 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9592 X->getName() +
".atomic.fltCast");
9594 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9595 X->getName() +
".atomic.ptrCast");
9602 Value *Upd = *CBResult;
9603 Builder.CreateStore(Upd, NewAtomicAddr);
9604 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9609 Result->setVolatile(VolatileX);
9610 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9611 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9612 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9613 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9615 Res.first = OldExprVal;
9622 Builder.SetInsertPoint(ExitBB);
9624 Builder.SetInsertPoint(ExitTI);
9631OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9632 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9635 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9636 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9637 if (!updateToLocation(
Loc))
9641 Type *XTy =
X.Var->getType();
9643 "OMP Atomic expects a pointer to target memory");
9644 Type *XElemTy =
X.ElemTy;
9647 "OMP atomic capture expected a scalar type");
9649 "OpenMP atomic does not support LT or GT operations");
9656 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9657 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9660 Value *CapturedVal =
9661 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9662 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9664 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9665 return Builder.saveIP();
9668OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9669 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9675 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9676 IsPostfixUpdate, IsFailOnly, Failure);
9679OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9680 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9685 if (!updateToLocation(
Loc))
9688 assert(
X.Var->getType()->isPointerTy() &&
9689 "OMP atomic expects a pointer to target memory");
9692 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9693 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9696 bool IsInteger =
E->getType()->isIntegerTy();
9698 if (
Op == OMPAtomicCompareOp::EQ) {
9703 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9704 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9709 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9713 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9715 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9717 "OldValue and V must be of same type");
9718 if (IsPostfixUpdate) {
9719 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9721 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9732 BasicBlock *CurBB = Builder.GetInsertBlock();
9734 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9736 CurBBTI,
X.Var->getName() +
".atomic.exit");
9742 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9744 Builder.SetInsertPoint(ContBB);
9745 Builder.CreateStore(OldValue, V.Var);
9746 Builder.CreateBr(ExitBB);
9751 Builder.SetInsertPoint(ExitBB);
9753 Builder.SetInsertPoint(ExitTI);
9756 Value *CapturedValue =
9757 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
9758 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9764 assert(
R.Var->getType()->isPointerTy() &&
9765 "r.var must be of pointer type");
9766 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9768 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9769 Value *ResultCast =
R.IsSigned
9770 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
9771 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
9772 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
9775 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9776 "Op should be either max or min at this point");
9777 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9815 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
9817 Value *CapturedValue =
nullptr;
9818 if (IsPostfixUpdate) {
9819 CapturedValue = OldValue;
9844 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
9845 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
9847 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9851 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
9853 return Builder.saveIP();
9856OpenMPIRBuilder::InsertPointOrErrorTy
9857OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
9858 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
9861 if (!updateToLocation(
Loc))
9862 return InsertPointTy();
9865 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
9866 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
9871 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
9872 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
9873 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
9893 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
9894 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
9896 splitBB(Builder,
true,
"teams.alloca");
9898 bool SubClausesPresent =
9899 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9901 if (!Config.isTargetDevice() && SubClausesPresent) {
9902 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9903 "if lowerbound is non-null, then upperbound must also be non-null "
9904 "for bounds on num_teams");
9906 if (NumTeamsUpper ==
nullptr)
9907 NumTeamsUpper = Builder.getInt32(0);
9909 if (NumTeamsLower ==
nullptr)
9910 NumTeamsLower = NumTeamsUpper;
9914 "argument to if clause must be an integer value");
9918 IfExpr = Builder.CreateICmpNE(IfExpr,
9919 ConstantInt::get(IfExpr->
getType(), 0));
9920 NumTeamsUpper = Builder.CreateSelect(
9921 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
9924 NumTeamsLower = Builder.CreateSelect(
9925 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
9928 if (ThreadLimit ==
nullptr)
9929 ThreadLimit = Builder.getInt32(0);
9931 Value *ThreadNum = getOrCreateThreadID(Ident);
9933 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
9934 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9937 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
9938 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
9939 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9943 OI.EntryBB = AllocaBB;
9945 OI.OuterAllocaBB = &OuterAllocaBB;
9949 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
9951 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9953 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9955 auto HostPostOutlineCB = [
this, Ident,
9956 ToBeDeleted](
Function &OutlinedFn)
mutable {
9961 "there must be a single user for the outlined function");
9966 "Outlined function must have two or three arguments only");
9968 bool HasShared = OutlinedFn.
arg_size() == 3;
9976 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9977 "outlined function.");
9978 Builder.SetInsertPoint(StaleCI);
9980 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
9983 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
9984 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
9988 I->eraseFromParent();
9991 if (!Config.isTargetDevice())
9992 OI.PostOutlineCB = HostPostOutlineCB;
9994 addOutlineInfo(std::move(OI));
9996 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
9998 return Builder.saveIP();
10001OpenMPIRBuilder::InsertPointOrErrorTy
10002OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10003 InsertPointTy OuterAllocaIP,
10004 BodyGenCallbackTy BodyGenCB) {
10005 if (!updateToLocation(
Loc))
10006 return InsertPointTy();
10008 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10010 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10012 splitBB(Builder,
true,
"distribute.entry");
10013 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10016 splitBB(Builder,
true,
"distribute.exit");
10018 splitBB(Builder,
true,
"distribute.body");
10020 splitBB(Builder,
true,
"distribute.alloca");
10023 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10024 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10025 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10029 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10030 OI.EntryBB = AllocaBB;
10031 OI.ExitBB = ExitBB;
10033 addOutlineInfo(std::move(OI));
10034 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10036 return Builder.saveIP();
10041 std::string VarName) {
10047 M, MapNamesArrayInit->
getType(),
10050 return MapNamesArrayGlobal;
10055void OpenMPIRBuilder::initializeTypes(
Module &M) {
10058#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10059#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10060 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10061 VarName##PtrTy = PointerType::getUnqual(Ctx);
10062#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10063 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10064 VarName##Ptr = PointerType::getUnqual(Ctx);
10065#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10066 T = StructType::getTypeByName(Ctx, StructName); \
10068 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10070 VarName##Ptr = PointerType::getUnqual(Ctx);
10071#include "llvm/Frontend/OpenMP/OMPKinds.def"
10074void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10082 while (!Worklist.
empty()) {
10086 if (
BlockSet.insert(SuccBB).second)
10095 if (!Config.isGPU()) {
10110 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10111 Fn->
addFnAttr(Attribute::MustProgress);
10115void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10116 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10119 if (OffloadInfoManager.empty())
10123 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10124 TargetRegionEntryInfo>,
10126 OrderedEntries(OffloadInfoManager.size());
10129 auto &&GetMDInt = [
this](
unsigned V) {
10136 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10137 auto &&TargetRegionMetadataEmitter =
10138 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10139 const TargetRegionEntryInfo &EntryInfo,
10140 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10153 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10154 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10155 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10156 GetMDInt(
E.getOrder())};
10159 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10165 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10168 auto &&DeviceGlobalVarMetadataEmitter =
10169 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10171 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10179 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10180 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10183 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10184 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10190 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10191 DeviceGlobalVarMetadataEmitter);
10193 for (
const auto &
E : OrderedEntries) {
10194 assert(
E.first &&
"All ordered entries must exist!");
10195 if (
const auto *CE =
10198 if (!
CE->getID() || !
CE->getAddress()) {
10200 TargetRegionEntryInfo EntryInfo =
E.second;
10201 StringRef FnName = EntryInfo.ParentName;
10202 if (!M.getNamedValue(FnName))
10204 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10207 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10210 }
else if (
const auto *CE =
dyn_cast<
10211 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10213 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10214 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10217 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10218 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10219 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10221 if (!
CE->getAddress()) {
10222 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10226 if (
CE->getVarSize() == 0)
10229 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10230 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10231 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10232 "Declaret target link address is set.");
10233 if (Config.isTargetDevice())
10235 if (!
CE->getAddress()) {
10236 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10248 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10249 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10254 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10255 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10256 Flags,
CE->getLinkage(),
CE->getVarName());
10258 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10259 Flags,
CE->getLinkage());
10270 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10275 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10276 Config.getRequiresFlags());
10279void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10281 unsigned FileID,
unsigned Line,
unsigned Count) {
10283 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10284 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10286 OS <<
"_" <<
Count;
10289void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10291 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10292 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10293 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10294 EntryInfo.Line, NewCount);
10297TargetRegionEntryInfo
10298OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10301 auto FileIDInfo = CallBack();
10307 FileID =
hash_value(std::get<0>(FileIDInfo));
10309 FileID =
ID.getFile();
10311 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10312 std::get<1>(FileIDInfo));
10315unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10318 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10320 !(Remain & 1); Remain = Remain >> 1)
10326OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10329 << getFlagMemberOffset());
10332void OpenMPIRBuilder::setCorrectMemberOfFlag(
10338 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10340 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10347 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10348 Flags |= MemberOfFlag;
10351Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10352 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10353 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10354 bool IsDeclaration,
bool IsExternallyVisible,
10355 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10356 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10357 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10358 std::function<
Constant *()> GlobalInitializer,
10365 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10366 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10368 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10369 Config.hasRequiresUnifiedSharedMemory())) {
10374 if (!IsExternallyVisible)
10375 OS <<
format(
"_%x", EntryInfo.FileID);
10376 OS <<
"_decl_tgt_ref_ptr";
10379 Value *
Ptr = M.getNamedValue(PtrName);
10383 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10388 if (!Config.isTargetDevice()) {
10389 if (GlobalInitializer)
10390 GV->setInitializer(GlobalInitializer());
10395 registerTargetGlobalVariable(
10396 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10397 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10407void OpenMPIRBuilder::registerTargetGlobalVariable(
10408 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10409 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10410 bool IsDeclaration,
bool IsExternallyVisible,
10411 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10412 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10413 std::vector<Triple> TargetTriple,
10414 std::function<
Constant *()> GlobalInitializer,
10417 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10418 (TargetTriple.empty() && !Config.isTargetDevice()))
10421 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10426 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10428 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10429 !Config.hasRequiresUnifiedSharedMemory()) {
10430 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10434 if (!IsDeclaration)
10436 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10443 if (Config.isTargetDevice() &&
10447 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10450 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10452 if (!M.getNamedValue(RefName)) {
10454 getOrCreateInternalVariable(Addr->
getType(), RefName);
10456 GvAddrRef->setConstant(
true);
10458 GvAddrRef->setInitializer(Addr);
10459 GeneratedRefs.push_back(GvAddrRef);
10463 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10464 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10466 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10468 if (Config.isTargetDevice()) {
10472 Addr = getAddrOfDeclareTargetVar(
10473 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10474 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10475 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10478 VarSize = M.getDataLayout().getPointerSize();
10482 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10488void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10492 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10497 auto &&GetMDInt = [MN](
unsigned Idx) {
10502 auto &&GetMDString = [MN](
unsigned Idx) {
10504 return V->getString();
10507 switch (GetMDInt(0)) {
10511 case OffloadEntriesInfoManager::OffloadEntryInfo::
10512 OffloadingEntryInfoTargetRegion: {
10513 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10518 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10522 case OffloadEntriesInfoManager::OffloadEntryInfo::
10523 OffloadingEntryInfoDeviceGlobalVar:
10524 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10526 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10534void OpenMPIRBuilder::loadOffloadInfoMetadata(
StringRef HostFilePath) {
10535 if (HostFilePath.
empty())
10539 if (std::error_code Err = Buf.getError()) {
10541 "OpenMPIRBuilder: " +
10549 if (std::error_code Err = M.getError()) {
10551 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10555 loadOffloadInfoMetadata(*M.get());
10562bool OffloadEntriesInfoManager::empty()
const {
10563 return OffloadEntriesTargetRegion.empty() &&
10564 OffloadEntriesDeviceGlobalVar.empty();
10567unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10568 const TargetRegionEntryInfo &EntryInfo)
const {
10569 auto It = OffloadEntriesTargetRegionCount.find(
10570 getTargetRegionEntryCountKey(EntryInfo));
10571 if (It == OffloadEntriesTargetRegionCount.end())
10576void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10577 const TargetRegionEntryInfo &EntryInfo) {
10578 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10579 EntryInfo.Count + 1;
10583void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10584 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10585 OffloadEntriesTargetRegion[EntryInfo] =
10586 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10587 OMPTargetRegionEntryTargetRegion);
10588 ++OffloadingEntriesNum;
10591void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10593 OMPTargetRegionEntryKind Flags) {
10594 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10597 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10601 if (OMPBuilder->Config.isTargetDevice()) {
10603 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10606 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10607 Entry.setAddress(Addr);
10609 Entry.setFlags(Flags);
10611 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10612 hasTargetRegionEntryInfo(EntryInfo,
true))
10614 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10615 "Target region entry already registered!");
10616 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10617 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10618 ++OffloadingEntriesNum;
10620 incrementTargetRegionEntryInfoCount(EntryInfo);
10623bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10624 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10627 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10629 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10630 if (It == OffloadEntriesTargetRegion.end()) {
10634 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10639void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10640 const OffloadTargetRegionEntryInfoActTy &Action) {
10642 for (
const auto &It : OffloadEntriesTargetRegion) {
10643 Action(It.first, It.second);
10647void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10648 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10649 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10650 ++OffloadingEntriesNum;
10653void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10656 if (OMPBuilder->Config.isTargetDevice()) {
10658 if (!hasDeviceGlobalVarEntryInfo(VarName))
10660 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10661 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10662 if (Entry.getVarSize() == 0) {
10663 Entry.setVarSize(VarSize);
10668 Entry.setVarSize(VarSize);
10670 Entry.setAddress(Addr);
10672 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10673 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10674 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10675 "Entry not initialized!");
10676 if (Entry.getVarSize() == 0) {
10677 Entry.setVarSize(VarSize);
10682 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10683 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10684 Addr, VarSize, Flags,
Linkage,
10687 OffloadEntriesDeviceGlobalVar.try_emplace(
10688 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10689 ++OffloadingEntriesNum;
10693void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10694 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10696 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
10697 Action(
E.getKey(),
E.getValue());
10704void CanonicalLoopInfo::collectControlBlocks(
10711 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10714BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10723void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10735void CanonicalLoopInfo::mapIndVar(
10745 for (
Use &U : OldIV->
uses()) {
10749 if (
User->getParent() == getCond())
10751 if (
User->getParent() == getLatch())
10757 Value *NewIV = Updater(OldIV);
10760 for (
Use *U : ReplacableUses)
10768void CanonicalLoopInfo::assertOK()
const {
10781 "Preheader must terminate with unconditional branch");
10783 "Preheader must jump to header");
10787 "Header must terminate with unconditional branch");
10788 assert(Header->getSingleSuccessor() ==
Cond &&
10789 "Header must jump to exiting block");
10792 assert(
Cond->getSinglePredecessor() == Header &&
10793 "Exiting block only reachable from header");
10796 "Exiting block must terminate with conditional branch");
10798 "Exiting block must have two successors");
10800 "Exiting block's first successor jump to the body");
10802 "Exiting block's second successor must exit the loop");
10806 "Body only reachable from exiting block");
10811 "Latch must terminate with unconditional branch");
10820 "Exit block must terminate with unconditional branch");
10821 assert(
Exit->getSingleSuccessor() == After &&
10822 "Exit block must jump to after block");
10826 "After block only reachable from exit block");
10830 assert(IndVar &&
"Canonical induction variable not found?");
10832 "Induction variable must be an integer");
10834 "Induction variable must be a PHI in the loop header");
10840 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
10847 Value *TripCount = getTripCount();
10848 assert(TripCount &&
"Loop trip count not found?");
10850 "Trip count and induction variable must have the same type");
10854 "Exit condition must be a signed less-than comparison");
10856 "Exit condition must compare the induction variable");
10858 "Exit condition must compare with the trip count");
10862void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn)
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI void emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their defintion in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
LLVM_ABI std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the specified...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...