65#define DEBUG_TYPE "openmp-ir-builder"
72 cl::desc(
"Use optimistic attributes describing "
73 "'as-if' properties of runtime calls."),
77 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
78 cl::desc(
"Factor for the unroll threshold to account for code "
79 "simplifications still taking place"),
90 if (!IP1.isSet() || !IP2.isSet())
92 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
97 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
98 case OMPScheduleType::UnorderedStaticChunked:
99 case OMPScheduleType::UnorderedStatic:
100 case OMPScheduleType::UnorderedDynamicChunked:
101 case OMPScheduleType::UnorderedGuidedChunked:
102 case OMPScheduleType::UnorderedRuntime:
103 case OMPScheduleType::UnorderedAuto:
104 case OMPScheduleType::UnorderedTrapezoidal:
105 case OMPScheduleType::UnorderedGreedy:
106 case OMPScheduleType::UnorderedBalanced:
107 case OMPScheduleType::UnorderedGuidedIterativeChunked:
108 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
109 case OMPScheduleType::UnorderedSteal:
110 case OMPScheduleType::UnorderedStaticBalancedChunked:
111 case OMPScheduleType::UnorderedGuidedSimd:
112 case OMPScheduleType::UnorderedRuntimeSimd:
113 case OMPScheduleType::OrderedStaticChunked:
114 case OMPScheduleType::OrderedStatic:
115 case OMPScheduleType::OrderedDynamicChunked:
116 case OMPScheduleType::OrderedGuidedChunked:
117 case OMPScheduleType::OrderedRuntime:
118 case OMPScheduleType::OrderedAuto:
119 case OMPScheduleType::OrderdTrapezoidal:
120 case OMPScheduleType::NomergeUnorderedStaticChunked:
121 case OMPScheduleType::NomergeUnorderedStatic:
122 case OMPScheduleType::NomergeUnorderedDynamicChunked:
123 case OMPScheduleType::NomergeUnorderedGuidedChunked:
124 case OMPScheduleType::NomergeUnorderedRuntime:
125 case OMPScheduleType::NomergeUnorderedAuto:
126 case OMPScheduleType::NomergeUnorderedTrapezoidal:
127 case OMPScheduleType::NomergeUnorderedGreedy:
128 case OMPScheduleType::NomergeUnorderedBalanced:
129 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
130 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
131 case OMPScheduleType::NomergeUnorderedSteal:
132 case OMPScheduleType::NomergeOrderedStaticChunked:
133 case OMPScheduleType::NomergeOrderedStatic:
134 case OMPScheduleType::NomergeOrderedDynamicChunked:
135 case OMPScheduleType::NomergeOrderedGuidedChunked:
136 case OMPScheduleType::NomergeOrderedRuntime:
137 case OMPScheduleType::NomergeOrderedAuto:
138 case OMPScheduleType::NomergeOrderedTrapezoidal:
146 SchedType & OMPScheduleType::MonotonicityMask;
147 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
159 Builder.restoreIP(IP);
169 Kernel->getFnAttribute(
"target-features").getValueAsString();
170 if (Features.
count(
"+wavefrontsize64"))
185 bool HasSimdModifier) {
187 switch (ClauseKind) {
188 case OMP_SCHEDULE_Default:
189 case OMP_SCHEDULE_Static:
190 return HasChunks ? OMPScheduleType::BaseStaticChunked
191 : OMPScheduleType::BaseStatic;
192 case OMP_SCHEDULE_Dynamic:
193 return OMPScheduleType::BaseDynamicChunked;
194 case OMP_SCHEDULE_Guided:
195 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
196 : OMPScheduleType::BaseGuidedChunked;
197 case OMP_SCHEDULE_Auto:
199 case OMP_SCHEDULE_Runtime:
200 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
201 : OMPScheduleType::BaseRuntime;
209 bool HasOrderedClause) {
210 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
211 OMPScheduleType::None &&
212 "Must not have ordering nor monotonicity flags already set");
215 ? OMPScheduleType::ModifierOrdered
216 : OMPScheduleType::ModifierUnordered;
217 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
220 if (OrderingScheduleType ==
221 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
222 return OMPScheduleType::OrderedGuidedChunked;
223 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
224 OMPScheduleType::ModifierOrdered))
225 return OMPScheduleType::OrderedRuntime;
227 return OrderingScheduleType;
233 bool HasSimdModifier,
bool HasMonotonic,
234 bool HasNonmonotonic,
bool HasOrderedClause) {
235 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
236 OMPScheduleType::None &&
237 "Must not have monotonicity flags already set");
238 assert((!HasMonotonic || !HasNonmonotonic) &&
239 "Monotonic and Nonmonotonic are contradicting each other");
242 return ScheduleType | OMPScheduleType::ModifierMonotonic;
243 }
else if (HasNonmonotonic) {
244 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
254 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
255 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
261 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
269 bool HasSimdModifier,
bool HasMonotonicModifier,
270 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
276 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
277 HasNonmonotonicModifier, HasOrderedClause);
292 assert(!Br->isConditional() &&
293 "BB's terminator must be an unconditional branch (or degenerate)");
296 Br->setSuccessor(0,
Target);
301 NewBr->setDebugLoc(
DL);
307 "Target BB must not have PHI nodes");
327 NewBr->setDebugLoc(
DL);
335 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
339 Builder.SetInsertPoint(Old);
343 Builder.SetCurrentDebugLocation(
DebugLoc);
352 spliceBB(IP, New, CreateBranch,
DL);
353 New->replaceSuccessorsPhiUsesWith(Old, New);
362 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
364 Builder.SetInsertPoint(Builder.GetInsertBlock());
367 Builder.SetCurrentDebugLocation(
DebugLoc);
376 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
378 Builder.SetInsertPoint(Builder.GetInsertBlock());
381 Builder.SetCurrentDebugLocation(
DebugLoc);
388 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
395 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
397 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
398 const Twine &Name =
"",
bool AsPtr =
true) {
399 Builder.restoreIP(OuterAllocaIP);
402 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
406 FakeVal = FakeValAddr;
409 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
414 Builder.restoreIP(InnerAllocaIP);
418 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
434enum OpenMPOffloadingRequiresDirFlags {
436 OMP_REQ_UNDEFINED = 0x000,
438 OMP_REQ_NONE = 0x001,
440 OMP_REQ_REVERSE_OFFLOAD = 0x002,
442 OMP_REQ_UNIFIED_ADDRESS = 0x004,
444 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
446 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
452OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
453 : RequiresFlags(OMP_REQ_UNDEFINED) {}
455OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
456 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
457 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
458 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
459 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
460 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
461 RequiresFlags(OMP_REQ_UNDEFINED) {
462 if (HasRequiresReverseOffload)
463 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
464 if (HasRequiresUnifiedAddress)
465 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
466 if (HasRequiresUnifiedSharedMemory)
467 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
468 if (HasRequiresDynamicAllocators)
469 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
472bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
473 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
476bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
477 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
480bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
481 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
484bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
485 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
488int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
489 return hasRequiresFlags() ? RequiresFlags
490 :
static_cast<int64_t
>(OMP_REQ_NONE);
493void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
495 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
497 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
500void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
502 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
504 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
514void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
516 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
518 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
525void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
529 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
531 constexpr const size_t MaxDim = 3;
533 Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
535 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
538 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
539 Value *NumThreads3D =
540 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
542 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
544 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
546 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
548 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
550 ArgsVector = {Version,
552 KernelArgs.RTArgs.BasePointersArray,
553 KernelArgs.RTArgs.PointersArray,
554 KernelArgs.RTArgs.SizesArray,
555 KernelArgs.RTArgs.MapTypesArray,
556 KernelArgs.RTArgs.MapNamesArray,
557 KernelArgs.RTArgs.MappersArray,
558 KernelArgs.NumIterations,
562 KernelArgs.DynCGGroupMem};
570 auto FnAttrs =
Attrs.getFnAttrs();
571 auto RetAttrs =
Attrs.getRetAttrs();
573 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
578 bool Param =
true) ->
void {
579 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
580 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
581 if (HasSignExt || HasZeroExt) {
582 assert(AS.getNumAttributes() == 1 &&
583 "Currently not handling extension attr combined with others.");
585 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
588 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
595#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
596#include "llvm/Frontend/OpenMP/OMPKinds.def"
600#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
602 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
603 addAttrSet(RetAttrs, RetAttrSet, false); \
604 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
605 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
606 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
608#include "llvm/Frontend/OpenMP/OMPKinds.def"
622#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
624 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
626 Fn = M.getFunction(Str); \
628#include "llvm/Frontend/OpenMP/OMPKinds.def"
634#define OMP_RTL(Enum, Str, ...) \
636 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
638#include "llvm/Frontend/OpenMP/OMPKinds.def"
642 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
652 LLVMContext::MD_callback,
654 2, {-1, -1},
true)}));
660 addAttributes(FnID, *Fn);
667 assert(Fn &&
"Failed to create OpenMP runtime function");
675 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
679void OpenMPIRBuilder::initialize() { initializeTypes(M); }
690 for (
auto Inst =
Block->getReverseIterator()->begin();
691 Inst !=
Block->getReverseIterator()->end();) {
704void OpenMPIRBuilder::finalize(
Function *Fn) {
708 for (OutlineInfo &OI : OutlineInfos) {
711 if (Fn && OI.getFunction() != Fn) {
716 ParallelRegionBlockSet.
clear();
718 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
728 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
737 ".omp_par", ArgsInZeroAddressSpace);
741 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
742 assert(Extractor.isEligible() &&
743 "Expected OpenMP outlining to be possible!");
745 for (
auto *V : OI.ExcludeArgsFromAggregate)
746 Extractor.excludeArgFromAggregate(V);
748 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
752 if (TargetCpuAttr.isStringAttribute())
755 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
756 if (TargetFeaturesAttr.isStringAttribute())
757 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
760 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
762 "OpenMP outlined functions should not return a value!");
767 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
774 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
781 "Expected instructions to add in the outlined region entry");
783 End = ArtificialEntry.
rend();
788 if (
I.isTerminator()) {
790 if (OI.EntryBB->getTerminator())
791 OI.EntryBB->getTerminator()->adoptDbgRecords(
792 &ArtificialEntry,
I.getIterator(),
false);
796 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
799 OI.EntryBB->moveBefore(&ArtificialEntry);
806 if (OI.PostOutlineCB)
807 OI.PostOutlineCB(*OutlinedFn);
811 OutlineInfos = std::move(DeferredOutlines);
832 for (
Function *
F : ConstantAllocaRaiseCandidates)
835 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
836 [](EmitMetadataErrorKind Kind,
837 const TargetRegionEntryInfo &EntryInfo) ->
void {
838 errs() <<
"Error of kind: " << Kind
839 <<
" when emitting offload entries and metadata during "
840 "OMPIRBuilder finalization \n";
843 if (!OffloadInfoManager.empty())
844 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
846 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
847 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
848 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
849 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
855bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
857OpenMPIRBuilder::~OpenMPIRBuilder() {
858 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
866 ConstantInt::get(I32Ty,
Value), Name);
878 UsedArray.
resize(List.size());
879 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
883 if (UsedArray.
empty())
890 GV->setSection(
"llvm.metadata");
894OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
896 auto *Int8Ty = Builder.getInt8Ty();
899 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
907 unsigned Reserve2Flags) {
909 LocFlags |= OMP_IDENT_FLAG_KMPC;
912 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
917 ConstantInt::get(
Int32, Reserve2Flags),
918 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
920 size_t SrcLocStrArgIdx = 4;
921 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
925 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
932 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
933 if (
GV.getInitializer() == Initializer)
938 M, OpenMPIRBuilder::Ident,
941 M.getDataLayout().getDefaultGlobalsAddressSpace());
953 SrcLocStrSize = LocStr.
size();
954 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
962 if (
GV.isConstant() &&
GV.hasInitializer() &&
963 GV.getInitializer() == Initializer)
966 SrcLocStr = Builder.CreateGlobalString(
967 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
975 unsigned Line,
unsigned Column,
981 Buffer.
append(FunctionName);
983 Buffer.
append(std::to_string(Line));
985 Buffer.
append(std::to_string(Column));
988 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
992OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
993 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
994 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1002 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1004 if (
DIFile *DIF = DIL->getFile())
1005 if (std::optional<StringRef> Source = DIF->getSource())
1010 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1011 DIL->getColumn(), SrcLocStrSize);
1014Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1016 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1017 Loc.IP.getBlock()->getParent());
1020Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1021 return Builder.CreateCall(
1022 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1023 "omp_global_thread_num");
1026OpenMPIRBuilder::InsertPointOrErrorTy
1027OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1028 bool ForceSimpleCall,
bool CheckCancelFlag) {
1029 if (!updateToLocation(
Loc))
1038 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1041 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1044 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1047 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1050 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1055 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1057 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1058 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1063 bool UseCancelBarrier =
1064 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1067 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
1068 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1069 : OMPRTL___kmpc_barrier),
1072 if (UseCancelBarrier && CheckCancelFlag)
1073 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1076 return Builder.saveIP();
1079OpenMPIRBuilder::InsertPointOrErrorTy
1080OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1082 omp::Directive CanceledDirective) {
1083 if (!updateToLocation(
Loc))
1087 auto *UI = Builder.CreateUnreachable();
1092 Builder.SetInsertPoint(ThenTI);
1094 Value *CancelKind =
nullptr;
1095 switch (CanceledDirective) {
1096#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1097 case DirectiveEnum: \
1098 CancelKind = Builder.getInt32(Value); \
1100#include "llvm/Frontend/OpenMP/OMPKinds.def"
1106 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1107 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1108 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1110 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1111 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1112 if (CanceledDirective == OMPD_parallel) {
1114 Builder.restoreIP(IP);
1115 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1116 omp::Directive::OMPD_unknown,
1125 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1129 Builder.SetInsertPoint(UI->getParent());
1130 UI->eraseFromParent();
1132 return Builder.saveIP();
1135OpenMPIRBuilder::InsertPointOrErrorTy
1136OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1137 omp::Directive CanceledDirective) {
1138 if (!updateToLocation(
Loc))
1142 auto *UI = Builder.CreateUnreachable();
1143 Builder.SetInsertPoint(UI);
1145 Value *CancelKind =
nullptr;
1146 switch (CanceledDirective) {
1147#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1148 case DirectiveEnum: \
1149 CancelKind = Builder.getInt32(Value); \
1151#include "llvm/Frontend/OpenMP/OMPKinds.def"
1157 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1158 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1159 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1161 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1162 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1163 if (CanceledDirective == OMPD_parallel) {
1165 Builder.restoreIP(IP);
1166 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1167 omp::Directive::OMPD_unknown,
1176 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1180 Builder.SetInsertPoint(UI->getParent());
1181 UI->eraseFromParent();
1183 return Builder.saveIP();
1186OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1187 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1190 if (!updateToLocation(
Loc))
1193 Builder.restoreIP(AllocaIP);
1194 auto *KernelArgsPtr =
1195 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1196 updateToLocation(
Loc);
1200 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1201 Builder.CreateAlignedStore(
1203 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1207 NumThreads, HostPtr, KernelArgsPtr};
1209 Return = Builder.CreateCall(
1210 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1213 return Builder.saveIP();
1216OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1217 const LocationDescription &
Loc,
Value *OutlinedFnID,
1218 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1219 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1221 if (!updateToLocation(
Loc))
1234 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1238 Value *Return =
nullptr;
1242 getKernelArgsVector(Args, Builder, ArgsVector);
1257 Builder.restoreIP(emitTargetKernel(
1258 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1259 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1266 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1268 auto CurFn = Builder.GetInsertBlock()->getParent();
1269 emitBlock(OffloadFailedBlock, CurFn);
1270 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1272 return AfterIP.takeError();
1273 Builder.restoreIP(*AfterIP);
1274 emitBranch(OffloadContBlock);
1275 emitBlock(OffloadContBlock, CurFn,
true);
1276 return Builder.saveIP();
1279Error OpenMPIRBuilder::emitCancelationCheckImpl(
1280 Value *CancelFlag, omp::Directive CanceledDirective,
1281 FinalizeCallbackTy ExitCB) {
1282 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1283 "Unexpected cancellation!");
1288 if (Builder.GetInsertPoint() == BB->
end()) {
1294 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1296 Builder.SetInsertPoint(BB);
1302 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1303 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1308 Builder.SetInsertPoint(CancellationBlock);
1310 if (
Error Err = ExitCB(Builder.saveIP()))
1312 auto &FI = FinalizationStack.back();
1313 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1317 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1336 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1339 "Expected at least tid and bounded tid as arguments");
1340 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1343 assert(CI &&
"Expected call instruction to outlined function");
1344 CI->
getParent()->setName(
"omp_parallel");
1346 Builder.SetInsertPoint(CI);
1347 Type *PtrTy = OMPIRBuilder->VoidPtr;
1351 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1355 Value *Args = ArgsAlloca;
1359 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1360 Builder.restoreIP(CurrentIP);
1363 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1365 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1367 Builder.CreateStore(V, StoreAddress);
1371 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1372 : Builder.getInt32(1);
1375 Value *Parallel51CallArgs[] = {
1379 NumThreads ? NumThreads : Builder.getInt32(-1),
1380 Builder.getInt32(-1),
1384 Builder.getInt64(NumCapturedVars)};
1387 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1389 Builder.CreateCall(RTLFn, Parallel51CallArgs);
1392 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1395 Builder.SetInsertPoint(PrivTID);
1397 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1404 I->eraseFromParent();
1421 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1424 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1427 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1435 F->addMetadata(LLVMContext::MD_callback,
1444 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1447 "Expected at least tid and bounded tid as arguments");
1448 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1451 CI->
getParent()->setName(
"omp_parallel");
1452 Builder.SetInsertPoint(CI);
1455 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1459 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1461 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1468 auto PtrTy = OMPIRBuilder->VoidPtr;
1469 if (IfCondition && NumCapturedVars == 0) {
1474 Builder.CreateCall(RTLFn, RealArgs);
1477 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1480 Builder.SetInsertPoint(PrivTID);
1482 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1489 I->eraseFromParent();
1493OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1494 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1495 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1496 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1497 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1500 if (!updateToLocation(
Loc))
1504 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1505 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1506 Value *ThreadID = getOrCreateThreadID(Ident);
1512 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1516 if (NumThreads && !Config.isTargetDevice()) {
1519 Builder.CreateIntCast(NumThreads,
Int32,
false)};
1521 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1524 if (ProcBind != OMP_PROC_BIND_default) {
1528 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1530 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1533 BasicBlock *InsertBB = Builder.GetInsertBlock();
1538 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1546 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1547 Builder.restoreIP(NewOuter);
1548 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr");
1550 Builder.CreateAlloca(
Int32,
nullptr,
"zero.addr");
1553 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1556 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1560 PointerType ::get(M.getContext(), 0),
1561 "zero.addr.ascast");
1582 auto FiniCBWrapper = [&](InsertPointTy IP) {
1587 Builder.restoreIP(IP);
1589 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1593 "Unexpected insertion point for finalization call!");
1597 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1602 InsertPointTy InnerAllocaIP = Builder.saveIP();
1605 Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr.local");
1609 ToBeDeleted.
push_back(Builder.CreateLoad(
Int32, TIDAddr,
"tid.addr.use"));
1611 Builder.CreateLoad(
Int32, ZeroAddr,
"zero.addr.use");
1629 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1632 assert(BodyGenCB &&
"Expected body generation callback!");
1633 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1634 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1637 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1640 if (Config.isTargetDevice()) {
1642 OI.PostOutlineCB = [=, ToBeDeletedVec =
1643 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1645 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1646 ThreadID, ToBeDeletedVec);
1650 OI.PostOutlineCB = [=, ToBeDeletedVec =
1651 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1653 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1657 OI.OuterAllocaBB = OuterAllocaBlock;
1658 OI.EntryBB = PRegEntryBB;
1659 OI.ExitBB = PRegExitBB;
1663 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1674 ".omp_par", ArgsInZeroAddressSpace);
1679 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1681 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1686 return GV->getValueType() == OpenMPIRBuilder::Ident;
1691 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1694 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1697 if (&V == TIDAddr || &V == ZeroAddr) {
1698 OI.ExcludeArgsFromAggregate.push_back(&V);
1703 for (
Use &U : V.uses())
1705 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1715 if (!V.getType()->isPointerTy()) {
1719 Builder.restoreIP(OuterAllocaIP);
1721 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1725 Builder.SetInsertPoint(InsertBB,
1727 Builder.CreateStore(&V,
Ptr);
1730 Builder.restoreIP(InnerAllocaIP);
1731 Inner = Builder.CreateLoad(V.getType(),
Ptr);
1734 Value *ReplacementValue =
nullptr;
1737 ReplacementValue = PrivTID;
1739 InsertPointOrErrorTy AfterIP =
1740 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1742 return AfterIP.takeError();
1743 Builder.restoreIP(*AfterIP);
1745 InnerAllocaIP.getBlock(),
1746 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1748 assert(ReplacementValue &&
1749 "Expected copy/create callback to set replacement value!");
1750 if (ReplacementValue == &V)
1755 UPtr->set(ReplacementValue);
1780 for (
Value *Output : Outputs)
1783 assert(Outputs.empty() &&
1784 "OpenMP outlining should not produce live-out values!");
1786 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1788 for (
auto *BB : Blocks)
1795 auto FiniInfo = FinalizationStack.pop_back_val();
1797 assert(FiniInfo.DK == OMPD_parallel &&
1798 "Unexpected finalization stack state!");
1802 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1803 if (
Error Err = FiniCB(PreFiniIP))
1807 addOutlineInfo(std::move(OI));
1809 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1810 UI->eraseFromParent();
1815void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1818 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1819 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1821 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1824void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1825 if (!updateToLocation(
Loc))
1830void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1834 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1835 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1836 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1839 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1843void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1844 if (!updateToLocation(
Loc))
1846 emitTaskwaitImpl(
Loc);
1849void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1852 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1853 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1855 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1857 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1861void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1862 if (!updateToLocation(
Loc))
1864 emitTaskyieldImpl(
Loc);
// Dependency-array emission helper (the signature line naming this function
// is missing from this garbled chunk — presumably emitTaskDependencies).
// Materializes a stack array of kmp_depend_info records in the entry block
// and fills in, per dependence: base address, store size, and dependence
// kind — the layout consumed by __kmpc_omp_task_with_deps.
1873 OpenMPIRBuilder &OMPBuilder,
// Fast path: nothing to build when there are no dependences.
1876 if (Dependencies.
empty())
1896 Type *DependInfo = OMPBuilder.DependInfo;
1897 Module &M = OMPBuilder.M;
1899 Value *DepArray =
nullptr;
// Allocas belong in the function entry block; save/restore the caller's
// insertion point around the alloca.
1900 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1901 Builder.SetInsertPoint(
1902 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1905 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1907 Builder.restoreIP(OldIP);
// Populate one kmp_depend_info record per dependence.
1909 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1911 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1913 Value *Addr = Builder.CreateStructGEP(
1915 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1916 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1917 Builder.CreateStore(DepValPtr, Addr);
1920 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1921 Builder.CreateStore(
1922 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1925 Value *Flags = Builder.CreateStructGEP(
1927 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1928 Builder.CreateStore(
1929 ConstantInt::get(Builder.getInt8Ty(),
1930 static_cast<unsigned int>(Dep.DepKind)),
// Generate IR for the 'task' directive. The task body is outlined into a
// separate function; the PostOutlineCB below replaces the stale outlined
// call with __kmpc_omp_task_alloc + task launch, honoring tied/final/
// mergeable/priority/detach clauses, the if-clause (undeferred execution
// via task_begin_if0/task_complete_if0), and depend clauses.
// NOTE(review): extraction artifact — fused original line numbers and many
// missing interior lines; code left byte-identical.
1936OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1937 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1938 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1942 if (!updateToLocation(
Loc))
1943 return InsertPointTy();
1946 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1947 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Carve the region into alloca / body / exit blocks so the body can be
// outlined as a unit.
1964 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1965 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1967 splitBB(Builder,
true,
"task.alloca");
1969 InsertPointTy TaskAllocaIP =
1970 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1971 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1972 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1976 OI.EntryBB = TaskAllocaBB;
1977 OI.OuterAllocaBB = AllocaIP.getBlock();
1978 OI.ExitBB = TaskExitBB;
1983 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
// Post-outlining: rewrite the placeholder call into the real task runtime
// sequence. Runs after the outliner has produced OutlinedFn.
1985 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1986 Mergeable, Priority, EventHandle, TaskAllocaBB,
1987 ToBeDeleted](
Function &OutlinedFn)
mutable {
1990 "there must be a single user for the outlined function");
1995 bool HasShareds = StaleCI->
arg_size() > 1;
1996 Builder.SetInsertPoint(StaleCI);
2001 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2005 Value *ThreadID = getOrCreateThreadID(Ident);
// Build the kmp_tasking_flags word: bit 0 = tied, bit 1 = final,
// bit 2 = mergeable, bit 5 (32) = detachable/priority-related flag.
2017 Value *Flags = Builder.getInt32(Tied);
2020 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2021 Flags = Builder.CreateOr(FinalFlag, Flags);
2025 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2027 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2033 Value *TaskSize = Builder.getInt64(
2034 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2039 Value *SharedsSize = Builder.getInt64(0);
2043 assert(ArgStructAlloca &&
2044 "Unable to find the alloca instruction corresponding to arguments "
2045 "for extracted function");
2048 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2049 "arguments for extracted function");
2051 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
// Allocate the task descriptor (and shareds area) via the runtime.
2056 CallInst *TaskData = Builder.CreateCall(
2057 TaskAllocFn, {Ident, ThreadID, Flags,
2058 TaskSize, SharedsSize,
// detach clause: create a completion event and publish its address
// through the user-supplied event handle.
2065 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2066 OMPRTL___kmpc_task_allow_completion_event);
2068 Builder.CreateCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2070 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2071 Builder.getPtrTy(0));
2072 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2073 Builder.CreateStore(EventVal, EventHandleAddr);
// Copy captured shareds into the runtime-allocated shareds block.
2079 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2080 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2098 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2101 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
// priority clause: store the priority into the task descriptor's
// compiler-data slot.
2102 Value *PriorityData = Builder.CreateInBoundsGEP(
2103 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2106 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2108 Builder.CreateStore(Priority, CmplrData);
// if clause: branch between deferred (then) and undeferred (else)
// execution of the task.
2133 splitBB(Builder,
true,
"if.end");
2135 Builder.GetInsertPoint()->
getParent()->getTerminator();
2136 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2137 Builder.SetInsertPoint(IfTerminator);
2140 Builder.SetInsertPoint(ElseTI);
// Undeferred path: wait on dependences, then run the task inline
// bracketed by task_begin_if0 / task_complete_if0.
2142 if (Dependencies.size()) {
2144 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2147 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2148 ConstantInt::get(Builder.getInt32Ty(), 0),
2152 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2154 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2155 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2158 CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
2160 CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
2162 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2163 Builder.SetInsertPoint(ThenTI);
// Deferred path: enqueue via __kmpc_omp_task_with_deps when there are
// dependences, plain __kmpc_omp_task otherwise.
2166 if (Dependencies.size()) {
2168 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2171 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2172 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2177 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2178 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
// Inside the outlined function: reload shareds from the second argument
// and redirect remaining users to the loaded pointer.
2183 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2185 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2187 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2191 I->eraseFromParent();
2194 addOutlineInfo(std::move(OI));
2195 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2197 return Builder.saveIP();
// Generate IR for the 'taskgroup' directive: __kmpc_taskgroup before the
// body, __kmpc_end_taskgroup after it. The body is generated inline via
// BodyGenCB between the two runtime calls.
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
2200OpenMPIRBuilder::InsertPointOrErrorTy
2201OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2202 InsertPointTy AllocaIP,
2203 BodyGenCallbackTy BodyGenCB) {
2204 if (!updateToLocation(
Loc))
2205 return InsertPointTy();
2208 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2209 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2210 Value *ThreadID = getOrCreateThreadID(Ident);
// Open the taskgroup region.
2214 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2215 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2217 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2218 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
// Close the taskgroup region in the exit block.
2221 Builder.SetInsertPoint(TaskgroupExitBB);
2224 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2225 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2227 return Builder.saveIP();
// Generate IR for the 'sections' directive. Lowers the section list to a
// canonical workshare loop over section indices whose body is a switch
// dispatching to each section's callback, then applies a static workshare
// loop schedule. Cancellation branches are recorded as dummies and fixed up
// at the end; the finalization callback runs in a dedicated fini block.
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
2230OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2231 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2233 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2236 if (!updateToLocation(
Loc))
// Wrapper records a dummy cancellation branch to be retargeted later.
2242 auto FiniCBWrapper = [&](InsertPointTy IP) {
2251 CancellationBranches.
push_back(DummyBranch);
2255 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
// Loop body: switch on the induction variable, one case per section.
2273 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2274 Builder.restoreIP(CodeGenIP);
2276 splitBBWithSuffix(Builder,
false,
".sections.after");
2280 unsigned CaseNumber = 0;
2281 for (
auto SectionCB : SectionCBs) {
2283 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2284 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2285 Builder.SetInsertPoint(CaseBB);
2287 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
// Canonical loop over [0, #sections) with step 1.
2299 Value *LB = ConstantInt::get(I32Ty, 0);
2300 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2301 Value *
ST = ConstantInt::get(I32Ty, 1);
2303 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
// Apply the static workshare schedule; barrier unless nowait.
2307 InsertPointOrErrorTy WsloopIP =
2308 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2309 WorksharingLoopType::ForStaticLoop, !IsNowait);
2311 return WsloopIP.takeError();
2312 InsertPointTy AfterIP = *WsloopIP;
2315 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
// Pop and run the finalization callback pushed above.
2318 auto FiniInfo = FinalizationStack.pop_back_val();
2319 assert(FiniInfo.DK == OMPD_sections &&
2320 "Unexpected finalization stack state!");
2321 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2322 Builder.restoreIP(AfterIP);
2324 splitBBWithSuffix(Builder,
true,
"sections.fini");
2325 if (
Error Err = CB(Builder.saveIP()))
2327 AfterIP = {FiniBB, FiniBB->
begin()};
// Retarget the dummy cancellation branches recorded by FiniCBWrapper.
2331 for (
BranchInst *DummyBranch : CancellationBranches) {
// Generate IR for a single 'section' inside a sections region, emitted as an
// inlined region (EmitOMPInlinedRegion) with a finalization wrapper that
// redirects the exit branch to the enclosing sections structure.
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
2339OpenMPIRBuilder::InsertPointOrErrorTy
2340OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2341 BodyGenCallbackTy BodyGenCB,
2342 FinalizeCallbackTy FiniCB) {
2343 if (!updateToLocation(
Loc))
2346 auto FiniCBWrapper = [&](InsertPointTy IP) {
2357 Builder.restoreIP(IP);
2358 auto *CaseBB =
Loc.IP.getBlock();
2362 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2366 Directive OMPD = Directive::OMPD_sections;
2369 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2377 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
// Return the hardware thread id within the block via the device runtime
// (__kmpc_get_hardware_thread_id_in_block).
2380Value *OpenMPIRBuilder::getGPUThreadID() {
2381 return Builder.CreateCall(
2382 getOrCreateRuntimeFunction(M,
2383 OMPRTL___kmpc_get_hardware_thread_id_in_block),
// Return the device warp size via the runtime (__kmpc_get_warp_size).
2387Value *OpenMPIRBuilder::getGPUWarpSize() {
2388 return Builder.CreateCall(
2389 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
// Compute the warp id as thread_id >> log2(warp_size); warp size comes from
// the target grid configuration and is assumed to be a power of two.
2392Value *OpenMPIRBuilder::getNVPTXWarpID() {
2393 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2394 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
// Compute the lane id as thread_id & (warp_size - 1), built as a shifted
// all-ones mask of log2(warp_size) low bits.
2397Value *OpenMPIRBuilder::getNVPTXLaneID() {
2398 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2399 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2400 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2401 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
// Cast a value between types for shuffle purposes: no-op for identical
// types, bitcast for equal store sizes, sign-extending int cast where
// applicable, and otherwise a store/load round-trip through a temporary
// alloca placed at AllocaIP.
2405Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2408 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2409 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2410 assert(FromSize > 0 &&
"From size must be greater than zero");
2411 assert(ToSize > 0 &&
"To size must be greater than zero");
2412 if (FromType == ToType)
2414 if (FromSize == ToSize)
2415 return Builder.CreateBitCast(From, ToType);
2417 return Builder.CreateIntCast(From, ToType,
true);
// Fallback: spill to an alloca and reload as the target type.
2418 InsertPointTy SaveIP = Builder.saveIP();
2419 Builder.restoreIP(AllocaIP);
2420 Value *CastItem = Builder.CreateAlloca(ToType);
2421 Builder.restoreIP(SaveIP);
2423 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2424 CastItem, Builder.getPtrTy(0));
2425 Builder.CreateStore(From, ValCastItem);
2426 return Builder.CreateLoad(ToType, CastItem);
// Emit a call to the 32- or 64-bit runtime shuffle entry (selected by the
// element's store size) with the element cast to the shuffle integer type,
// and cast the shuffled result back. Elements wider than 8 bytes are
// unsupported (asserted).
2429Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2433 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2434 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2438 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2440 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2441 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2442 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2443 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2444 Value *WarpSizeCast =
2445 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2446 Value *ShuffleCall =
2447 Builder.CreateCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2448 return castValueToType(AllocaIP, ShuffleCall, CastTy);
// Shuffle an element of arbitrary size from a remote lane and store it to
// DstAddr: the element is decomposed into progressively smaller integer
// chunks (8, 4, 2, 1 bytes); multi-chunk runs are moved in a pointer-
// advancing loop, single chunks straight-line.
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
2451void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2454 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2465 Type *IndexTy = Builder.getIndexTy(
2466 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2467 Value *ElemPtr = DstAddr;
// Try chunk sizes from 8 bytes down to 1.
2469 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2473 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2474 Ptr, Builder.getPtrTy(0),
Ptr->getName() +
".ascast");
2476 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2477 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2478 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
// More than one chunk of this size: emit a precond/then/exit loop that
// advances Ptr and ElemPtr one chunk per iteration.
2481 if ((
Size / IntSize) > 1) {
2482 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2483 SrcAddrGEP, Builder.getPtrTy());
2488 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2489 emitBlock(PreCondBB, CurFunc);
2491 Builder.CreatePHI(
Ptr->getType(), 2);
2494 Builder.CreatePHI(ElemPtr->
getType(), 2);
2498 Value *PtrDiff = Builder.CreatePtrDiff(
2499 Builder.getInt8Ty(), PtrEnd,
2500 Builder.CreatePointerBitCastOrAddrSpaceCast(
Ptr, Builder.getPtrTy()));
2501 Builder.CreateCondBr(
2502 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2504 emitBlock(ThenBB, CurFunc);
2505 Value *Res = createRuntimeShuffleFunction(
2507 Builder.CreateAlignedLoad(
2508 IntType,
Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2510 Builder.CreateAlignedStore(Res, ElemPtr,
2511 M.getDataLayout().getPrefTypeAlign(ElemType));
2513 Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2514 Value *LocalElemPtr =
2515 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2518 emitBranch(PreCondBB);
2519 emitBlock(ExitBB, CurFunc);
// Exactly one chunk of this size: shuffle and store straight-line.
2521 Value *Res = createRuntimeShuffleFunction(
2522 AllocaIP, Builder.CreateLoad(IntType,
Ptr), IntType,
Offset);
2525 Res = Builder.CreateTrunc(Res, ElemType);
2526 Builder.CreateStore(Res, ElemPtr);
2527 Ptr = Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2529 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
// Copy a reduction list between bases. For RemoteLaneToThread, each element
// is shuffled in from a remote lane into a fresh alloca and the destination
// list pointer is updated; for ThreadCopy, elements are copied in place.
// Scalars/complex/aggregates are handled per EvalKind.
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
2535void OpenMPIRBuilder::emitReductionListCopy(
2536 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2538 CopyOptionsTy CopyOptions) {
2539 Type *IndexTy = Builder.getIndexTy(
2540 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2541 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
// One iteration per reduction variable.
2545 for (
auto En :
enumerate(ReductionInfos)) {
2546 const ReductionInfo &RI = En.value();
2547 Value *SrcElementAddr =
nullptr;
2548 Value *DestElementAddr =
nullptr;
2549 Value *DestElementPtrAddr =
nullptr;
2551 bool ShuffleInElement =
false;
2554 bool UpdateDestListPtr =
false;
2557 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2558 ReductionArrayTy, SrcBase,
2559 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2560 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2564 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2565 ReductionArrayTy, DestBase,
2566 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// Remote-lane copy: allocate a private destination element and mark it
// for shuffle-in and list-pointer update.
2568 case CopyAction::RemoteLaneToThread: {
2569 InsertPointTy CurIP = Builder.saveIP();
2570 Builder.restoreIP(AllocaIP);
2571 AllocaInst *DestAlloca = Builder.CreateAlloca(RI.ElementType,
nullptr,
2572 ".omp.reduction.element");
2574 M.getDataLayout().getPrefTypeAlign(RI.ElementType));
2575 DestElementAddr = DestAlloca;
2577 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2578 DestElementAddr->
getName() +
".ascast");
2579 Builder.restoreIP(CurIP);
2580 ShuffleInElement =
true;
2581 UpdateDestListPtr =
true;
2584 case CopyAction::ThreadCopy: {
2586 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
// Element transfer: shuffle from the remote lane, or copy by kind.
2593 if (ShuffleInElement) {
2594 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2595 RemoteLaneOffset, ReductionArrayTy);
2597 switch (RI.EvaluationKind) {
2598 case EvalKind::Scalar: {
2599 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2601 Builder.CreateStore(Elem, DestElementAddr);
2604 case EvalKind::Complex: {
2605 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2606 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2607 Value *SrcReal = Builder.CreateLoad(
2608 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2609 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2610 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2611 Value *SrcImg = Builder.CreateLoad(
2612 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2614 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2615 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2616 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2617 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2618 Builder.CreateStore(SrcReal, DestRealPtr);
2619 Builder.CreateStore(SrcImg, DestImgPtr);
2622 case EvalKind::Aggregate: {
2623 Value *SizeVal = Builder.getInt64(
2624 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2625 Builder.CreateMemCpy(
2626 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2627 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// Publish the private element's address into the destination list.
2639 if (UpdateDestListPtr) {
2640 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2641 DestElementAddr, Builder.getPtrTy(),
2642 DestElementAddr->
getName() +
".ascast");
2643 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
// Inter-warp copy function emitter (the signature line naming this function
// is missing from this garbled chunk — presumably emitInterWarpCopyFunction).
// Builds "_omp_reduction_inter_warp_copy_func": warp masters write reduced
// chunks into a shared-memory transfer medium, then the first warp reads
// them back, with barriers between phases; wide elements are moved in 4/2/1-
// byte chunks with a counter loop per chunk size.
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
2650 AttributeList FuncAttrs) {
2651 InsertPointTy SavedIP = Builder.saveIP();
2654 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2658 "_omp_reduction_inter_warp_copy_func", &M);
2663 Builder.SetInsertPoint(EntryBB);
// Lazily create the module-level shared transfer buffer.
2681 "__openmp_nvptx_data_transfer_temporary_storage";
2682 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2683 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2685 if (!TransferMedium) {
2694 Value *GPUThreadID = getGPUThreadID();
2696 Value *LaneID = getNVPTXLaneID();
2698 Value *WarpID = getNVPTXWarpID();
2700 InsertPointTy AllocaIP =
2701 InsertPointTy(Builder.GetInsertBlock(),
2702 Builder.GetInsertBlock()->getFirstInsertionPt());
// Spill the two arguments to allocas and address-space-cast them.
2705 Builder.restoreIP(AllocaIP);
2706 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2707 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2709 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2710 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2711 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2712 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2713 NumWarpsAlloca, Builder.getPtrTy(0),
2714 NumWarpsAlloca->
getName() +
".ascast");
2715 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2716 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2718 InsertPointTy CodeGenIP =
2720 Builder.restoreIP(CodeGenIP);
2723 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
// Per reduction variable, then per chunk size (4 -> 2 -> 1 bytes).
2725 for (
auto En :
enumerate(ReductionInfos)) {
2730 const ReductionInfo &RI = En.value();
2731 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(RI.ElementType);
2732 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2735 unsigned NumIters = RealTySize / TySize;
2738 Value *Cnt =
nullptr;
2739 Value *CntAddr =
nullptr;
// Multi-iteration case: materialize a counter in the entry allocas.
2743 CodeGenIP = Builder.saveIP();
2744 Builder.restoreIP(AllocaIP);
2746 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2748 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2749 CntAddr->
getName() +
".ascast");
2750 Builder.restoreIP(CodeGenIP);
2757 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2758 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2760 Value *
Cmp = Builder.CreateICmpULT(
2761 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2762 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2763 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
// Barrier before warp masters write to the transfer medium.
2767 InsertPointOrErrorTy BarrierIP1 =
2768 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2769 omp::Directive::OMPD_unknown,
2773 return BarrierIP1.takeError();
// Phase 1: lane 0 of each warp copies its chunk to shared memory.
2779 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2780 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2781 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2784 auto *RedListArrayTy =
2786 Type *IndexTy = Builder.getIndexTy(
2787 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2789 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2790 {ConstantInt::get(IndexTy, 0),
2791 ConstantInt::get(IndexTy, En.index())});
2793 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2795 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2799 Value *MediumPtr = Builder.CreateInBoundsGEP(
2800 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2803 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2805 Builder.CreateStore(Elem, MediumPtr,
2807 Builder.CreateBr(MergeBB);
2810 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2811 Builder.CreateBr(MergeBB);
// Barrier between the write and read phases.
2814 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2815 InsertPointOrErrorTy BarrierIP2 =
2816 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2817 omp::Directive::OMPD_unknown,
2821 return BarrierIP2.takeError();
// Phase 2: the first NumWarps threads read chunks back from shared
// memory into their reduce list.
2828 Value *NumWarpsVal =
2829 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2831 Value *IsActiveThread =
2832 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2833 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2835 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2839 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2840 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2842 Value *TargetElemPtrPtr =
2843 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2844 {ConstantInt::get(IndexTy, 0),
2845 ConstantInt::get(IndexTy, En.index())});
2846 Value *TargetElemPtrVal =
2847 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2848 Value *TargetElemPtr = TargetElemPtrVal;
2851 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2854 Value *SrcMediumValue =
2855 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2856 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2857 Builder.CreateBr(W0MergeBB);
2859 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2860 Builder.CreateBr(W0MergeBB);
2862 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
// Counter-loop back-edge for the multi-iteration case.
2865 Cnt = Builder.CreateNSWAdd(
2866 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2867 Builder.CreateStore(Cnt, CntAddr,
false);
2869 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2870 emitBranch(PrecondBB);
2871 emitBlock(ExitBB, CurFn);
2873 RealTySize %= TySize;
2877 Builder.CreateRetVoid();
2878 Builder.restoreIP(SavedIP);
// Build "_omp_reduction_shuffle_and_reduce_func"(reduce_list, lane_id,
// remote_lane_offset, algo_version): shuffles a remote reduce list into a
// local copy, conditionally reduces it into the local list depending on the
// algorithm version / lane predicates, and conditionally copies the result
// back for the half-warp that needs it.
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
2883Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2885 AttributeList FuncAttrs) {
2889 {Builder.getPtrTy(), Builder.getInt16Ty(),
2890 Builder.getInt16Ty(), Builder.getInt16Ty()},
2894 "_omp_reduction_shuffle_and_reduce_func", &M);
2904 Builder.SetInsertPoint(EntryBB);
// Spill all four arguments to allocas, address-space-cast, and reload.
2915 Type *ReduceListArgType = ReduceListArg->
getType();
2917 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
2918 Value *ReduceListAlloca = Builder.CreateAlloca(
2919 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2920 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2921 LaneIDArg->
getName() +
".addr");
2922 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
2923 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2924 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2925 AlgoVerArg->
getName() +
".addr");
2931 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
2932 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2934 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2935 ReduceListAlloca, ReduceListArgType,
2936 ReduceListAlloca->
getName() +
".ascast");
2937 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2938 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2939 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2940 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2941 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2942 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2943 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2944 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2945 RemoteReductionListAlloca, Builder.getPtrTy(),
2946 RemoteReductionListAlloca->
getName() +
".ascast");
2948 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2949 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
2950 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
2951 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
2953 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
2954 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
2955 Value *RemoteLaneOffset =
2956 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
2957 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
// Shuffle the remote lane's reduce list into the local remote-list copy.
2964 emitReductionListCopy(
2965 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
2966 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
// Compute the "should reduce" predicate:
//   algo 0: always; algo 1: lane < offset;
//   algo 2: even lane && offset > 0.
2989 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
2990 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
2991 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
2992 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
2993 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
2994 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
2995 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
2996 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
2997 Value *RemoteOffsetComp =
2998 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
2999 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3000 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3001 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
// Conditionally call the user reduce function on (local, remote) lists.
3007 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3008 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3009 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3010 ReduceList, Builder.getPtrTy());
3011 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3012 RemoteListAddrCast, Builder.getPtrTy());
3013 Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3014 ->addFnAttr(Attribute::NoUnwind);
3015 Builder.CreateBr(MergeBB);
3017 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3018 Builder.CreateBr(MergeBB);
3020 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
// Algo 1 upper half-warp: copy the remote list over the local one.
3024 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3025 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3026 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3031 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3033 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3034 emitReductionListCopy(AllocaIP, CopyAction::ThreadCopy, RedListArrayTy,
3035 ReductionInfos, RemoteListAddrCast, ReduceList);
3036 Builder.CreateBr(CpyMergeBB);
3038 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3039 Builder.CreateBr(CpyMergeBB);
3041 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3043 Builder.CreateRetVoid();
// Build "_omp_reduction_list_to_global_copy_func"(buffer, idx, reduce_list):
// copies each element of a thread-local reduce list into the corresponding
// slot of the global team-reduction buffer, per EvalKind (scalar / complex
// / aggregate memcpy).
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
3048Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3050 AttributeList FuncAttrs) {
3051 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3054 Builder.getVoidTy(),
3055 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3059 "_omp_reduction_list_to_global_copy_func", &M);
3066 Builder.SetInsertPoint(EntryBlock);
// Spill the three arguments, address-space-cast, store, and reload.
3075 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3076 BufferArg->
getName() +
".addr");
3077 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3079 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3080 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3081 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3082 BufferArgAlloca, Builder.getPtrTy(),
3083 BufferArgAlloca->
getName() +
".ascast");
3084 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3085 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3086 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3087 ReduceListArgAlloca, Builder.getPtrTy(),
3088 ReduceListArgAlloca->
getName() +
".ascast");
3090 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3091 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3092 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3094 Value *LocalReduceList =
3095 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3096 Value *BufferArgVal =
3097 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3098 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3099 Type *IndexTy = Builder.getIndexTy(
3100 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Copy each reduction element list -> global buffer slot.
3101 for (
auto En :
enumerate(ReductionInfos)) {
3102 const ReductionInfo &RI = En.value();
3103 auto *RedListArrayTy =
3106 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3107 RedListArrayTy, LocalReduceList,
3108 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3110 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3114 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3115 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3116 ReductionsBufferTy, BufferVD, 0, En.index());
3118 switch (RI.EvaluationKind) {
3119 case EvalKind::Scalar: {
3120 Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3121 Builder.CreateStore(TargetElement, GlobVal);
3124 case EvalKind::Complex: {
3125 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3126 RI.ElementType, ElemPtr, 0, 0,
".realp");
3127 Value *SrcReal = Builder.CreateLoad(
3128 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3129 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3130 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3131 Value *SrcImg = Builder.CreateLoad(
3132 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3134 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3135 RI.ElementType, GlobVal, 0, 0,
".realp");
3136 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3137 RI.ElementType, GlobVal, 0, 1,
".imagp");
3138 Builder.CreateStore(SrcReal, DestRealPtr);
3139 Builder.CreateStore(SrcImg, DestImgPtr);
3142 case EvalKind::Aggregate: {
3144 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3145 Builder.CreateMemCpy(
3146 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3147 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3153 Builder.CreateRetVoid();
3154 Builder.restoreIP(OldIP);
// Build "_omp_reduction_list_to_global_reduce_func"(buffer, idx,
// reduce_list): assembles a temporary reduce list whose element pointers
// point into the global buffer slots, then calls the user reduce function
// with (global-list, thread-list) to reduce into the buffer.
// NOTE(review): fused line numbers / missing interior lines are extraction
// artifacts; code left byte-identical.
3158Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3160 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3161 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3164 Builder.getVoidTy(),
3165 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3169 "_omp_reduction_list_to_global_reduce_func", &M);
3176 Builder.SetInsertPoint(EntryBlock);
// Spill the three arguments plus a local reduce-list array.
3185 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3186 BufferArg->
getName() +
".addr");
3187 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3189 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3190 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3191 auto *RedListArrayTy =
3196 Value *LocalReduceList =
3197 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3199 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3200 BufferArgAlloca, Builder.getPtrTy(),
3201 BufferArgAlloca->
getName() +
".ascast");
3202 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3203 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3204 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3205 ReduceListArgAlloca, Builder.getPtrTy(),
3206 ReduceListArgAlloca->
getName() +
".ascast");
3207 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3208 LocalReduceList, Builder.getPtrTy(),
3209 LocalReduceList->
getName() +
".ascast");
3211 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3212 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3213 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3215 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3216 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3217 Type *IndexTy = Builder.getIndexTy(
3218 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Point each local-list slot at the corresponding global buffer element.
3219 for (
auto En :
enumerate(ReductionInfos)) {
3220 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3221 RedListArrayTy, LocalReduceListAddrCast,
3222 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3224 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3226 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3227 ReductionsBufferTy, BufferVD, 0, En.index());
3228 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Reduce the thread's list into the buffer-backed list.
3233 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3234 Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3235 ->addFnAttr(Attribute::NoUnwind);
3236 Builder.CreateRetVoid();
3237 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_global_to_list_copy_func"(void*, i32, void*):
// copies each reduction element from the selected global reductions buffer
// slot back into the thread-local reduce list, dispatching on the element's
// evaluation kind (scalar / complex pair / aggregate memcpy).
// NOTE(review): partial text extraction — fused line numbers, missing
// declaration lines and closing braces; restore from upstream before
// compiling.
3241Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3243 AttributeList FuncAttrs) {
3244 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3247 Builder.getVoidTy(),
3248 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3252 "_omp_reduction_global_to_list_copy_func", &M);
3259 Builder.SetInsertPoint(EntryBlock);
// Spill the three arguments into allocas, then cast to generic pointers.
3268 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3269 BufferArg->
getName() +
".addr");
3270 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3272 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3273 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3274 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3275 BufferArgAlloca, Builder.getPtrTy(),
3276 BufferArgAlloca->
getName() +
".ascast");
3277 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3278 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3279 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3280 ReduceListArgAlloca, Builder.getPtrTy(),
3281 ReduceListArgAlloca->
getName() +
".ascast");
3282 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3283 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3284 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3286 Value *LocalReduceList =
3287 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3288 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3289 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3290 Type *IndexTy = Builder.getIndexTy(
3291 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3292 for (
auto En :
enumerate(ReductionInfos)) {
3293 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3294 auto *RedListArrayTy =
// Destination: the thread's reduce-list slot; source: the matching
// field of the indexed global buffer element.
3297 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3298 RedListArrayTy, LocalReduceList,
3299 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3301 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3304 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3305 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3306 ReductionsBufferTy, BufferVD, 0, En.index());
// Copy strategy depends on how the reduction element is evaluated.
3308 switch (RI.EvaluationKind) {
3309 case EvalKind::Scalar: {
3310 Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
3311 Builder.CreateStore(TargetElement, ElemPtr);
3314 case EvalKind::Complex: {
// Complex values are copied as their real/imaginary struct fields.
3315 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3316 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3317 Value *SrcReal = Builder.CreateLoad(
3318 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3319 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3320 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3321 Value *SrcImg = Builder.CreateLoad(
3322 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3324 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3325 RI.ElementType, ElemPtr, 0, 0,
".realp");
3326 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3327 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3328 Builder.CreateStore(SrcReal, DestRealPtr);
3329 Builder.CreateStore(SrcImg, DestImgPtr);
3332 case EvalKind::Aggregate: {
// Aggregates are copied bytewise, sized by the store size of the type.
3334 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3335 Builder.CreateMemCpy(
3336 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3337 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3344 Builder.CreateRetVoid();
3345 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_global_to_list_reduce_func"(void*, i32, void*):
// mirror of emitListToGlobalReduceFunction — builds a local reduce list
// pointing into the indexed global buffer element, then calls ReduceFn with
// the incoming reduce list as LHS and the buffer-backed list as RHS.
// NOTE(review): partial text extraction — fused line numbers and missing
// lines (e.g. RedListArrayTy declaration, closing braces); restore from
// upstream before compiling.
3349Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3351 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3352 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3355 Builder.getVoidTy(),
3356 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3360 "_omp_reduction_global_to_list_reduce_func", &M);
3367 Builder.SetInsertPoint(EntryBlock);
// Spill arguments into allocas, plus a local array for the reduce list.
3376 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3377 BufferArg->
getName() +
".addr");
3378 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3380 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3381 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3387 Value *LocalReduceList =
3388 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
// Generic-pointer casts for all allocas used below.
3390 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3391 BufferArgAlloca, Builder.getPtrTy(),
3392 BufferArgAlloca->
getName() +
".ascast");
3393 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3394 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3395 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3396 ReduceListArgAlloca, Builder.getPtrTy(),
3397 ReduceListArgAlloca->
getName() +
".ascast");
3398 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3399 LocalReduceList, Builder.getPtrTy(),
3400 LocalReduceList->
getName() +
".ascast");
3402 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3403 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3404 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3406 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3407 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3408 Type *IndexTy = Builder.getIndexTy(
3409 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Each local-list slot receives a pointer to the matching field of the
// indexed global buffer element.
3410 for (
auto En :
enumerate(ReductionInfos)) {
3411 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3412 RedListArrayTy, ReductionList,
3413 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3416 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3417 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3418 ReductionsBufferTy, BufferVD, 0, En.index());
3419 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Note the operand order differs from list-to-global: the incoming list
// is the destination (LHS) here.
3424 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3425 Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
3426 ->addFnAttr(Attribute::NoUnwind);
3427 Builder.CreateRetVoid();
3428 Builder.restoreIP(OldIP);
// Returns the platform-decorated name for a reduction function derived from
// \p Name: Name + createPlatformSpecificName({"omp","reduction",
// "reduction_func"}).
// NOTE(review): closing brace appears to be missing from this extraction.
3432std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3433 std::string Suffix =
3434 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
3435 return (Name + Suffix).
str();
// Builds the combined reduction function (void(ptr, ptr)) that walks two
// arrays of element pointers (LHS/RHS reduce lists) and applies each
// reduction's generator callback; for the Clang callback style, a fixup
// pass afterwards rewrites the element pointers.
// NOTE(review): this extraction dropped the first signature line (function
// name/return type, presumably createReductionFunction — confirm against
// upstream) as well as several declarations and closing braces.
3440 ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
3442 {Builder.getPtrTy(), Builder.getPtrTy()},
3444 std::string
Name = getReductionFuncName(ReducerName);
3452 Builder.SetInsertPoint(EntryBB);
3456 Value *LHSArrayPtr =
nullptr;
3457 Value *RHSArrayPtr =
nullptr;
// Spill the two array arguments and cast to the expected address space.
3464 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3466 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3467 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3468 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3469 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3470 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3471 Builder.CreateStore(Arg0, LHSAddrCast);
3472 Builder.CreateStore(Arg1, RHSAddrCast);
3473 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3474 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3477 Type *IndexTy = Builder.getIndexTy(
3478 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Per reduction: fetch element pointers out of both arrays, then either
// record them (Clang callbacks) or load, reduce, and store back (MLIR).
3480 for (
auto En :
enumerate(ReductionInfos)) {
3481 const ReductionInfo &RI = En.value();
3482 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3483 RedArrayTy, RHSArrayPtr,
3484 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3485 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3486 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3487 RHSI8Ptr, RI.PrivateVariable->getType(),
3488 RHSI8Ptr->
getName() +
".ascast");
3490 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3491 RedArrayTy, LHSArrayPtr,
3492 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3493 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3494 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3495 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3497 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3501 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3502 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3504 InsertPointOrErrorTy AfterIP =
3505 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3507 return AfterIP.takeError();
// A cleared insert block signals the callback terminated codegen early.
3508 if (!Builder.GetInsertBlock())
3509 return ReductionFunc;
3511 Builder.restoreIP(*AfterIP);
3512 Builder.CreateStore(Reduced, LHSPtr);
// Clang-style callbacks: run the per-reduction fixup, then redirect any
// uses of the placeholder pointers inside ReductionFunc.
3516 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3517 for (
auto En :
enumerate(ReductionInfos)) {
3518 unsigned Index = En.index();
3519 const ReductionInfo &RI = En.value();
3520 Value *LHSFixupPtr, *RHSFixupPtr;
3521 Builder.restoreIP(RI.ReductionGenClang(
3522 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3527 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3532 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3538 Builder.CreateRetVoid();
3539 return ReductionFunc;
// Sanity-checks every ReductionInfo: non-null variable, private copy, and
// generator callback; variable and private copy share a (pointer) type.
// NOTE(review): the enclosing function's signature is missing from this
// extraction (presumably a static checkReductionInfos helper — confirm
// against upstream).
3545 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3547 assert(RI.Variable &&
"expected non-null variable");
3548 assert(RI.PrivateVariable &&
"expected non-null private variable");
3549 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3550 "expected non-null reduction generator callback");
3553 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3554 "expected variables and their private equivalents to have the same "
3557 assert(RI.Variable->getType()->isPointerTy() &&
3558 "expected variables to be pointers");
// Lowers OpenMP reductions for GPU targets. Packs the private copies into a
// reduce list, emits the shuffle/inter-warp helpers, and calls either
// __kmpc_nvptx_parallel_reduce_nowait_v2 (parallel) or
// __kmpc_nvptx_teams_reduce_nowait_v2 with list<->global copy/reduce helper
// functions (teams). When the runtime returns 1, the winning thread folds
// the private values back into the original variables.
// NOTE(review): partial text extraction — fused line numbers and missing
// lines (declarations of RedArrayTy, SarFunc/WcFunc, Args3 tail, braces);
// restore from upstream before compiling.
3562OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
3563 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3565 bool IsNoWait,
bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind,
3566 std::optional<omp::GV> GridValue,
unsigned ReductionBufNum,
3567 Value *SrcLocInfo) {
3568 if (!updateToLocation(
Loc))
3569 return InsertPointTy();
3570 Builder.restoreIP(CodeGenIP);
3577 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3578 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Nothing to do for an empty reduction clause.
3581 if (ReductionInfos.
size() == 0)
3582 return Builder.saveIP();
3585 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3591 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
// Attributes for the generated helper functions; OptimizeNone is stripped
// so the helpers can be optimized even in -O0-style modules.
3595 AttributeList FuncAttrs;
3596 AttrBuilder AttrBldr(Ctx);
3598 AttrBldr.addAttribute(Attr);
3599 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3600 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
3602 CodeGenIP = Builder.saveIP();
3604 createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
3605 ReductionInfos, ReductionGenCBKind, FuncAttrs);
3606 if (!ReductionResult)
3608 Function *ReductionFunc = *ReductionResult;
3609 Builder.restoreIP(CodeGenIP);
3612 if (GridValue.has_value())
3613 Config.setGridValue(GridValue.value());
3628 Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
// Allocate the reduce list in the alloca block, then fill it at the
// current code-gen point with casts of each private variable.
3630 CodeGenIP = Builder.saveIP();
3631 Builder.restoreIP(AllocaIP);
3632 Value *ReductionListAlloca =
3633 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3634 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3635 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3636 Builder.restoreIP(CodeGenIP);
3637 Type *IndexTy = Builder.getIndexTy(
3638 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3639 for (
auto En :
enumerate(ReductionInfos)) {
3640 const ReductionInfo &RI = En.value();
3641 Value *ElemPtr = Builder.CreateInBoundsGEP(
3642 RedArrayTy, ReductionList,
3643 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3645 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3646 Builder.CreateStore(CastElem, ElemPtr);
3648 CodeGenIP = Builder.saveIP();
3650 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3652 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs);
3656 Builder.restoreIP(CodeGenIP);
3658 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
// The runtime is told the max element size times the number of elements.
3660 unsigned MaxDataSize = 0;
3662 for (
auto En :
enumerate(ReductionInfos)) {
3663 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3664 if (
Size > MaxDataSize)
3666 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3668 Value *ReductionDataSize =
3669 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
3670 if (!IsTeamsReduction) {
// Parallel reduction path.
3671 Value *SarFuncCast =
3672 Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, FuncPtrTy);
3674 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
3675 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3677 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3678 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3679 Res = Builder.CreateCall(Pv2Ptr, Args);
// Teams reduction path: needs a fixed global buffer plus the four
// list<->global copy/reduce helpers.
3681 CodeGenIP = Builder.saveIP();
3683 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3684 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3685 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3686 Function *LtGCFunc = emitListToGlobalCopyFunction(
3687 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3688 Function *LtGRFunc = emitListToGlobalReduceFunction(
3689 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3690 Function *GtLCFunc = emitGlobalToListCopyFunction(
3691 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3692 Function *GtLRFunc = emitGlobalToListReduceFunction(
3693 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3694 Builder.restoreIP(CodeGenIP);
3696 Value *KernelTeamsReductionPtr = Builder.CreateCall(
3697 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3699 Value *Args3[] = {SrcLocInfo,
3700 KernelTeamsReductionPtr,
3701 Builder.getInt32(ReductionBufNum),
3711 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3712 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3713 Res = Builder.CreateCall(TeamsReduceFn, Args3);
// Res == 1 means this thread must perform the final combine into the
// original (shared) variables.
3719 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3720 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3726 emitBlock(ThenBB, CurFunc);
3729 for (
auto En :
enumerate(ReductionInfos)) {
3730 const ReductionInfo &RI = En.value();
3733 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3735 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3736 Value *LHSPtr, *RHSPtr;
3737 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3738 &LHSPtr, &RHSPtr, CurFunc));
3751 Value *LHSValue = Builder.CreateLoad(RI.ElementType,
LHS,
"final.lhs");
3752 Value *RHSValue = Builder.CreateLoad(RI.ElementType,
RHS,
"final.rhs");
3754 InsertPointOrErrorTy AfterIP =
3755 RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
3757 return AfterIP.takeError();
3758 Builder.restoreIP(*AfterIP);
3759 Builder.CreateStore(Reduced,
LHS,
false);
3762 emitBlock(ExitBB, CurFunc);
3763 if (ContinuationBlock) {
3764 Builder.CreateBr(ContinuationBlock);
3765 Builder.SetInsertPoint(ContinuationBlock);
3767 Config.setEmitLLVMUsed();
3769 return Builder.saveIP();
// Fills in the host-side ".omp.reduction.func": for each reduction it loads
// the LHS/RHS element pointers from the two array arguments, loads the
// values, invokes the reduction generator, and stores the result back into
// the LHS slot unless the reduction is by-reference.
// NOTE(review): the enclosing definition's opening lines are missing from
// this extraction (name/signature not visible — presumably a static
// populate/finalize helper for createReductions; confirm against upstream).
3778 ".omp.reduction.func", &M);
3788 Builder.SetInsertPoint(ReductionFuncBlock);
3789 Value *LHSArrayPtr =
nullptr;
3790 Value *RHSArrayPtr =
nullptr;
// One path spills the arguments through allocas with address-space casts;
// the other (below, guarded by missing lines) uses them directly.
3801 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3803 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3804 Value *LHSAddrCast =
3805 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3806 Value *RHSAddrCast =
3807 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3808 Builder.CreateStore(Arg0, LHSAddrCast);
3809 Builder.CreateStore(Arg1, RHSAddrCast);
3810 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3811 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3813 LHSArrayPtr = ReductionFunc->
getArg(0);
3814 RHSArrayPtr = ReductionFunc->
getArg(1);
3817 unsigned NumReductions = ReductionInfos.
size();
3820 for (
auto En :
enumerate(ReductionInfos)) {
3821 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3822 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3823 RedArrayTy, LHSArrayPtr, 0, En.index());
3824 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3825 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3826 LHSI8Ptr, RI.Variable->
getType());
3827 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3828 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3829 RedArrayTy, RHSArrayPtr, 0, En.index());
3830 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3831 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3832 RHSI8Ptr, RI.PrivateVariable->
getType());
3833 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3835 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3836 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3838 return AfterIP.takeError();
3840 Builder.restoreIP(*AfterIP);
3842 if (!Builder.GetInsertBlock())
// By-ref reductions manage their own storage; skip the store-back.
3846 if (!IsByRef[En.index()])
3847 Builder.CreateStore(Reduced, LHSPtr);
3849 Builder.CreateRetVoid();
// Host lowering of OpenMP reductions: builds the "red.array" of private
// element pointers, calls __kmpc_reduce[_nowait], and switches on the
// result — case 1 does the non-atomic combine (plus __kmpc_end_reduce*),
// case 2 does the atomic combine when every reduction provides an atomic
// generator and none is by-ref. GPU configs delegate to createReductionsGPU.
// NOTE(review): partial text extraction — fused line numbers and missing
// lines (block creations, RedArrayTy declaration, braces); restore from
// upstream before compiling.
3853OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
3854 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3856 bool IsNoWait,
bool IsTeamsReduction) {
3859 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
3860 IsNoWait, IsTeamsReduction);
3864 if (!updateToLocation(
Loc))
3865 return InsertPointTy();
3867 if (ReductionInfos.
size() == 0)
3868 return Builder.saveIP();
3877 unsigned NumReductions = ReductionInfos.
size();
// The pointer array lives in the alloca block; it is filled at the
// current insertion block.
3879 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
3880 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
3882 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3884 for (
auto En :
enumerate(ReductionInfos)) {
3885 unsigned Index = En.index();
3886 const ReductionInfo &RI = En.value();
3887 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
3888 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3889 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
3894 Type *IndexTy = Builder.getIndexTy(
3895 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3896 Function *
Func = Builder.GetInsertBlock()->getParent();
3899 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
// The atomic path is only usable if every reduction has an atomic
// generator; the ident flag advertises that capability to the runtime.
3900 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
3901 return RI.AtomicReductionGen;
3903 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
3905 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3907 Value *ThreadId = getOrCreateThreadID(Ident);
3908 Constant *NumVariables = Builder.getInt32(NumReductions);
3910 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3911 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
3913 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3914 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
3915 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3916 : RuntimeFunction::OMPRTL___kmpc_reduce);
3918 Builder.CreateCall(ReduceFunc,
3919 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3920 ReductionFunc, Lock},
// Dispatch on the runtime's choice: 1 = non-atomic, 2 = atomic,
// anything else falls through to the continuation.
3931 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
3932 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
3933 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
3938 Builder.SetInsertPoint(NonAtomicRedBlock);
3939 for (
auto En :
enumerate(ReductionInfos)) {
3940 const ReductionInfo &RI = En.value();
3944 Value *RedValue = RI.Variable;
3945 if (!IsByRef[En.index()]) {
3946 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3947 "red.value." +
Twine(En.index()));
3949 Value *PrivateRedValue =
3950 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
3951 "red.private.value." +
Twine(En.index()));
3953 InsertPointOrErrorTy AfterIP =
3954 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3956 return AfterIP.takeError();
3957 Builder.restoreIP(*AfterIP);
3959 if (!Builder.GetInsertBlock())
3960 return InsertPointTy();
3962 if (!IsByRef[En.index()])
3963 Builder.CreateStore(Reduced, RI.Variable);
3965 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
3966 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3967 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3968 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
3969 Builder.CreateBr(ContinuationBlock);
// Atomic path; unreachable when atomics were not advertised.
3974 Builder.SetInsertPoint(AtomicRedBlock);
3975 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3976 for (
const ReductionInfo &RI : ReductionInfos) {
3977 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
3978 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
3980 return AfterIP.takeError();
3981 Builder.restoreIP(*AfterIP);
3982 if (!Builder.GetInsertBlock())
3983 return InsertPointTy();
3985 Builder.CreateBr(ContinuationBlock);
3987 Builder.CreateUnreachable();
3998 if (!Builder.GetInsertBlock())
3999 return InsertPointTy();
4001 Builder.SetInsertPoint(ContinuationBlock);
4002 return Builder.saveIP();
// Emits an OpenMP 'master' region: __kmpc_master guards entry and
// __kmpc_end_master closes it; the body/finalization callbacks are handled
// by EmitOMPInlinedRegion.
// NOTE(review): extraction dropped a few lines (Args declaration, trailing
// arguments of EmitOMPInlinedRegion, closing brace).
4005OpenMPIRBuilder::InsertPointOrErrorTy
4006OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4007 BodyGenCallbackTy BodyGenCB,
4008 FinalizeCallbackTy FiniCB) {
4009 if (!updateToLocation(
Loc))
4012 Directive OMPD = Directive::OMPD_master;
4014 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4015 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4016 Value *ThreadId = getOrCreateThreadID(Ident);
4019 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4020 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4022 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4023 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
4025 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Emits an OpenMP 'masked' region: __kmpc_masked on entry (args include the
// filter — declaration missing from this extraction) and __kmpc_end_masked
// with {Ident, ThreadId} on exit; body handled by EmitOMPInlinedRegion.
// NOTE(review): extraction dropped the Filter parameter and Args
// declaration lines plus the closing brace.
4029OpenMPIRBuilder::InsertPointOrErrorTy
4030OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4031 BodyGenCallbackTy BodyGenCB,
4033 if (!updateToLocation(
Loc))
4036 Directive OMPD = Directive::OMPD_masked;
4038 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4039 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4040 Value *ThreadId = getOrCreateThreadID(Ident);
4042 Value *ArgsEnd[] = {Ident, ThreadId};
4044 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4045 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4047 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4048 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
4050 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4060 Call->setDoesNotThrow();
// Emits the code for an OpenMP 'scan' directive inside the two-pass scan
// scheme: on the first (input) loop each scan variable is stored into its
// iteration slot of the heap buffer; on the second (scan) loop values are
// read back from the buffer. Branch order into before/after-scan blocks
// depends on whether the scan is inclusive.
// NOTE(review): partial extraction — lines defining CmpI and some braces
// are missing; restore from upstream before compiling.
4072OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4073 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4075 bool IsInclusive, ScanInfo *ScanRedInfo) {
// The buffers are created lazily on the first scan loop.
4076 if (ScanRedInfo->OMPFirstScanLoop) {
4077 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4078 ScanVarsType, ScanRedInfo);
4082 if (!updateToLocation(
Loc))
4087 if (ScanRedInfo->OMPFirstScanLoop) {
// Input phase: buffer[iv] = scan variable.
4089 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4090 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4091 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4092 Type *DestTy = ScanVarsType[i];
4093 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4094 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4096 Builder.CreateStore(Src, Val);
4099 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4100 emitBlock(ScanRedInfo->OMPScanDispatch,
4101 Builder.GetInsertBlock()->getParent());
4103 if (!ScanRedInfo->OMPFirstScanLoop) {
4104 IV = ScanRedInfo->IV;
// Scan phase: scan variable = buffer[iv].
4107 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4108 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4109 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4110 Type *DestTy = ScanVarsType[i];
4112 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4113 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4114 Builder.CreateStore(Src, ScanVars[i]);
// Inclusive scans execute the before-scan block in the first loop;
// exclusive scans invert the branch targets.
4120 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4121 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4122 ScanRedInfo->OMPAfterScanBlock);
4124 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4125 ScanRedInfo->OMPBeforeScanBlock);
4127 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4128 Builder.GetInsertBlock()->getParent());
4129 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4130 return Builder.saveIP();
// Allocates the per-scan-variable buffer pointers ("vla" allocas) and, via
// a masked region followed by a barrier, mallocs Span+1 elements per scan
// variable so one thread allocates and all threads observe the buffers.
// NOTE(review): partial extraction — parameter list, FiniCB/FilterVal
// declarations and some braces are missing.
4133Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4137 Builder.restoreIP(AllocaIP);
4139 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4141 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4142 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
// Body of the masked region: one thread allocates the buffers.
4146 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4147 InsertPointTy CodeGenIP) ->
Error {
4148 Builder.restoreIP(CodeGenIP);
// Span + 1 slots: the extra slot holds the final total (see the
// finalization helper, which reads index Span).
4150 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4151 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4155 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4156 AllocSpan,
nullptr,
"arr");
4157 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
4165 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4167 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4168 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4171 return AfterIP.takeError();
4172 Builder.restoreIP(*AfterIP);
4173 BasicBlock *InputBB = Builder.GetInsertBlock();
// Barrier so every thread sees the allocated buffers before using them.
4175 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4176 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4178 return AfterIP.takeError();
4179 Builder.restoreIP(*AfterIP);
// Finalization for scan reductions: inside a masked region, copies the last
// buffer element (index Span) of each reduction into its original variable
// and frees the buffer, then emits a barrier.
// NOTE(review): partial extraction — parameter list, FiniCB/FilterVal
// declarations and some braces are missing.
4184Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4186 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4187 InsertPointTy CodeGenIP) ->
Error {
4188 Builder.restoreIP(CodeGenIP);
4189 for (ReductionInfo RedInfo : ReductionInfos) {
4190 Value *PrivateVar = RedInfo.PrivateVariable;
4191 Value *OrigVar = RedInfo.Variable;
4192 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4193 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
// Slot [Span] holds the final reduced value (buffers were allocated
// with Span+1 elements).
4195 Type *SrcTy = RedInfo.ElementType;
4196 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4198 Value *Src = Builder.CreateLoad(SrcTy, Val);
4200 Builder.CreateStore(Src, OrigVar);
4201 Builder.CreateFree(Buff);
// Insert before the terminator if OMPScanFinish already has one.
4209 if (ScanRedInfo->OMPScanFinish->getTerminator())
4210 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4212 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4215 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4216 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4219 return AfterIP.takeError();
4220 Builder.restoreIP(*AfterIP);
4221 BasicBlock *InputBB = Builder.GetInsertBlock();
4223 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4224 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4226 return AfterIP.takeError();
4227 Builder.restoreIP(*AfterIP);
// Emits the up-sweep of the scan reduction inside a masked region: an outer
// loop over ceil(log2(Span)) rounds (Counter/Pow2K PHIs, Pow2K doubling via
// a nuw shl each round) and an inner loop that combines buffer[i] with
// buffer[i - pow2k] for all i >= pow2k, followed by a barrier and the
// finalization IR.
// NOTE(review): partial extraction — block creations, log2 computation
// lines, loop-closing branches and braces are missing; restore from
// upstream before compiling.
4231OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4232 const LocationDescription &
Loc,
4234 ScanInfo *ScanRedInfo) {
4236 if (!updateToLocation(
Loc))
4238 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4239 InsertPointTy CodeGenIP) ->
Error {
4240 Builder.restoreIP(CodeGenIP);
4246 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
// Number of outer rounds = ceil(log2(Span)), computed in FP and
// truncated back to i32 (intrinsic call lines missing here).
4248 Builder.GetInsertBlock()->getModule(),
4252 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4255 Builder.GetInsertBlock()->getModule(),
4258 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4261 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4262 Builder.SetInsertPoint(InputBB);
4263 Builder.CreateBr(LoopBB);
4264 emitBlock(LoopBB, CurFn);
4265 Builder.SetInsertPoint(LoopBB);
// Outer-loop PHIs: round counter and the current stride 2^k.
4267 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4269 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4270 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4272 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
4280 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4281 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4282 emitBlock(InnerLoopBB, CurFn);
4283 Builder.SetInsertPoint(InnerLoopBB);
4284 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
// Inner loop: buffer[i] = reduce(buffer[i], buffer[i - pow2k]).
4286 for (ReductionInfo RedInfo : ReductionInfos) {
4287 Value *ReductionVal = RedInfo.PrivateVariable;
4288 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4289 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4290 Type *DestTy = RedInfo.ElementType;
4291 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4293 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4294 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4296 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4297 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4298 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
4300 InsertPointOrErrorTy AfterIP =
4301 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4303 return AfterIP.takeError();
4304 Builder.CreateStore(Result, LHSPtr);
4307 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4308 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4309 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4310 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4311 emitBlock(InnerExitBB, CurFn);
4313 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
// Stride doubles each round (shl nuw).
4316 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4317 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4319 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
4329 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4330 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4333 return AfterIP.takeError();
4334 Builder.restoreIP(*AfterIP);
4335 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4338 return AfterIP.takeError();
4339 Builder.restoreIP(*AfterIP);
4340 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
// Drives the two-pass scan lowering: runs the input-loop generator with
// OMPFirstScanLoop = true, then the scan-loop generator with it reset to
// false.
// NOTE(review): partial extraction — parameter list (the two generator
// callbacks), error checks and braces are missing.
4347Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4350 ScanInfo *ScanRedInfo) {
4358 ScanRedInfo->OMPFirstScanLoop =
true;
4359 Error Err = InputLoopGen();
4369 ScanRedInfo->OMPFirstScanLoop =
false;
4370 Error Err = ScanLoopGen(Builder.saveIP());
// Creates the four helper basic blocks used by the scan lowering (dispatch,
// after-scan, before-scan, loop-exit) in the current function and records
// them in the ScanInfo.
// NOTE(review): the BasicBlock::Create argument lines are missing from this
// extraction.
4377void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4378 Function *
Fun = Builder.GetInsertBlock()->getParent();
4379 ScanRedInfo->OMPScanDispatch =
4381 ScanRedInfo->OMPAfterScanBlock =
4383 ScanRedInfo->OMPBeforeScanBlock =
4385 ScanRedInfo->OMPScanLoopExit =
// Materializes the CFG skeleton of a canonical loop (preheader -> header
// (zero-init i.v. PHI) -> cond (icmp ult trip count) -> body -> latch
// (nuw increment, back edge) -> exit -> after) and registers a new
// CanonicalLoopInfo for it.
// NOTE(review): partial extraction — parameter list, basic-block creation
// lines and the remaining CL field assignments are missing.
4388CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4412 Builder.SetCurrentDebugLocation(
DL);
4414 Builder.SetInsertPoint(Preheader);
4415 Builder.CreateBr(Header);
4417 Builder.SetInsertPoint(Header);
// Canonical induction variable starts at 0 and counts up to TripCount.
4418 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4419 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4420 Builder.CreateBr(
Cond);
4422 Builder.SetInsertPoint(
Cond);
4424 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4425 Builder.CreateCondBr(Cmp, Body, Exit);
4427 Builder.SetInsertPoint(Body);
4428 Builder.CreateBr(Latch);
4430 Builder.SetInsertPoint(Latch);
// Increment is nuw: the trip-count comparison is unsigned.
4431 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4432 "omp_" + Name +
".next",
true);
4433 Builder.CreateBr(Header);
4436 Builder.SetInsertPoint(Exit);
4437 Builder.CreateBr(After);
// Register the loop in the builder-owned list; pointers into this list
// stay valid because it is a forward list (emplace_front).
4440 LoopInfos.emplace_front();
4441 CanonicalLoopInfo *CL = &LoopInfos.front();
4443 CL->Header = Header;
// Creates a canonical loop over an explicit trip count: builds the skeleton
// via createLoopSkeleton, branches into its preheader from the current
// location, then runs the body callback at the loop's body insertion point
// with the induction variable.
// NOTE(review): partial extraction — the return-type line, several
// declarations (BB, After, NextBB) and the function tail are missing.
4455OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4456 LoopBodyGenCallbackTy BodyGenCB,
4461 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4462 NextBB, NextBB, Name);
// Only wire up the branch when the location is usable.
4466 if (updateToLocation(
Loc)) {
4470 spliceBB(Builder, After,
false);
4471 Builder.CreateBr(CL->getPreheader());
4476 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4486 ScanInfos.emplace_front();
4487 ScanInfo *
Result = &ScanInfos.front();
// Builds the pair of canonical loops needed for an OpenMP scan reduction:
// computes the shared trip count, creates a "scan.init" block, and defines a
// body generator that splits each iteration into before-scan / after-scan
// regions via createScanBBs. Two closures (InputLoopGen / ScanLoopGen) emit
// the input-phase and scan-phase loops; emitScanBasedDirectiveIR runs both.
// NOTE(review): several interior lines were elided by the extraction;
// comments describe only the visible fragment.
4492OpenMPIRBuilder::createCanonicalScanLoops(
4493 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4494 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4495 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
// Trip count may be computed at a dedicated insertion point if provided.
4496 LocationDescription ComputeLoc =
4497 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4498 updateToLocation(ComputeLoc);
4502 Value *TripCount = calculateCanonicalLoopTripCount(
4503 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4504 ScanRedInfo->Span = TripCount;
4505 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4506 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
// Body generator shared by both loops: reroutes the body through the scan
// dispatch/before/after blocks, then calls the user's BodyGenCB.
4508 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4509 Builder.restoreIP(CodeGenIP);
4510 ScanRedInfo->IV =
IV;
4511 createScanBBs(ScanRedInfo);
4512 BasicBlock *InputBlock = Builder.GetInsertBlock();
4516 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4517 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4518 Builder.GetInsertBlock()->getParent());
4519 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4520 emitBlock(ScanRedInfo->OMPScanLoopExit,
4521 Builder.GetInsertBlock()->getParent());
4522 Builder.CreateBr(ContinueBlock);
4523 Builder.SetInsertPoint(
4524 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4525 return BodyGenCB(Builder.saveIP(),
IV);
// First-phase (input) loop generator.
4528 const auto &&InputLoopGen = [&]() ->
Error {
4530 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4531 ComputeIP, Name,
true, ScanRedInfo);
4535 Builder.restoreIP((*LoopInfo)->getAfterIP());
// Second-phase (scan) loop generator; records the finish block.
4538 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4540 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4541 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4545 Builder.restoreIP((*LoopInfo)->getAfterIP());
4546 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4549 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
// Computes the trip count of a loop described by (Start, Stop, Step,
// IsSigned, InclusiveStop). For signed steps it normalizes to a non-negative
// increment by swapping bounds when Step < 0; the unsigned path subtracts
// directly. The result selects 0 when the loop would not execute.
// NOTE(review): interior lines (Zero/One/OneCmp definitions, the ICmp
// predicates and branch structure) were elided by the extraction.
4555Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4557 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
4567 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4568 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4570 updateToLocation(
Loc);
// Signed case: fold negative steps into a positive increment over swapped
// bounds so the unsigned division below is valid.
4587 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4588 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4589 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4590 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4591 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
4592 ZeroCmp = Builder.CreateICmp(
// Unsigned case: plain Stop - Start span.
4595 Span = Builder.CreateSub(Stop, Start,
"",
true);
4596 ZeroCmp = Builder.CreateICmp(
4600 Value *CountIfLooping;
4601 if (InclusiveStop) {
4602 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
4605 Value *CountIfTwo = Builder.CreateAdd(
4606 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4608 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
// Zero iterations if the loop condition fails up front.
4611 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4612 "omp_" + Name +
".tripcount");
// Range overload of createCanonicalLoop (the return type / name line was
// elided by the extraction; presumably this is the Start/Stop/Step variant —
// TODO confirm against the original file). It reduces the range form to the
// trip-count form: the body generator maps the normalized induction variable
// back to user space via IndVar = Start + IV * Step.
4616 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4617 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4618 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4619 ScanInfo *ScanRedInfo) {
4620 LocationDescription ComputeLoc =
4621 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4623 Value *TripCount = calculateCanonicalLoopTripCount(
4624 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
// Rescale the canonical 0..TripCount IV into the caller's iteration space.
4626 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4627 Builder.restoreIP(CodeGenIP);
4628 Value *Span = Builder.CreateMul(
IV, Step);
4629 Value *IndVar = Builder.CreateAdd(Span, Start);
4631 ScanRedInfo->IV = IndVar;
4632 return BodyGenCB(Builder.saveIP(), IndVar);
4634 LocationDescription LoopLoc =
4637 : LocationDescription(Builder.saveIP(),
4638 Builder.getCurrentDebugLocation());
4639 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
// Selects the 32- or 64-bit unsigned __kmpc_dist_for_static_init runtime
// entry point based on the induction-variable bit width.
// NOTE(review): the function's name/return line and the branch conditions on
// Bitwidth were elided by the extraction.
4648 OpenMPIRBuilder &OMPBuilder) {
4649 unsigned Bitwidth = Ty->getIntegerBitWidth();
4651 return OMPBuilder.getOrCreateRuntimeFunction(
4652 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4654 return OMPBuilder.getOrCreateRuntimeFunction(
4655 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
// Selects the 32- or 64-bit unsigned __kmpc_for_static_init runtime entry
// point based on the induction-variable bit width.
// NOTE(review): the function's name/return line and the branch conditions on
// Bitwidth were elided by the extraction.
4664 OpenMPIRBuilder &OMPBuilder) {
4665 unsigned Bitwidth = Ty->getIntegerBitWidth();
4667 return OMPBuilder.getOrCreateRuntimeFunction(
4668 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4670 return OMPBuilder.getOrCreateRuntimeFunction(
4671 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
// Lowers a canonical loop to a statically-scheduled worksharing loop:
// allocates the last-iter / lower-bound / upper-bound / stride out-params,
// calls the static-init runtime entry, rewrites the loop's trip count and
// induction variable to the thread-local chunk, calls static-fini at the
// exit, and optionally emits an OMPD_for barrier.
// NOTE(review): several interior lines were elided by the extraction;
// comments describe only the visible fragment.
4675OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4676 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4678 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4680 "Require dedicated allocate IP");
4683 Builder.restoreIP(CLI->getPreheaderIP());
4684 Builder.SetCurrentDebugLocation(
DL);
// Source-location ident passed to every runtime call below.
4687 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4688 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4692 Type *IVTy =
IV->getType();
4694 LoopType == WorksharingLoopType::DistributeForStaticLoop
4698 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
// Runtime out-parameters live in the dedicated alloca block.
4701 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4704 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4705 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
4706 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
4707 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
4708 CLI->setLastIter(PLastIter);
// Seed bounds: [0, TripCount - 1], stride 1 (inclusive upper bound).
4714 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4716 Constant *One = ConstantInt::get(IVTy, 1);
4717 Builder.CreateStore(Zero, PLowerBound);
4718 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4719 Builder.CreateStore(UpperBound, PUpperBound);
4720 Builder.CreateStore(One, PStride);
4722 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4725 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4726 ? OMPScheduleType::OrderedDistribute
4729 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4734 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
// The distribute-for variant takes an extra dist-upper-bound out-param.
4735 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4736 Value *PDistUpperBound =
4737 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4738 Args.push_back(PDistUpperBound);
4741 Builder.CreateCall(StaticInit, Args);
// Shrink the loop to the runtime-assigned [LB, UB] chunk.
4742 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4743 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4744 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4745 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4746 CLI->setTripCount(TripCount);
// Shift the induction variable by the chunk's lower bound.
4753 Builder.SetInsertPoint(CLI->getBody(),
4754 CLI->getBody()->getFirstInsertionPt());
4755 Builder.SetCurrentDebugLocation(
DL);
4756 return Builder.CreateAdd(OldIV, LowerBound);
4760 Builder.SetInsertPoint(CLI->getExit(),
4761 CLI->getExit()->getTerminator()->getIterator());
4762 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
// Optional end-of-worksharing barrier.
4766 InsertPointOrErrorTy BarrierIP =
4767 createBarrier(LocationDescription(Builder.saveIP(),
DL),
4768 omp::Directive::OMPD_for,
false,
4771 return BarrierIP.takeError();
4774 InsertPointTy AfterIP = CLI->getAfterIP();
4780OpenMPIRBuilder::InsertPointOrErrorTy
4781OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4782 CanonicalLoopInfo *CLI,
4783 InsertPointTy AllocaIP,
4786 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4787 assert(ChunkSize &&
"Chunk size is required");
4789 LLVMContext &Ctx = CLI->getFunction()->getContext();
4791 Value *OrigTripCount = CLI->getTripCount();
4792 Type *IVTy =
IV->getType();
4794 "Max supported tripcount bitwidth is 64 bits");
4796 :
Type::getInt64Ty(Ctx);
4799 Constant *One = ConstantInt::get(InternalIVTy, 1);
4805 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4808 Builder.restoreIP(AllocaIP);
4809 Builder.SetCurrentDebugLocation(
DL);
4810 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4811 Value *PLowerBound =
4812 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
4813 Value *PUpperBound =
4814 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
4815 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
4816 CLI->setLastIter(PLastIter);
4819 Builder.restoreIP(CLI->getPreheaderIP());
4820 Builder.SetCurrentDebugLocation(
DL);
4823 Value *CastedChunkSize =
4824 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
4825 Value *CastedTripCount =
4826 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
4828 Constant *SchedulingType = ConstantInt::get(
4829 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4830 Builder.CreateStore(Zero, PLowerBound);
4831 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
4832 Builder.CreateStore(OrigUpperBound, PUpperBound);
4833 Builder.CreateStore(One, PStride);
4838 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4839 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4840 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4841 Builder.CreateCall(StaticInit,
4843 SchedulingType, PLastIter,
4844 PLowerBound, PUpperBound,
4849 Value *FirstChunkStart =
4850 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
4851 Value *FirstChunkStop =
4852 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
4853 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
4855 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
4856 Value *NextChunkStride =
4857 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
4860 BasicBlock *DispatchEnter = splitBB(Builder,
true);
4861 Value *DispatchCounter;
4866 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
4867 {Builder.saveIP(),
DL},
4868 [&](InsertPointTy BodyIP,
Value *Counter) {
4869 DispatchCounter = Counter;
4872 FirstChunkStart, CastedTripCount, NextChunkStride,
4878 BasicBlock *DispatchBody = DispatchCLI->getBody();
4879 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4880 BasicBlock *DispatchExit = DispatchCLI->getExit();
4881 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4882 DispatchCLI->invalidate();
4890 Builder.restoreIP(CLI->getPreheaderIP());
4891 Builder.SetCurrentDebugLocation(
DL);
4894 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4895 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
4896 Value *IsLastChunk =
4897 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
4898 Value *CountUntilOrigTripCount =
4899 Builder.CreateSub(CastedTripCount, DispatchCounter);
4900 Value *ChunkTripCount = Builder.CreateSelect(
4901 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4902 Value *BackcastedChunkTC =
4903 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
4904 CLI->setTripCount(BackcastedChunkTC);
4909 Value *BackcastedDispatchCounter =
4910 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
4912 Builder.restoreIP(CLI->getBodyIP());
4913 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
4918 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4922 InsertPointOrErrorTy AfterIP =
4923 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
4926 return AfterIP.takeError();
// Maps (WorksharingLoopType, IV bit width) to the matching device runtime
// entry: __kmpc_{for,distribute,distribute_for}_static_loop_{4u,8u}.
// NOTE(review): the function's signature line and the 32/64-bit branch
// conditions were elided by the extraction.
4944 unsigned Bitwidth = Ty->getIntegerBitWidth();
4945 Module &M = OMPBuilder->M;
4947 case WorksharingLoopType::ForStaticLoop:
4949 return OMPBuilder->getOrCreateRuntimeFunction(
4950 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4952 return OMPBuilder->getOrCreateRuntimeFunction(
4953 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4955 case WorksharingLoopType::DistributeStaticLoop:
4957 return OMPBuilder->getOrCreateRuntimeFunction(
4958 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4960 return OMPBuilder->getOrCreateRuntimeFunction(
4961 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4963 case WorksharingLoopType::DistributeForStaticLoop:
4965 return OMPBuilder->getOrCreateRuntimeFunction(
4966 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4968 return OMPBuilder->getOrCreateRuntimeFunction(
4969 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
// Fallback path for unsupported induction-variable widths.
4972 if (Bitwidth != 32 && Bitwidth != 64) {
4984 Function &LoopBodyFn,
bool NoLoop) {
4986 Module &M = OMPBuilder->M;
4995 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4996 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4997 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
4998 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4999 Builder.CreateCall(RTLFn, RealArgs);
5002 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5003 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5004 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5005 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
5008 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5009 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5010 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5011 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5012 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5014 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5017 Builder.CreateCall(RTLFn, RealArgs);
5021 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5026 Value *TripCount = CLI->getTripCount();
5032 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5033 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5038 Builder.restoreIP({Preheader, Preheader->
end()});
5041 Builder.CreateBr(CLI->getExit());
5044 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5047 CleanUpInfo.EntryBB = CLI->getHeader();
5048 CleanUpInfo.ExitBB = CLI->getExit();
5049 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5057 "Expected unique undroppable user of outlined function");
5059 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5061 "Expected outlined function call to be located in loop preheader");
5063 if (OutlinedFnCallInstruction->
arg_size() > 1)
5070 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5072 for (
auto &ToBeDeletedItem : ToBeDeleted)
5073 ToBeDeletedItem->eraseFromParent();
// Device (target) lowering of a worksharing loop: outlines the loop body into
// a function via OutlineInfo, substitutes a freshly alloca'd loop counter for
// the canonical induction variable inside the outlined region, and defers the
// runtime-call emission to the PostOutlineCB.
// NOTE(review): many interior lines were elided by the extraction; comments
// describe only the visible fragment.
5077OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
5078 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5081 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5082 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5085 OI.OuterAllocaBB = CLI->getPreheader();
5091 OI.OuterAllocaBB = AllocaIP.getBlock();
// Outline from the body up to a new "omp.prelatch" split of the latch.
5094 OI.EntryBB = CLI->getBody();
5095 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5096 "omp.prelatch",
true);
5099 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
// Replacement loop counter that the outlined body will read.
5103 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5105 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5116 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5127 CLI->getPreheader(),
5136 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
// Rewrite IV uses inside the outlined region to the new counter load.
5142 CLI->getIndVar()->user_end());
5145 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5146 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5152 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
// Finalization runs after outlining; captured deletions happen there.
5159 OI.PostOutlineCB = [=, ToBeDeletedVec =
5160 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5164 addOutlineInfo(std::move(OI));
5165 return CLI->getAfterIP();
// Top-level dispatcher for worksharing-loop lowering: target devices take the
// outlining path; otherwise the effective schedule type (derived from the
// clause modifiers) selects static, static-chunked, or dynamic lowering.
// NOTE(review): interior lines (schedule computation, unreachable defaults)
// were elided by the extraction; comments describe only the visible fragment.
5168OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5169 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5170 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5171 bool HasSimdModifier,
bool HasMonotonicModifier,
5172 bool HasNonmonotonicModifier,
bool HasOrderedClause,
// Device compilation always uses the outlined target path.
5174 if (Config.isTargetDevice())
5175 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
5177 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5178 HasNonmonotonicModifier, HasOrderedClause);
5180 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5181 OMPScheduleType::ModifierOrdered;
// Dispatch on the base schedule, ignoring modifier bits.
5182 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5183 case OMPScheduleType::BaseStatic:
5184 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
5186 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5187 NeedsBarrier, ChunkSize);
5189 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5191 case OMPScheduleType::BaseStaticChunked:
5193 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5194 NeedsBarrier, ChunkSize);
5196 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
// All of these are handled by the dynamic runtime without a chunk size.
5199 case OMPScheduleType::BaseRuntime:
5200 case OMPScheduleType::BaseAuto:
5201 case OMPScheduleType::BaseGreedy:
5202 case OMPScheduleType::BaseBalanced:
5203 case OMPScheduleType::BaseSteal:
5204 case OMPScheduleType::BaseGuidedSimd:
5205 case OMPScheduleType::BaseRuntimeSimd:
5207 "schedule type does not support user-defined chunk sizes");
// Chunked dynamic/guided schedules share the dynamic lowering.
5209 case OMPScheduleType::BaseDynamicChunked:
5210 case OMPScheduleType::BaseGuidedChunked:
5211 case OMPScheduleType::BaseGuidedIterativeChunked:
5212 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5213 case OMPScheduleType::BaseStaticBalancedChunked:
5214 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5215 NeedsBarrier, ChunkSize);
// Selects __kmpc_dispatch_init_4u or _8u by induction-variable bit width.
// NOTE(review): signature and branch conditions elided by the extraction.
5228 unsigned Bitwidth = Ty->getIntegerBitWidth();
5230 return OMPBuilder.getOrCreateRuntimeFunction(
5231 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5233 return OMPBuilder.getOrCreateRuntimeFunction(
5234 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
// Selects __kmpc_dispatch_next_4u or _8u by induction-variable bit width.
// NOTE(review): signature and branch conditions elided by the extraction.
5244 unsigned Bitwidth = Ty->getIntegerBitWidth();
5246 return OMPBuilder.getOrCreateRuntimeFunction(
5247 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5249 return OMPBuilder.getOrCreateRuntimeFunction(
5250 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
// Selects __kmpc_dispatch_fini_4u or _8u by induction-variable bit width.
// NOTE(review): signature and branch conditions elided by the extraction.
5259 unsigned Bitwidth = Ty->getIntegerBitWidth();
5261 return OMPBuilder.getOrCreateRuntimeFunction(
5262 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5264 return OMPBuilder.getOrCreateRuntimeFunction(
5265 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
// Lowers a canonical loop to a dynamically-scheduled worksharing loop:
// allocates runtime out-params, calls dispatch-init with bounds [1, TripCount]
// and stride 1, wraps the loop in an outer dispatch-next polling loop that
// fetches new chunks until the runtime reports no more work, calls
// dispatch-fini per chunk when ordered, and optionally emits a barrier.
// NOTE(review): several interior lines were elided by the extraction;
// comments describe only the visible fragment.
5269OpenMPIRBuilder::InsertPointOrErrorTy
5270OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
5271 InsertPointTy AllocaIP,
5273 bool NeedsBarrier,
Value *Chunk) {
5274 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5276 "Require dedicated allocate IP");
5278 "Require valid schedule type");
5280 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5281 OMPScheduleType::ModifierOrdered;
5284 Builder.SetCurrentDebugLocation(
DL);
5287 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5288 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5292 Type *IVTy =
IV->getType();
// Runtime out-parameters live in the dedicated alloca block.
5297 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5299 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5300 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5301 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5302 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5303 CLI->setLastIter(PLastIter);
// Dynamic dispatch uses 1-based inclusive bounds.
5311 Constant *One = ConstantInt::get(IVTy, 1);
5312 Builder.CreateStore(One, PLowerBound);
5313 Value *UpperBound = CLI->getTripCount();
5314 Builder.CreateStore(UpperBound, PUpperBound);
5315 Builder.CreateStore(One, PStride);
5321 InsertPointTy AfterIP = CLI->getAfterIP();
5329 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5332 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5335 Builder.CreateCall(DynamicInit,
5336 {SrcLoc, ThreadNum, SchedulingType, One,
5337 UpperBound, One, Chunk});
// Outer condition: ask the runtime for the next chunk; loop while work
// remains.
5346 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
5347 PLowerBound, PUpperBound, PStride});
5348 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5351 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
5352 Builder.CreateCondBr(MoreWork, Header, Exit);
// Rewire the IV phi and CFG so the inner loop restarts per chunk.
5358 PI->setIncomingBlock(0, OuterCond);
5359 PI->setIncomingValue(0, LowerBound);
5364 Br->setSuccessor(0, OuterCond);
// The inner condition compares against the chunk's runtime upper bound.
5369 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
5370 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
5377 assert(BI->getSuccessor(1) == Exit);
5378 BI->setSuccessor(1, OuterCond);
// Ordered schedules notify the runtime at the end of every chunk.
5382 Builder.SetInsertPoint(&Latch->
back());
5384 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
// Optional end-of-worksharing barrier in the exit block.
5389 Builder.SetInsertPoint(&
Exit->back());
5390 InsertPointOrErrorTy BarrierIP =
5391 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5392 omp::Directive::OMPD_for,
false,
5395 return BarrierIP.takeError();
5414 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5419 if (BBsToErase.
count(UseInst->getParent()))
5426 while (BBsToErase.
remove_if(HasRemainingUses)) {
5436 InsertPointTy ComputeIP) {
5437 assert(
Loops.size() >= 1 &&
"At least one loop required");
5438 size_t NumLoops =
Loops.size();
5442 return Loops.front();
5444 CanonicalLoopInfo *Outermost =
Loops.front();
5445 CanonicalLoopInfo *Innermost =
Loops.back();
5446 BasicBlock *OrigPreheader = Outermost->getPreheader();
5447 BasicBlock *OrigAfter = Outermost->getAfter();
5454 Loop->collectControlBlocks(OldControlBBs);
5457 Builder.SetCurrentDebugLocation(
DL);
5458 if (ComputeIP.isSet())
5459 Builder.restoreIP(ComputeIP);
5461 Builder.restoreIP(Outermost->getPreheaderIP());
5465 Value *CollapsedTripCount =
nullptr;
5466 for (CanonicalLoopInfo *L :
Loops) {
5468 "All loops to collapse must be valid canonical loops");
5469 Value *OrigTripCount =
L->getTripCount();
5470 if (!CollapsedTripCount) {
5471 CollapsedTripCount = OrigTripCount;
5476 CollapsedTripCount = Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5480 CanonicalLoopInfo *
Result =
5481 createLoopSkeleton(
DL, CollapsedTripCount,
F,
5482 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5488 Builder.restoreIP(
Result->getBodyIP());
5492 NewIndVars.
resize(NumLoops);
5493 for (
int i = NumLoops - 1; i >= 1; --i) {
5494 Value *OrigTripCount =
Loops[i]->getTripCount();
5496 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5497 NewIndVars[i] = NewIndVar;
5499 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5502 NewIndVars[0] = Leftover;
5513 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5520 ContinueBlock =
nullptr;
5521 ContinuePred = NextSrc;
5528 for (
size_t i = 0; i < NumLoops - 1; ++i)
5529 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5532 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5535 for (
size_t i = NumLoops - 1; i > 0; --i)
5536 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5539 ContinueWith(
Result->getLatch(),
nullptr);
5546 for (
size_t i = 0; i < NumLoops; ++i)
5547 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5552 for (CanonicalLoopInfo *L :
Loops)
5561std::vector<CanonicalLoopInfo *>
5565 "Must pass as many tile sizes as there are loops");
5566 int NumLoops =
Loops.size();
5567 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5569 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5570 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5571 Function *
F = OutermostLoop->getBody()->getParent();
5572 BasicBlock *InnerEnter = InnermostLoop->getBody();
5573 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5579 Loop->collectControlBlocks(OldControlBBs);
5586 for (CanonicalLoopInfo *L :
Loops) {
5587 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5599 for (
int i = 0; i < NumLoops - 1; ++i) {
5600 CanonicalLoopInfo *Surrounding =
Loops[i];
5603 BasicBlock *EnterBB = Surrounding->getBody();
5609 Builder.SetCurrentDebugLocation(
DL);
5610 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5612 for (
int i = 0; i < NumLoops; ++i) {
5614 Value *OrigTripCount = OrigTripCounts[i];
5617 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5618 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5627 Value *FloorTripOverflow =
5628 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5630 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5631 Value *FloorTripCount =
5632 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5633 "omp_floor" +
Twine(i) +
".tripcount",
true);
5636 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5642 std::vector<CanonicalLoopInfo *>
Result;
5643 Result.reserve(NumLoops * 2);
5647 BasicBlock *Enter = OutermostLoop->getPreheader();
5654 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5656 auto EmbeddNewLoop =
5657 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5659 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5660 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
5665 Enter = EmbeddedLoop->getBody();
5666 Continue = EmbeddedLoop->getLatch();
5667 OutroInsertBefore = EmbeddedLoop->getLatch();
5668 return EmbeddedLoop;
5672 const Twine &NameBase) {
5674 CanonicalLoopInfo *EmbeddedLoop =
5675 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5676 Result.push_back(EmbeddedLoop);
5680 EmbeddNewLoops(FloorCount,
"floor");
5684 Builder.SetInsertPoint(Enter->getTerminator());
5686 for (
int i = 0; i < NumLoops; ++i) {
5687 CanonicalLoopInfo *FloorLoop =
Result[i];
5690 Value *FloorIsEpilogue =
5691 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5692 Value *TileTripCount =
5693 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
5699 EmbeddNewLoops(TileCounts,
"tile");
5704 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5713 BodyEnter =
nullptr;
5714 BodyEntered = ExitBB;
5726 Builder.restoreIP(
Result.back()->getBodyIP());
5727 for (
int i = 0; i < NumLoops; ++i) {
5728 CanonicalLoopInfo *FloorLoop =
Result[i];
5729 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
5730 Value *OrigIndVar = OrigIndVars[i];
5734 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
5736 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
5743 for (CanonicalLoopInfo *L :
Loops)
5747 for (CanonicalLoopInfo *GenL : Result)
5758 if (Properties.
empty())
5781 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5785 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5793 if (
I.mayReadOrWriteMemory()) {
5797 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
// Requests full unrolling of the given canonical loop (body elided by the
// extraction; presumably attaches llvm.loop.unroll metadata — TODO confirm).
5802void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
// Requests heuristic (compiler-chosen) unrolling of the given canonical loop
// (body elided by the extraction; presumably attaches llvm.loop.unroll
// metadata — TODO confirm).
5809void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
5817void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
5820 const Twine &NamePrefix) {
5821 Function *
F = CanonicalLoop->getFunction();
5843 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
5849 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
5851 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
5854 Builder.SetInsertPoint(SplitBeforeIt);
5856 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
5859 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
5862 Builder.SetInsertPoint(ElseBlock);
5868 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
5870 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
5876 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
5878 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
5885 if (
Block == ThenBlock)
5886 NewBB->
setName(NamePrefix +
".if.else");
5889 VMap[
Block] = NewBB;
5893 Builder.CreateBr(NewBlocks.
front());
5897 L->getLoopLatch()->splitBasicBlock(
5898 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
5902 L->addBasicBlockToLoop(ThenBlock, LI);
// Returns the default simd alignment for the target triple: x86 depends on
// AVX-512 / AVX feature availability, with dedicated cases for PowerPC and
// WebAssembly. The concrete alignment values were elided by the extraction.
5906OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
5908 if (TargetTriple.
isX86()) {
5909 if (Features.
lookup(
"avx512f"))
5911 else if (Features.
lookup(
"avx"))
5915 if (TargetTriple.
isPPC())
5917 if (TargetTriple.
isWasm())
5922void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
5924 Value *IfCond, OrderKind Order,
5928 Function *
F = CanonicalLoop->getFunction();
5943 if (AlignedVars.
size()) {
5944 InsertPointTy IP = Builder.saveIP();
5945 for (
auto &AlignedItem : AlignedVars) {
5946 Value *AlignedPtr = AlignedItem.first;
5947 Value *Alignment = AlignedItem.second;
5950 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
5953 Builder.restoreIP(IP);
5958 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
5968 if (
Block == CanonicalLoop->getCond() ||
5969 Block == CanonicalLoop->getHeader())
5971 Reachable.insert(
Block);
5981 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5989 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
6005 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6007 if (Simdlen || Safelen) {
6011 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6037static std::unique_ptr<TargetMachine>
6041 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6042 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6053 std::nullopt, OptLevel));
6077 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6078 FAM.registerPass([&]() {
return TIRA; });
6092 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6097 nullptr, ORE,
static_cast<int>(OptLevel),
6118 <<
" Threshold=" << UP.
Threshold <<
"\n"
6121 <<
" PartialOptSizeThreshold="
6141 Ptr = Load->getPointerOperand();
6143 Ptr = Store->getPointerOperand();
6147 Ptr =
Ptr->stripPointerCasts();
6150 if (Alloca->getParent() == &
F->getEntryBlock())
6170 int MaxTripCount = 0;
6171 bool MaxOrZero =
false;
6172 unsigned TripMultiple = 0;
6174 bool UseUpperBound =
false;
6176 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6178 unsigned Factor = UP.
Count;
6179 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
// Partially unrolls a canonical loop by Factor. The visible fragment shows:
// metadata-only unrolling via llvm.loop.unroll.count when no result loop is
// requested, and otherwise tiling the loop with tile size Factor so the inner
// tile loop can carry the unroll metadata while the outer loop is returned
// through *UnrolledCLI.
// NOTE(review): interior lines were elided by the extraction; comments
// describe only the visible fragment.
6187void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6189 CanonicalLoopInfo **UnrolledCLI) {
6190 assert(Factor >= 0 &&
"Unroll factor must not be negative");
// Metadata-only path: annotate and return the original loop.
6206 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6219 *UnrolledCLI =
Loop;
6224 "unrolling only makes sense with a factor of 2 or larger");
6226 Type *IndVarTy =
Loop->getIndVarType();
// Tile by Factor: LoopNest = {outer floor loop, inner tile loop}.
6233 std::vector<CanonicalLoopInfo *>
LoopNest =
6234 tileLoops(
DL, {
Loop}, {FactorVal});
6237 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6248 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6251 (*UnrolledCLI)->assertOK();
// Emits a call to __kmpc_copyprivate: loads the DidIt flag and passes the
// source-location ident, thread id, buffer size, copy buffer, copy function,
// and the loaded flag to the runtime.
// NOTE(review): the early-return value for an unusable location was elided by
// the extraction.
6255OpenMPIRBuilder::InsertPointTy
6256OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
6259 if (!updateToLocation(
Loc))
6263 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6264 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6265 Value *ThreadId = getOrCreateThreadID(Ident);
// DidIt is 1 only on the thread that executed the single region.
6267 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6269 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6271 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6272 Builder.CreateCall(Fn, Args);
6274 return Builder.saveIP();
// Generates an OpenMP `single` region: __kmpc_single / __kmpc_end_single
// around the body, optional copyprivate broadcasting, and a closing barrier
// unless nowait was requested.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
6277OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6278 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6282 if (!updateToLocation(
Loc))
// With copyprivate variables, a DidIt flag records whether this thread
// executed the single region; initialize it to 0.
6288 if (!CPVars.
empty()) {
6290 Builder.CreateStore(Builder.getInt32(0), DidIt);
6293 Directive OMPD = Directive::OMPD_single;
6295 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6296 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6297 Value *ThreadId = getOrCreateThreadID(Ident);
// Entry/exit runtime calls bracketing the inlined region.
6300 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6301 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6303 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6304 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
// Wrap the user finalization callback; the wrapper also stores 1 to DidIt
// (marking this thread as the one that executed the region).
6306 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6307 if (
Error Err = FiniCB(IP))
6314 Builder.CreateStore(Builder.getInt32(1), DidIt);
6327 InsertPointOrErrorTy AfterIP =
6328 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6332 return AfterIP.takeError();
// Broadcast each copyprivate variable to the other threads.
6335 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
6337 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
6338 ConstantInt::get(
Int64, 0), CPVars[
I],
6341 }
// Without copyprivate, emit an implicit barrier unless nowait was given.
else if (!IsNowait) {
6342 InsertPointOrErrorTy AfterIP =
6343 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
6344 omp::Directive::OMPD_unknown,
false,
6347 return AfterIP.takeError();
6349 return Builder.saveIP();
// Generates an OpenMP `critical` region guarded by a named runtime lock.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
6352OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6353 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6354 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
6356 if (!updateToLocation(
Loc))
6359 Directive OMPD = Directive::OMPD_critical;
6361 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6362 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6363 Value *ThreadId = getOrCreateThreadID(Ident);
// The lock variable is derived from the critical section's name.
6364 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6365 Value *
Args[] = {Ident, ThreadId, LockVar};
// With a hint, the _with_hint entry point is used and the hint value is
// appended to the entry arguments.
6371 EnterArgs.push_back(HintInst);
6372 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6374 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6376 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
6379 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6380 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
// Inline the body between the entry and exit runtime calls.
6382 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Generates runtime calls for `ordered depend(source|sink)` (doacross):
// stores the iteration vector into an i64 array and passes its base address
// to __kmpc_doacross_post (source) or __kmpc_doacross_wait (sink).
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
6386OpenMPIRBuilder::InsertPointTy
6387OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
6388 InsertPointTy AllocaIP,
unsigned NumLoops,
6390 const Twine &Name,
bool IsDependSource) {
6394 "OpenMP runtime requires depend vec with i64 type");
6396 if (!updateToLocation(
Loc))
// Allocate the dependence vector in the caller-provided alloca block, then
// return to the current location for the stores.
6401 Builder.restoreIP(AllocaIP);
6402 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
6404 updateToLocation(
Loc);
// Store one value per associated loop into the dependence vector.
6407 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6408 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6409 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
6410 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
6414 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6415 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6418 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6419 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6420 Value *ThreadId = getOrCreateThreadID(Ident);
6421 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
// post for depend(source), wait for depend(sink).
6425 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6427 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6428 Builder.CreateCall(RTLFn, Args);
6430 return Builder.saveIP();
// Generates an OpenMP `ordered` region. For `ordered threads` the body is
// bracketed by __kmpc_ordered/__kmpc_end_ordered; the simd variant's path is
// on lines not visible here.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
6433OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6434 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6435 FinalizeCallbackTy FiniCB,
bool IsThreads) {
6436 if (!updateToLocation(
Loc))
6439 Directive OMPD = Directive::OMPD_ordered;
6445 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6446 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6447 Value *ThreadId = getOrCreateThreadID(Ident);
// Entry/exit runtime calls for the threads variant.
6450 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6451 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6454 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6455 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6458 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Common helper that inlines a directive's body between an entry and an exit
// runtime call, pushing a finalization record for nested cancellation.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
6462OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6464 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6465 bool HasFinalize,
bool IsCancellable) {
// Register the finalization callback so cancellation points inside the
// region can run it.
6468 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6472 BasicBlock *EntryBB = Builder.GetInsertBlock();
// Emit the (possibly conditional) region entry.
6481 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6484 if (
Error Err = BodyGenCB( InsertPointTy(),
6492 "Unexpected control flow graph state!!");
// Emit the region exit; finalization runs when HasFinalize is set.
6493 InsertPointOrErrorTy AfterIP =
6494 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6496 return AfterIP.takeError();
6498 "Unexpected Control Flow State!");
6504 "Unexpected Insertion point location!");
6507 auto InsertBB = merged ? ExitPredBB : ExitBB;
6510 Builder.SetInsertPoint(InsertBB);
6512 return Builder.saveIP();
// For a conditional directive (e.g. single/master), branch on the entry
// call's return value so only the selected thread executes the body.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
6515OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
// Unconditional regions need no extra control flow.
6518 if (!Conditional || !EntryCall)
6519 return Builder.saveIP();
6521 BasicBlock *EntryBB = Builder.GetInsertBlock();
// A non-zero return from the runtime entry call selects this thread.
6522 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6534 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
// NOTE(review): UI appears to be a placeholder terminator replaced by the
// original EntryBB terminator -- confirm with full source.
6536 Builder.SetInsertPoint(UI);
6537 Builder.Insert(EntryBBTI);
6538 UI->eraseFromParent();
// Emits the exit of an inlined directive region: pops and runs the matching
// finalization callback (when required) and inserts the exit runtime call.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
6545OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6546 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6549 Builder.restoreIP(FinIP);
// The finalization entry pushed at region entry must still be on top and
// must belong to this directive.
6553 assert(!FinalizationStack.empty() &&
6554 "Unexpected finalization stack state!");
6556 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6557 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6559 if (
Error Err = Fi.FiniCB(FinIP))
6566 Builder.SetInsertPoint(FiniBBTI);
6570 return Builder.saveIP();
6574 Builder.Insert(ExitCall);
// Emits the control flow for a `copyin` clause: only threads whose private
// copy's address differs from the master copy perform the copy (block names
// visible here suggest a "not master" path -- confirm with full source).
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
6580OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6581 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
6610 "copyin.not.master.end");
6617 Builder.SetInsertPoint(OMP_Entry);
// Compare the two addresses as integers; unequal means the copy is needed.
6618 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6619 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6620 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6621 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
// The caller emits the actual copy in CopyBegin; this block just falls
// through to CopyEnd.
6623 Builder.SetInsertPoint(CopyBegin);
6625 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6627 return Builder.saveIP();
// Emits a call to __kmpc_alloc to allocate memory through an OpenMP
// allocator.
// NOTE(review): lossy extraction -- the remaining parameters and the Args
// array initializer are on lines not visible here.
6630CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
6634 updateToLocation(
Loc);
6637 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6638 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6639 Value *ThreadId = getOrCreateThreadID(Ident);
6642 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6644 return Builder.CreateCall(Fn, Args, Name);
// Emits a call to __kmpc_free to release memory obtained from an OpenMP
// allocator (counterpart of createOMPAlloc).
// NOTE(review): lossy extraction -- the remaining parameters and the Args
// array initializer are on lines not visible here.
6647CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
6651 updateToLocation(
Loc);
6654 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6655 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6656 Value *ThreadId = getOrCreateThreadID(Ident);
6658 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6659 return Builder.CreateCall(Fn, Args, Name);
// Emits a call to __tgt_interop_init for the `interop init` construct.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below (e.g. the default assigned to Device at ~6674).
6662CallInst *OpenMPIRBuilder::createOMPInteropInit(
6663 const LocationDescription &
Loc,
Value *InteropVar,
6665 Value *DependenceAddress,
bool HaveNowaitClause) {
6667 updateToLocation(
Loc);
6670 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6671 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6672 Value *ThreadId = getOrCreateThreadID(Ident);
6673 if (Device ==
nullptr)
6675 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
// Missing dependence info defaults to zero dependences.
6676 if (NumDependences ==
nullptr) {
6677 NumDependences = ConstantInt::get(
Int32, 0);
6681 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6683 Ident, ThreadId, InteropVar, InteropTypeVal,
6684 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6686 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6688 return Builder.CreateCall(Fn, Args);
// Emits a call to __tgt_interop_destroy for the `interop destroy` construct.
// Mirrors createOMPInteropInit, minus the interop-type argument.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below (e.g. the default assigned to Device at ~6702).
6691CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6692 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
6693 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6695 updateToLocation(
Loc);
6698 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6699 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6700 Value *ThreadId = getOrCreateThreadID(Ident);
6701 if (Device ==
nullptr)
// Missing dependence info defaults to zero dependences.
6703 if (NumDependences ==
nullptr) {
6704 NumDependences = ConstantInt::get(
Int32, 0);
6708 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6710 Ident, ThreadId, InteropVar,
Device,
6711 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6713 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6715 return Builder.CreateCall(Fn, Args);
// Emits a call to __tgt_interop_use for the `interop use` construct.
// Mirrors createOMPInteropDestroy in structure and defaults.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below (e.g. the default assigned to Device at ~6730).
6718CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
6720 Value *NumDependences,
6721 Value *DependenceAddress,
6722 bool HaveNowaitClause) {
6724 updateToLocation(
Loc);
6726 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6727 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6728 Value *ThreadId = getOrCreateThreadID(Ident);
6729 if (Device ==
nullptr)
// Missing dependence info defaults to zero dependences.
6731 if (NumDependences ==
nullptr) {
6732 NumDependences = ConstantInt::get(
Int32, 0);
6736 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6738 Ident, ThreadId, InteropVar,
Device,
6739 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6741 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6743 return Builder.CreateCall(Fn, Args);
// Emits a call to __kmpc_threadprivate_cached to obtain the thread-private
// copy of a variable, using a module-internal cache global.
// NOTE(review): lossy extraction -- the parameter list and Args initializer
// are on lines not visible here.
6746CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6750 updateToLocation(
Loc);
6753 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6754 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6755 Value *ThreadId = getOrCreateThreadID(Ident);
// The per-variable cache is created (or reused) as an internal global.
6757 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
6761 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6763 return Builder.CreateCall(Fn, Args);
6766OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6767 const LocationDescription &
Loc,
6768 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6770 "expected num_threads and num_teams to be specified");
6772 if (!updateToLocation(
Loc))
6776 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6777 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6789 const std::string DebugPrefix =
"_debug__";
6790 if (KernelName.
ends_with(DebugPrefix)) {
6791 KernelName = KernelName.
drop_back(DebugPrefix.length());
6792 Kernel = M.getFunction(KernelName);
6798 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
6803 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
6804 if (MaxThreadsVal < 0)
6805 MaxThreadsVal = std::max(
6808 if (MaxThreadsVal > 0)
6809 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
6820 Function *Fn = getOrCreateRuntimeFunctionPtr(
6821 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6824 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6825 Constant *DynamicEnvironmentInitializer =
6829 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6831 DL.getDefaultGlobalsAddressSpace());
6835 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6836 ? DynamicEnvironmentGV
6838 DynamicEnvironmentPtr);
6841 ConfigurationEnvironment, {
6842 UseGenericStateMachineVal,
6843 MayUseNestedParallelismVal,
6850 ReductionBufferLength,
6853 KernelEnvironment, {
6854 ConfigurationEnvironmentInitializer,
6858 std::string KernelEnvironmentName =
6859 (KernelName +
"_kernel_environment").str();
6862 KernelEnvironmentInitializer, KernelEnvironmentName,
6864 DL.getDefaultGlobalsAddressSpace());
6868 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6869 ? KernelEnvironmentGV
6871 KernelEnvironmentPtr);
6872 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6874 KernelLaunchEnvironment =
6875 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
6876 ? KernelLaunchEnvironment
6877 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
6878 KernelLaunchEnvParamTy);
6880 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
6882 Value *ExecUserCode = Builder.CreateICmpEQ(
6892 auto *UI = Builder.CreateUnreachable();
6898 Builder.SetInsertPoint(WorkerExitBB);
6899 Builder.CreateRetVoid();
6902 Builder.SetInsertPoint(CheckBBTI);
6903 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6906 UI->eraseFromParent();
// Emits __kmpc_target_deinit at the end of a target kernel and, when teams
// reductions are in use, patches the kernel-environment global's initializer
// with the reduction data size and buffer length.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below (incl. the final store of the new initializer).
6913void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
6914 int32_t TeamsReductionDataSize,
6915 int32_t TeamsReductionBufferLength) {
6916 if (!updateToLocation(
Loc))
6919 Function *Fn = getOrCreateRuntimeFunctionPtr(
6920 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6922 Builder.CreateCall(Fn, {});
// Nothing further to do unless a teams reduction buffer is required.
6924 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
// Debug kernels carry a "_debug__" suffix that is stripped before looking
// up the "<kernel>_kernel_environment" global (see the matching ends_with
// check in createTargetInit).
6930 const std::string DebugPrefix =
"_debug__";
6932 KernelName = KernelName.
drop_back(DebugPrefix.length());
6933 auto *KernelEnvironmentGV =
6934 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
6935 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6936 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
// Rewrite field {0, 7} with the reduction data size and (presumably) the
// adjacent field with the buffer length -- confirm indices in full source.
6938 KernelEnvironmentInitializer,
6939 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6941 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6948 if (
Kernel.hasFnAttribute(Name)) {
6949 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
// Reads the {LB, UB} thread bounds recorded on a kernel, combining the
// generic omp_target_thread_limit attribute with target-specific ones
// (amdgpu-flat-work-group-size, nvvm.maxntid).
// NOTE(review): lossy extraction -- the function-name line (original 6956)
// and the target dispatch around these branches are missing; the name below
// is inferred. TODO confirm against the full source.
6955std::pair<int32_t, int32_t>
6957 int32_t ThreadLimit =
6958 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
// AMDGPU encodes its bounds as "LB,UB" in amdgpu-flat-work-group-size.
6961 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
6962 if (!Attr.isValid() || !Attr.isStringAttribute())
6963 return {0, ThreadLimit};
6964 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
6967 return {0, ThreadLimit};
// The thread-limit clause can only tighten the target's upper bound.
6968 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
// NVPTX records the upper bound in nvvm.maxntid.
6974 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
6975 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
6976 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6978 return {0, ThreadLimit};
6981void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
6984 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
6987 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
6995std::pair<int32_t, int32_t>
6998 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7002 int32_t LB, int32_t UB) {
7009 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7012void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7014 if (Config.isTargetDevice()) {
7021 else if (
T.isNVPTX())
7023 else if (
T.isSPIRV())
7030 if (Config.isTargetDevice()) {
7031 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7040Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7045 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7046 "Named kernel already exists?");
// Generates the outlined target-region function via the provided callback
// (unless offload is mandatory on the host, in which case it is skipped) and
// registers it as an offload entry.
// NOTE(review): lossy extraction -- original lines are missing between some
// statements below.
7052Error OpenMPIRBuilder::emitTargetRegionFunction(
7053 TargetRegionEntryInfo &EntryInfo,
7054 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
// Derive the unique kernel entry name for this region.
7058 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7060 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7064 OutlinedFn = *CBResult;
7066 OutlinedFn =
nullptr;
7072 if (!IsOffloadEntry)
// On the device the kernel name itself serves as the ID; on the host a
// separate "<name>_region_id" symbol is created.
7075 std::string EntryFnIDName =
7076 Config.isTargetDevice()
7077 ? std::string(EntryFnName)
7078 : createPlatformSpecificName({EntryFnName,
"region_id"});
7080 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7081 EntryFnName, EntryFnIDName);
// Applies target-region attributes to the outlined function, creates its ID
// and entry address, and records the entry with the offload info manager.
// Returns the function ID constant used to launch the region.
// NOTE(review): lossy extraction -- the remaining parameter lines (original
// 7087-7088) and the closing brace are not visible here.
7085Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7086 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7089 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7090 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7091 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7092 OffloadInfoManager.registerTargetRegionEntryInfo(
7093 EntryInfo, EntryAddr, OutlinedFnID,
7094 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7095 return OutlinedFnID;
7098OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7099 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7100 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7101 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7103 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7104 BodyGenTy BodyGenType)>
7107 if (!updateToLocation(
Loc))
7108 return InsertPointTy();
7110 Builder.restoreIP(CodeGenIP);
7112 if (Config.IsTargetDevice.value_or(
false)) {
7114 InsertPointOrErrorTy AfterIP =
7115 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7117 return AfterIP.takeError();
7118 Builder.restoreIP(*AfterIP);
7120 return Builder.saveIP();
7123 bool IsStandAlone = !BodyGenCB;
7124 MapInfosTy *MapInfo;
7128 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7129 InsertPointTy CodeGenIP) ->
Error {
7130 MapInfo = &GenMapInfoCB(Builder.saveIP());
7131 if (
Error Err = emitOffloadingArrays(
7132 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7133 true, DeviceAddrCB))
7136 TargetDataRTArgs RTArgs;
7137 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7140 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7145 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7146 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7150 SrcLocInfo, DeviceID,
7151 PointerNum, RTArgs.BasePointersArray,
7152 RTArgs.PointersArray, RTArgs.SizesArray,
7153 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7154 RTArgs.MappersArray};
7157 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7161 if (
Info.HasNoWait) {
7168 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7171 if (
Info.HasNoWait) {
7175 emitBlock(OffloadContBlock, CurFn,
true);
7176 Builder.restoreIP(Builder.saveIP());
7181 bool RequiresOuterTargetTask =
Info.HasNoWait;
7182 if (!RequiresOuterTargetTask)
7183 cantFail(TaskBodyCB(
nullptr,
nullptr,
7186 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7187 {}, RTArgs,
Info.HasNoWait));
7189 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7190 omp::OMPRTL___tgt_target_data_begin_mapper);
7192 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
7194 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7197 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7198 Builder.CreateStore(LI, DeviceMap.second.second);
7205 InsertPointOrErrorTy AfterIP =
7206 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7208 return AfterIP.takeError();
7209 Builder.restoreIP(*AfterIP);
7217 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7218 InsertPointTy CodeGenIP) ->
Error {
7219 InsertPointOrErrorTy AfterIP =
7220 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7222 return AfterIP.takeError();
7223 Builder.restoreIP(*AfterIP);
7228 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7229 TargetDataRTArgs RTArgs;
7230 Info.EmitDebug = !MapInfo->Names.empty();
7231 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7234 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7239 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7240 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7243 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7244 PointerNum, RTArgs.BasePointersArray,
7245 RTArgs.PointersArray, RTArgs.SizesArray,
7246 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7247 RTArgs.MappersArray};
7249 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7251 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
7257 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7265 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7266 return BeginThenGen(AllocaIP, Builder.saveIP());
7274 InsertPointOrErrorTy AfterIP =
7275 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7277 return AfterIP.takeError();
7281 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7282 return EndThenGen(AllocaIP, Builder.saveIP());
7285 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7286 return BeginThenGen(AllocaIP, Builder.saveIP());
7292 return Builder.saveIP();
// Selects the __kmpc_{distribute_,}for_static_init_{4,4u,8,8u} runtime
// function matching the IV size/signedness and the GPU-distribute flag.
// NOTE(review): lossy extraction -- the declaration of `Name` and part of
// the IVSize selection are on lines not visible here.
7296OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7297 bool IsGPUDistribute) {
// Only 32- and 64-bit induction variables have runtime entry points.
7298 assert((IVSize == 32 || IVSize == 64) &&
7299 "IV size is not compatible with the omp runtime");
7301 if (IsGPUDistribute)
7303 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7304 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7305 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7306 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7308 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7309 : omp::OMPRTL___kmpc_for_static_init_4u)
7310 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7311 : omp::OMPRTL___kmpc_for_static_init_8u);
7313 return getOrCreateRuntimeFunction(M, Name);
// Selects the __kmpc_dispatch_init_{4,4u,8,8u} runtime function matching the
// IV size and signedness (dynamic-schedule loop initialization).
// NOTE(review): lossy extraction -- the declaration of `Name` and the IVSize
// condition are on lines not visible here.
7316FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
// Only 32- and 64-bit induction variables have runtime entry points.
7318 assert((IVSize == 32 || IVSize == 64) &&
7319 "IV size is not compatible with the omp runtime");
7321 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7322 : omp::OMPRTL___kmpc_dispatch_init_4u)
7323 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7324 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7326 return getOrCreateRuntimeFunction(M, Name);
// Selects the __kmpc_dispatch_next_{4,4u,8,8u} runtime function matching the
// IV size and signedness (fetches the next dynamic-schedule chunk).
// NOTE(review): lossy extraction -- the declaration of `Name` and the IVSize
// condition are on lines not visible here.
7329FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
// Only 32- and 64-bit induction variables have runtime entry points.
7331 assert((IVSize == 32 || IVSize == 64) &&
7332 "IV size is not compatible with the omp runtime");
7334 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7335 : omp::OMPRTL___kmpc_dispatch_next_4u)
7336 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7337 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7339 return getOrCreateRuntimeFunction(M, Name);
// Selects the __kmpc_dispatch_fini_{4,4u,8,8u} runtime function matching the
// IV size and signedness (dynamic-schedule loop finalization).
// NOTE(review): lossy extraction -- the declaration of `Name` and the IVSize
// condition are on lines not visible here.
7342FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
// Only 32- and 64-bit induction variables have runtime entry points.
7344 assert((IVSize == 32 || IVSize == 64) &&
7345 "IV size is not compatible with the omp runtime");
7347 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7348 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7349 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7350 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7352 return getOrCreateRuntimeFunction(M, Name);
7356 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7361 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7369 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7373 if (NewVar && (arg == NewVar->
getArg()))
7383 auto UpdateDebugRecord = [&](
auto *DR) {
7386 for (
auto Loc : DR->location_ops()) {
7387 auto Iter = ValueReplacementMap.find(
Loc);
7388 if (Iter != ValueReplacementMap.end()) {
7389 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7390 ArgNo = std::get<1>(Iter->second) + 1;
7394 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7401 "Unexpected debug intrinsic");
7403 UpdateDebugRecord(&DVR);
7406 if (OMPBuilder.Config.isTargetDevice()) {
7408 Module *M = Func->getParent();
7411 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7413 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7414 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7416 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7429 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7431 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7432 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7434 if (OMPBuilder.Config.isTargetDevice()) {
7442 for (
auto &Arg : Inputs)
7447 for (
auto &Arg : Inputs)
7451 auto BB = Builder.GetInsertBlock();
7463 if (TargetCpuAttr.isStringAttribute())
7464 Func->addFnAttr(TargetCpuAttr);
7466 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7467 if (TargetFeaturesAttr.isStringAttribute())
7468 Func->addFnAttr(TargetFeaturesAttr);
7470 if (OMPBuilder.Config.isTargetDevice()) {
7472 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7473 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7484 Builder.SetInsertPoint(EntryBB);
7487 if (OMPBuilder.Config.isTargetDevice())
7488 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7490 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7495 if (OMPBuilder.Config.isTargetDevice())
7496 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7500 splitBB(Builder,
true,
"outlined.body");
7501 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7503 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7505 return AfterIP.takeError();
7506 Builder.restoreIP(*AfterIP);
7507 if (OMPBuilder.Config.isTargetDevice())
7508 OMPBuilder.createTargetDeinit(Builder);
7511 Builder.CreateRetVoid();
7515 auto AllocaIP = Builder.saveIP();
7520 const auto &ArgRange =
7521 OMPBuilder.Config.isTargetDevice()
7522 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7555 if (Instr->getFunction() == Func)
7556 Instr->replaceUsesOfWith(
Input, InputCopy);
7562 for (
auto InArg :
zip(Inputs, ArgRange)) {
7564 Argument &Arg = std::get<1>(InArg);
7565 Value *InputCopy =
nullptr;
7567 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7568 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7570 return AfterIP.takeError();
7571 Builder.restoreIP(*AfterIP);
7572 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7592 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7599 ReplaceValue(
Input, InputCopy, Func);
7603 for (
auto Deferred : DeferredReplacement)
7604 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7607 ValueReplacementMap);
7615 Value *TaskWithPrivates,
7616 Type *TaskWithPrivatesTy) {
7618 Type *TaskTy = OMPIRBuilder.Task;
7621 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7622 Value *Shareds = TaskT;
7632 if (TaskWithPrivatesTy != TaskTy)
7633 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7650 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7655 assert((!NumOffloadingArrays || PrivatesTy) &&
7656 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7659 Module &M = OMPBuilder.M;
7683 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7689 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7690 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7696 ".omp_target_task_proxy_func",
7697 Builder.GetInsertBlock()->getModule());
7698 Value *ThreadId = ProxyFn->getArg(0);
7699 Value *TaskWithPrivates = ProxyFn->getArg(1);
7700 ThreadId->
setName(
"thread.id");
7701 TaskWithPrivates->
setName(
"task");
7703 bool HasShareds = SharedArgsOperandNo > 0;
7704 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7707 Builder.SetInsertPoint(EntryBB);
7713 if (HasOffloadingArrays) {
7714 assert(TaskTy != TaskWithPrivatesTy &&
7715 "If there are offloading arrays to pass to the target"
7716 "TaskTy cannot be the same as TaskWithPrivatesTy");
7719 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7720 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7722 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7726 auto *ArgStructAlloca =
7728 assert(ArgStructAlloca &&
7729 "Unable to find the alloca instruction corresponding to arguments "
7730 "for extracted function");
7734 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
7736 Value *SharedsSize =
7737 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7740 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7742 Builder.CreateMemCpy(
7743 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7745 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7747 Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7748 Builder.CreateRetVoid();
7754 return GEP->getSourceElementType();
7756 return Alloca->getAllocatedType();
7779 if (OffloadingArraysToPrivatize.
empty())
7780 return OMPIRBuilder.Task;
7783 for (
Value *V : OffloadingArraysToPrivatize) {
7784 assert(V->getType()->isPointerTy() &&
7785 "Expected pointer to array to privatize. Got a non-pointer value "
7788 assert(ArrayTy &&
"ArrayType cannot be nullptr");
7794 "struct.task_with_privates");
7797 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
7798 TargetRegionEntryInfo &EntryInfo,
7799 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7802 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7803 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7805 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
7808 EntryFnName, Inputs, CBFunc,
7812 return OMPBuilder.emitTargetRegionFunction(
7813 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7817OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7818 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
7819 OpenMPIRBuilder::InsertPointTy AllocaIP,
7821 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
7945 splitBB(Builder,
true,
"target.task.body");
7947 splitBB(Builder,
true,
"target.task.alloca");
7949 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
7950 TargetTaskAllocaBB->
begin());
7951 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
7954 OI.EntryBB = TargetTaskAllocaBB;
7955 OI.OuterAllocaBB = AllocaIP.getBlock();
7960 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7963 Builder.restoreIP(TargetTaskBodyIP);
7964 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7978 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
7982 bool NeedsTargetTask = HasNoWait && DeviceID;
7983 if (NeedsTargetTask) {
7985 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
7986 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
7987 RTArgs.SizesArray}) {
7989 OffloadingArraysToPrivatize.
push_back(V);
7990 OI.ExcludeArgsFromAggregate.push_back(V);
7994 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
7995 DeviceID, OffloadingArraysToPrivatize](
7998 "there must be a single user for the outlined function");
8012 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8013 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8015 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8016 "Wrong number of arguments for StaleCI when shareds are present");
8017 int SharedArgOperandNo =
8018 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8024 if (!OffloadingArraysToPrivatize.
empty())
8029 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8030 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8032 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8035 Builder.SetInsertPoint(StaleCI);
8040 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8041 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8050 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8051 : getOrCreateRuntimeFunctionPtr(
8052 OMPRTL___kmpc_omp_target_task_alloc);
8056 Value *ThreadID = getOrCreateThreadID(Ident);
8063 Value *TaskSize = Builder.getInt64(
8064 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8069 Value *SharedsSize = Builder.getInt64(0);
8071 auto *ArgStructAlloca =
8073 assert(ArgStructAlloca &&
8074 "Unable to find the alloca instruction corresponding to arguments "
8075 "for extracted function");
8076 auto *ArgStructType =
8078 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8079 "arguments for extracted function");
8081 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8090 Value *Flags = Builder.getInt32(0);
8100 TaskSize, SharedsSize,
8103 if (NeedsTargetTask) {
8104 assert(DeviceID &&
"Expected non-empty device ID.");
8108 TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs);
8114 *
this, Builder, TaskData, TaskWithPrivatesTy);
8115 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8118 if (!OffloadingArraysToPrivatize.
empty()) {
8120 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8121 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8122 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8129 "ElementType should match ArrayType");
8132 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8133 Builder.CreateMemCpy(
8134 Dst, Alignment, PtrToPrivatize, Alignment,
8135 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8149 if (!NeedsTargetTask) {
8152 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8156 Builder.getInt32(Dependencies.size()),
8158 ConstantInt::get(Builder.getInt32Ty(), 0),
8164 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8166 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8167 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8168 CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
8170 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8171 }
else if (DepArray) {
8176 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8179 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8180 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8184 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8185 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
8190 I->eraseFromParent();
8192 addOutlineInfo(std::move(OI));
8195 << *(Builder.GetInsertBlock()) <<
"\n");
8197 << *(Builder.GetInsertBlock()->getParent()->getParent())
8199 return Builder.saveIP();
8202Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8203 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8204 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8205 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8208 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8209 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8211 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8217 OpenMPIRBuilder::InsertPointTy AllocaIP,
8218 OpenMPIRBuilder::TargetDataInfo &
Info,
8219 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8220 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8223 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8224 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8230 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8231 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8232 Builder.restoreIP(IP);
8233 Builder.CreateCall(OutlinedFn, Args);
8234 return Builder.saveIP();
8237 bool HasDependencies = Dependencies.
size() > 0;
8238 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8240 OpenMPIRBuilder::TargetKernelArgs KArgs;
8247 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8255 if (OutlinedFnID && DeviceID)
8256 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8257 EmitTargetCallFallbackCB, KArgs,
8258 DeviceID, RTLoc, TargetTaskAllocaIP);
8266 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8269 OMPBuilder.Builder.restoreIP(AfterIP);
8273 auto &&EmitTargetCallElse =
8274 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8275 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8278 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8279 if (RequiresOuterTargetTask) {
8283 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8284 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8286 Dependencies, EmptyRTArgs, HasNoWait);
8288 return EmitTargetCallFallbackCB(Builder.saveIP());
8291 Builder.restoreIP(AfterIP);
8295 auto &&EmitTargetCallThen =
8296 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8297 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8298 Info.HasNoWait = HasNoWait;
8299 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8300 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8301 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8302 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8309 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8310 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8315 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8317 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8321 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8324 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8332 Value *MaxThreadsClause =
8333 RuntimeAttrs.TeamsThreadLimit.size() == 1
8334 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8337 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8338 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8339 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8340 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8342 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8343 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8345 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8348 unsigned NumTargetItems =
Info.NumberOfPtrs;
8352 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8353 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8356 Value *TripCount = RuntimeAttrs.LoopTripCount
8357 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8358 Builder.getInt64Ty(),
8360 : Builder.getInt64(0);
8363 Value *DynCGGroupMem = Builder.getInt32(0);
8365 KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount,
8366 NumTeamsC, NumThreadsC,
8367 DynCGGroupMem, HasNoWait);
8371 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8374 if (RequiresOuterTargetTask)
8375 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8376 Dependencies, KArgs.RTArgs,
8379 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8380 EmitTargetCallFallbackCB, KArgs,
8381 DeviceID, RTLoc, AllocaIP);
8384 Builder.restoreIP(AfterIP);
8391 if (!OutlinedFnID) {
8392 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8398 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8402 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8403 EmitTargetCallElse, AllocaIP));
8406OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8407 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8408 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8409 TargetRegionEntryInfo &EntryInfo,
8410 const TargetKernelDefaultAttrs &DefaultAttrs,
8411 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8413 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8414 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8415 CustomMapperCallbackTy CustomMapperCB,
8418 if (!updateToLocation(
Loc))
8419 return InsertPointTy();
8421 Builder.restoreIP(CodeGenIP);
8429 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8430 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8436 if (!Config.isTargetDevice())
8438 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8439 CustomMapperCB, Dependencies, HasNowait);
8440 return Builder.saveIP();
8453 return OS.
str().str();
8458 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8459 Config.separator());
8463OpenMPIRBuilder::getOrCreateInternalVariable(
Type *Ty,
const StringRef &Name,
8465 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8467 assert(Elem.second->getValueType() == Ty &&
8468 "OMP internal variable has different type than requested");
8484 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8491Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8492 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8493 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8494 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8497Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8502 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8504 return SizePtrToInt;
8509 std::string VarName) {
8513 M, MaptypesArrayInit->
getType(),
8517 return MaptypesArrayGlobal;
8520void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8521 InsertPointTy AllocaIP,
8522 unsigned NumOperands,
8523 struct MapperAllocas &MapperAllocas) {
8524 if (!updateToLocation(
Loc))
8529 Builder.restoreIP(AllocaIP);
8531 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8535 ArrI64Ty,
nullptr,
".offload_sizes");
8536 updateToLocation(
Loc);
8537 MapperAllocas.ArgsBase = ArgsBase;
8538 MapperAllocas.Args =
Args;
8539 MapperAllocas.ArgSizes = ArgSizes;
8542void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8545 struct MapperAllocas &MapperAllocas,
8546 int64_t DeviceID,
unsigned NumOperands) {
8547 if (!updateToLocation(
Loc))
8552 Value *ArgsBaseGEP =
8553 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8554 {Builder.getInt32(0), Builder.getInt32(0)});
8556 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8557 {Builder.getInt32(0), Builder.getInt32(0)});
8558 Value *ArgSizesGEP =
8559 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8560 {Builder.getInt32(0), Builder.getInt32(0)});
8563 Builder.CreateCall(MapperFunc,
8564 {SrcLocInfo, Builder.getInt64(DeviceID),
8565 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
8566 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
8569void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8570 TargetDataRTArgs &RTArgs,
8571 TargetDataInfo &
Info,
8573 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8574 "expected region end call to runtime only when end call is separate");
8576 auto VoidPtrTy = UnqualPtrTy;
8577 auto VoidPtrPtrTy = UnqualPtrTy;
8579 auto Int64PtrTy = UnqualPtrTy;
8581 if (!
Info.NumberOfPtrs) {
8591 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8593 Info.RTArgs.BasePointersArray,
8595 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8599 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8602 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8604 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8605 :
Info.RTArgs.MapTypesArray,
8611 if (!
Info.EmitDebug)
8614 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8620 if (!
Info.HasMapper)
8623 RTArgs.MappersArray =
8624 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
8627void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8628 InsertPointTy CodeGenIP,
8629 MapInfosTy &CombinedInfo,
8630 TargetDataInfo &
Info) {
8631 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8632 CombinedInfo.NonContigInfo;
8645 "struct.descriptor_dim");
8647 enum { OffsetFD = 0, CountFD, StrideFD };
8651 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
8654 if (NonContigInfo.Dims[
I] == 1)
8656 Builder.restoreIP(AllocaIP);
8659 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
8660 Builder.restoreIP(CodeGenIP);
8661 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
8662 unsigned RevIdx = EE -
II - 1;
8663 Value *DimsLVal = Builder.CreateInBoundsGEP(
8665 {Builder.getInt64(0), Builder.getInt64(II)});
8667 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
8668 Builder.CreateAlignedStore(
8669 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
8670 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
8672 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
8673 Builder.CreateAlignedStore(
8674 NonContigInfo.Counts[L][RevIdx], CountLVal,
8675 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8677 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
8678 Builder.CreateAlignedStore(
8679 NonContigInfo.Strides[L][RevIdx], StrideLVal,
8680 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8683 Builder.restoreIP(CodeGenIP);
8684 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
8685 DimsAddr, Builder.getPtrTy());
8686 Value *
P = Builder.CreateConstInBoundsGEP2_32(
8688 Info.RTArgs.PointersArray, 0,
I);
8689 Builder.CreateAlignedStore(
8690 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
8695void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8703 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
8705 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8706 Value *DeleteBit = Builder.CreateAnd(
8709 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8710 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8715 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8717 Value *PtrAndObjBit = Builder.CreateAnd(
8720 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8721 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8722 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8723 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8724 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8725 DeleteCond = Builder.CreateIsNull(
8727 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8730 DeleteCond = Builder.CreateIsNotNull(
8732 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8734 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8735 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8737 emitBlock(BodyBB, MapperFn);
8740 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
8743 Value *MapTypeArg = Builder.CreateAnd(
8746 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8747 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8748 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8749 MapTypeArg = Builder.CreateOr(
8752 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8753 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8757 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8758 ArraySize, MapTypeArg, MapName};
8760 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8768 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
8784 MapperFn->
addFnAttr(Attribute::NoInline);
8785 MapperFn->
addFnAttr(Attribute::NoUnwind);
8795 auto SavedIP = Builder.saveIP();
8796 Builder.SetInsertPoint(EntryBB);
8808 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
8809 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
8810 Value *PtrBegin = BeginIn;
8811 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
8816 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8817 MapType, MapName, ElementSize, HeadBB,
8823 emitBlock(HeadBB, MapperFn);
8828 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
8829 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8832 emitBlock(BodyBB, MapperFn);
8835 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
8839 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
8841 return Info.takeError();
8845 Value *OffloadingArgs[] = {MapperHandle};
8846 Value *PreviousSize = Builder.CreateCall(
8847 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
8849 Value *ShiftedPreviousSize =
8850 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
8853 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
8862 Value *OriMapType = Builder.getInt64(
8863 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8865 Value *MemberMapType =
8866 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8880 Value *LeftToFrom = Builder.CreateAnd(
8883 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8884 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8885 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8894 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
8895 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
8897 emitBlock(AllocBB, MapperFn);
8898 Value *AllocMapType = Builder.CreateAnd(
8901 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8902 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8903 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8904 Builder.CreateBr(EndBB);
8905 emitBlock(AllocElseBB, MapperFn);
8906 Value *IsTo = Builder.CreateICmpEQ(
8909 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8910 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8911 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
8913 emitBlock(ToBB, MapperFn);
8914 Value *ToMapType = Builder.CreateAnd(
8917 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8918 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8919 Builder.CreateBr(EndBB);
8920 emitBlock(ToElseBB, MapperFn);
8921 Value *IsFrom = Builder.CreateICmpEQ(
8924 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8925 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8926 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
8928 emitBlock(FromBB, MapperFn);
8929 Value *FromMapType = Builder.CreateAnd(
8932 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8933 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8935 emitBlock(EndBB, MapperFn);
8938 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
8944 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8945 CurSizeArg, CurMapType, CurNameArg};
8947 auto ChildMapperFn = CustomMapperCB(
I);
8949 return ChildMapperFn.takeError();
8950 if (*ChildMapperFn) {
8952 Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow();
8957 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8964 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
8965 "omp.arraymap.next");
8967 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
8969 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
8971 emitBlock(ExitBB, MapperFn);
8974 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8975 MapType, MapName, ElementSize, DoneBB,
8979 emitBlock(DoneBB, MapperFn,
true);
8981 Builder.CreateRetVoid();
8982 Builder.restoreIP(SavedIP);
8986Error OpenMPIRBuilder::emitOffloadingArrays(
8987 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
8988 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
8989 bool IsNonContiguous,
8993 Info.clearArrayInfo();
8994 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8996 if (
Info.NumberOfPtrs == 0)
8999 Builder.restoreIP(AllocaIP);
9005 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9006 PointerArrayType,
nullptr,
".offload_baseptrs");
9008 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9009 PointerArrayType,
nullptr,
".offload_ptrs");
9010 AllocaInst *MappersArray = Builder.CreateAlloca(
9011 PointerArrayType,
nullptr,
".offload_mappers");
9012 Info.RTArgs.MappersArray = MappersArray;
9019 ConstantInt::get(Int64Ty, 0));
9021 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9024 if (IsNonContiguous &&
9025 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9026 CombinedInfo.Types[
I] &
9027 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9029 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9035 RuntimeSizes.set(
I);
9038 if (RuntimeSizes.all()) {
9040 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9041 SizeArrayType,
nullptr,
".offload_sizes");
9046 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9047 auto *SizesArrayGbl =
9052 if (!RuntimeSizes.any()) {
9053 Info.RTArgs.SizesArray = SizesArrayGbl;
9055 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9056 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9059 SizeArrayType,
nullptr,
".offload_sizes");
9062 Builder.CreateMemCpy(
9063 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9064 SizesArrayGbl, OffloadSizeAlign,
9069 Info.RTArgs.SizesArray = Buffer;
9077 for (
auto mapFlag : CombinedInfo.Types)
9079 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9081 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9082 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9083 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9086 if (!CombinedInfo.Names.empty()) {
9087 auto *MapNamesArrayGbl = createOffloadMapnames(
9088 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9089 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9090 Info.EmitDebug =
true;
9092 Info.RTArgs.MapNamesArray =
9094 Info.EmitDebug =
false;
9099 if (
Info.separateBeginEndCalls()) {
9100 bool EndMapTypesDiffer =
false;
9102 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9103 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9104 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9105 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9106 EndMapTypesDiffer =
true;
9109 if (EndMapTypesDiffer) {
9110 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9111 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9116 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9117 Value *BPVal = CombinedInfo.BasePointers[
I];
9118 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9121 Builder.CreateAlignedStore(BPVal, BP,
9122 M.getDataLayout().getPrefTypeAlign(PtrTy));
9124 if (
Info.requiresDevicePointerInfo()) {
9125 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9126 CodeGenIP = Builder.saveIP();
9127 Builder.restoreIP(AllocaIP);
9128 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9129 Builder.restoreIP(CodeGenIP);
9131 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9132 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9133 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9135 DeviceAddrCB(
I, BP);
9139 Value *PVal = CombinedInfo.Pointers[
I];
9140 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9144 Builder.CreateAlignedStore(PVal,
P,
9145 M.getDataLayout().getPrefTypeAlign(PtrTy));
9147 if (RuntimeSizes.test(
I)) {
9148 Value *S = Builder.CreateConstInBoundsGEP2_32(
9152 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9155 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9158 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9161 auto CustomMFunc = CustomMapperCB(
I);
9163 return CustomMFunc.takeError();
9165 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9167 Value *MAddr = Builder.CreateInBoundsGEP(
9169 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9170 Builder.CreateAlignedStore(
9171 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9174 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9175 Info.NumberOfPtrs == 0)
9177 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9182 BasicBlock *CurBB = Builder.GetInsertBlock();
9189 Builder.CreateBr(
Target);
9192 Builder.ClearInsertionPoint();
9197 BasicBlock *CurBB = Builder.GetInsertBlock();
9213 Builder.SetInsertPoint(BB);
9216Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9217 BodyGenCallbackTy ElseGen,
9218 InsertPointTy AllocaIP) {
9222 auto CondConstant = CI->getSExtValue();
9224 return ThenGen(AllocaIP, Builder.saveIP());
9226 return ElseGen(AllocaIP, Builder.saveIP());
9236 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9238 emitBlock(ThenBlock, CurFn);
9239 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9241 emitBranch(ContBlock);
9244 emitBlock(ElseBlock, CurFn);
9245 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9248 emitBranch(ContBlock);
9250 emitBlock(ContBlock, CurFn,
true);
9254bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9258 "Unexpected Atomic Ordering.");
9315OpenMPIRBuilder::InsertPointTy
9316OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9317 AtomicOpValue &
X, AtomicOpValue &V,
9319 if (!updateToLocation(
Loc))
9322 assert(
X.Var->getType()->isPointerTy() &&
9323 "OMP Atomic expects a pointer to target memory");
9324 Type *XElemTy =
X.ElemTy;
9327 "OMP atomic read expected a scalar type");
9329 Value *XRead =
nullptr;
9333 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9339 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9344 OpenMPIRBuilder::AtomicInfo atomicInfo(
9345 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9346 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9347 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9348 XRead = AtomicLoadRes.first;
9355 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9358 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9360 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9363 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9364 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9365 return Builder.saveIP();
9368OpenMPIRBuilder::InsertPointTy
9369OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9370 AtomicOpValue &
X,
Value *Expr,
9372 if (!updateToLocation(
Loc))
9375 assert(
X.Var->getType()->isPointerTy() &&
9376 "OMP Atomic expects a pointer to target memory");
9377 Type *XElemTy =
X.ElemTy;
9380 "OMP atomic write expected a scalar type");
9383 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9386 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9390 OpenMPIRBuilder::AtomicInfo atomicInfo(
9391 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9392 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9393 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9400 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9401 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9405 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9406 return Builder.saveIP();
9409OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9410 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9412 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9413 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9415 if (!updateToLocation(
Loc))
9419 Type *XTy =
X.Var->getType();
9421 "OMP Atomic expects a pointer to target memory");
9422 Type *XElemTy =
X.ElemTy;
9425 "OMP atomic update expected a scalar type");
9428 "OpenMP atomic does not support LT or GT operations");
9432 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9433 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9435 return AtomicResult.takeError();
9436 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9437 return Builder.saveIP();
9441Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9445 return Builder.CreateAdd(Src1, Src2);
9447 return Builder.CreateSub(Src1, Src2);
9449 return Builder.CreateAnd(Src1, Src2);
9451 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9453 return Builder.CreateOr(Src1, Src2);
9455 return Builder.CreateXor(Src1, Src2);
9480 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9481 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9484 bool emitRMWOp =
false;
9492 emitRMWOp = XElemTy;
9495 emitRMWOp = (IsXBinopExpr && XElemTy);
9502 std::pair<Value *, Value *> Res;
9507 if (IsIgnoreDenormalMode)
9508 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9510 if (!IsFineGrainedMemory)
9511 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9513 if (!IsRemoteMemory)
9517 Res.first = RMWInst;
9522 Res.second = Res.first;
9524 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9528 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9534 OpenMPIRBuilder::AtomicInfo atomicInfo(
9535 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9536 OldVal->
getAlign(),
true , AllocaIP,
X);
9537 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9538 BasicBlock *CurBB = Builder.GetInsertBlock();
9540 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9544 X->getName() +
".atomic.cont");
9546 Builder.restoreIP(AllocaIP);
9547 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9548 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9549 Builder.SetInsertPoint(ContBB);
9551 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9556 Value *Upd = *CBResult;
9557 Builder.CreateStore(Upd, NewAtomicAddr);
9560 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9561 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9563 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9564 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9566 Res.first = OldExprVal;
9572 Builder.SetInsertPoint(ExitBB);
9574 Builder.SetInsertPoint(ExitTI);
9580 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9587 BasicBlock *CurBB = Builder.GetInsertBlock();
9589 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9593 X->getName() +
".atomic.cont");
9595 Builder.restoreIP(AllocaIP);
9596 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9597 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9598 Builder.SetInsertPoint(ContBB);
9600 PHI->addIncoming(OldVal, CurBB);
9605 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9606 X->getName() +
".atomic.fltCast");
9608 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9609 X->getName() +
".atomic.ptrCast");
9616 Value *Upd = *CBResult;
9617 Builder.CreateStore(Upd, NewAtomicAddr);
9618 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9623 Result->setVolatile(VolatileX);
9624 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9625 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9626 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9627 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9629 Res.first = OldExprVal;
9636 Builder.SetInsertPoint(ExitBB);
9638 Builder.SetInsertPoint(ExitTI);
9645OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9646 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9649 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9650 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9651 if (!updateToLocation(
Loc))
9655 Type *XTy =
X.Var->getType();
9657 "OMP Atomic expects a pointer to target memory");
9658 Type *XElemTy =
X.ElemTy;
9661 "OMP atomic capture expected a scalar type");
9663 "OpenMP atomic does not support LT or GT operations");
9670 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9671 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9674 Value *CapturedVal =
9675 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9676 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9678 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9679 return Builder.saveIP();
9682OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9683 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9689 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9690 IsPostfixUpdate, IsFailOnly, Failure);
9693OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9694 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9699 if (!updateToLocation(
Loc))
9702 assert(
X.Var->getType()->isPointerTy() &&
9703 "OMP atomic expects a pointer to target memory");
9706 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9707 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9710 bool IsInteger =
E->getType()->isIntegerTy();
9712 if (
Op == OMPAtomicCompareOp::EQ) {
9717 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9718 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9723 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9727 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9729 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9731 "OldValue and V must be of same type");
9732 if (IsPostfixUpdate) {
9733 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9735 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9746 BasicBlock *CurBB = Builder.GetInsertBlock();
9748 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9750 CurBBTI,
X.Var->getName() +
".atomic.exit");
9756 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9758 Builder.SetInsertPoint(ContBB);
9759 Builder.CreateStore(OldValue, V.Var);
9760 Builder.CreateBr(ExitBB);
9765 Builder.SetInsertPoint(ExitBB);
9767 Builder.SetInsertPoint(ExitTI);
9770 Value *CapturedValue =
9771 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
9772 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9778 assert(
R.Var->getType()->isPointerTy() &&
9779 "r.var must be of pointer type");
9780 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9782 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9783 Value *ResultCast =
R.IsSigned
9784 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
9785 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
9786 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
9789 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9790 "Op should be either max or min at this point");
9791 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9829 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
9831 Value *CapturedValue =
nullptr;
9832 if (IsPostfixUpdate) {
9833 CapturedValue = OldValue;
9858 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
9859 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
9861 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9865 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
9867 return Builder.saveIP();
9870OpenMPIRBuilder::InsertPointOrErrorTy
9871OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
9872 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
9875 if (!updateToLocation(
Loc))
9876 return InsertPointTy();
9879 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
9880 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
9885 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
9886 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
9887 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
9907 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
9908 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
9910 splitBB(Builder,
true,
"teams.alloca");
9912 bool SubClausesPresent =
9913 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9915 if (!Config.isTargetDevice() && SubClausesPresent) {
9916 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9917 "if lowerbound is non-null, then upperbound must also be non-null "
9918 "for bounds on num_teams");
9920 if (NumTeamsUpper ==
nullptr)
9921 NumTeamsUpper = Builder.getInt32(0);
9923 if (NumTeamsLower ==
nullptr)
9924 NumTeamsLower = NumTeamsUpper;
9928 "argument to if clause must be an integer value");
9932 IfExpr = Builder.CreateICmpNE(IfExpr,
9933 ConstantInt::get(IfExpr->
getType(), 0));
9934 NumTeamsUpper = Builder.CreateSelect(
9935 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
9938 NumTeamsLower = Builder.CreateSelect(
9939 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
9942 if (ThreadLimit ==
nullptr)
9943 ThreadLimit = Builder.getInt32(0);
9945 Value *ThreadNum = getOrCreateThreadID(Ident);
9947 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
9948 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9951 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
9952 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
9953 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9957 OI.EntryBB = AllocaBB;
9959 OI.OuterAllocaBB = &OuterAllocaBB;
9963 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
9965 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9967 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9969 auto HostPostOutlineCB = [
this, Ident,
9970 ToBeDeleted](
Function &OutlinedFn)
mutable {
9975 "there must be a single user for the outlined function");
9980 "Outlined function must have two or three arguments only");
9982 bool HasShared = OutlinedFn.
arg_size() == 3;
9990 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9991 "outlined function.");
9992 Builder.SetInsertPoint(StaleCI);
9994 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
9997 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
9998 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10002 I->eraseFromParent();
10005 if (!Config.isTargetDevice())
10006 OI.PostOutlineCB = HostPostOutlineCB;
10008 addOutlineInfo(std::move(OI));
10010 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10012 return Builder.saveIP();
10015OpenMPIRBuilder::InsertPointOrErrorTy
10016OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10017 InsertPointTy OuterAllocaIP,
10018 BodyGenCallbackTy BodyGenCB) {
10019 if (!updateToLocation(
Loc))
10020 return InsertPointTy();
10022 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10024 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10026 splitBB(Builder,
true,
"distribute.entry");
10027 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10030 splitBB(Builder,
true,
"distribute.exit");
10032 splitBB(Builder,
true,
"distribute.body");
10034 splitBB(Builder,
true,
"distribute.alloca");
10037 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10038 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10039 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10044 if (Config.isTargetDevice()) {
10046 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10047 OI.EntryBB = AllocaBB;
10048 OI.ExitBB = ExitBB;
10050 addOutlineInfo(std::move(OI));
10052 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10054 return Builder.saveIP();
10059 std::string VarName) {
10065 M, MapNamesArrayInit->
getType(),
10068 return MapNamesArrayGlobal;
10073void OpenMPIRBuilder::initializeTypes(
Module &M) {
10076 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10077 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
10078#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10079#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10080 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10081 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10082#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10083 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10084 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
10085#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10086 T = StructType::getTypeByName(Ctx, StructName); \
10088 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10090 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10091#include "llvm/Frontend/OpenMP/OMPKinds.def"
10094void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10102 while (!Worklist.
empty()) {
10106 if (
BlockSet.insert(SuccBB).second)
10115 if (!Config.isGPU()) {
10130 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10131 Fn->
addFnAttr(Attribute::MustProgress);
10135void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10136 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10139 if (OffloadInfoManager.empty())
10143 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10144 TargetRegionEntryInfo>,
10146 OrderedEntries(OffloadInfoManager.size());
10149 auto &&GetMDInt = [
this](
unsigned V) {
10156 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10157 auto &&TargetRegionMetadataEmitter =
10158 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10159 const TargetRegionEntryInfo &EntryInfo,
10160 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10173 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10174 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10175 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10176 GetMDInt(
E.getOrder())};
10179 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10185 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10188 auto &&DeviceGlobalVarMetadataEmitter =
10189 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10191 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10199 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10200 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10203 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10204 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10210 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10211 DeviceGlobalVarMetadataEmitter);
10213 for (
const auto &
E : OrderedEntries) {
10214 assert(
E.first &&
"All ordered entries must exist!");
10215 if (
const auto *CE =
10218 if (!
CE->getID() || !
CE->getAddress()) {
10220 TargetRegionEntryInfo EntryInfo =
E.second;
10221 StringRef FnName = EntryInfo.ParentName;
10222 if (!M.getNamedValue(FnName))
10224 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10227 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10230 }
else if (
const auto *CE =
dyn_cast<
10231 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10233 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10234 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10237 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10238 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10239 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10241 if (!
CE->getAddress()) {
10242 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10246 if (
CE->getVarSize() == 0)
10249 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10250 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10251 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10252 "Declaret target link address is set.");
10253 if (Config.isTargetDevice())
10255 if (!
CE->getAddress()) {
10256 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10268 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10269 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10274 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10275 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10276 Flags,
CE->getLinkage(),
CE->getVarName());
10278 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10279 Flags,
CE->getLinkage());
10290 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10295 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10296 Config.getRequiresFlags());
10299void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10301 unsigned FileID,
unsigned Line,
unsigned Count) {
10303 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10304 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10306 OS <<
"_" <<
Count;
10309void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10311 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10312 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10313 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10314 EntryInfo.Line, NewCount);
10317TargetRegionEntryInfo
10318OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10322 auto FileIDInfo = CallBack();
10326 FileID =
Status->getUniqueID().getFile();
10330 FileID =
hash_value(std::get<0>(FileIDInfo));
10333 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10334 std::get<1>(FileIDInfo));
10337unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10340 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10342 !(Remain & 1); Remain = Remain >> 1)
10348OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10351 << getFlagMemberOffset());
10354void OpenMPIRBuilder::setCorrectMemberOfFlag(
10360 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10362 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10369 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10370 Flags |= MemberOfFlag;
10373Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10374 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10375 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10376 bool IsDeclaration,
bool IsExternallyVisible,
10377 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10378 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10379 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10380 std::function<
Constant *()> GlobalInitializer,
10387 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10388 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10390 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10391 Config.hasRequiresUnifiedSharedMemory())) {
10396 if (!IsExternallyVisible)
10397 OS <<
format(
"_%x", EntryInfo.FileID);
10398 OS <<
"_decl_tgt_ref_ptr";
10401 Value *
Ptr = M.getNamedValue(PtrName);
10405 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10410 if (!Config.isTargetDevice()) {
10411 if (GlobalInitializer)
10412 GV->setInitializer(GlobalInitializer());
10417 registerTargetGlobalVariable(
10418 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10419 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10429void OpenMPIRBuilder::registerTargetGlobalVariable(
10430 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10431 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10432 bool IsDeclaration,
bool IsExternallyVisible,
10433 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10434 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10435 std::vector<Triple> TargetTriple,
10436 std::function<
Constant *()> GlobalInitializer,
10439 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10440 (TargetTriple.empty() && !Config.isTargetDevice()))
10443 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10448 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10450 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10451 !Config.hasRequiresUnifiedSharedMemory()) {
10452 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10456 if (!IsDeclaration)
10458 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10465 if (Config.isTargetDevice() &&
10469 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10472 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10474 if (!M.getNamedValue(RefName)) {
10476 getOrCreateInternalVariable(Addr->
getType(), RefName);
10478 GvAddrRef->setConstant(
true);
10480 GvAddrRef->setInitializer(Addr);
10481 GeneratedRefs.push_back(GvAddrRef);
10485 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10486 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10488 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10490 if (Config.isTargetDevice()) {
10494 Addr = getAddrOfDeclareTargetVar(
10495 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10496 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10497 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10500 VarSize = M.getDataLayout().getPointerSize();
10504 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10510void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10514 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10519 auto &&GetMDInt = [MN](
unsigned Idx) {
10524 auto &&GetMDString = [MN](
unsigned Idx) {
10526 return V->getString();
10529 switch (GetMDInt(0)) {
10533 case OffloadEntriesInfoManager::OffloadEntryInfo::
10534 OffloadingEntryInfoTargetRegion: {
10535 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10540 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10544 case OffloadEntriesInfoManager::OffloadEntryInfo::
10545 OffloadingEntryInfoDeviceGlobalVar:
10546 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10548 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10558 if (HostFilePath.
empty())
10562 if (std::error_code Err = Buf.getError()) {
10564 "OpenMPIRBuilder: " +
10572 if (std::error_code Err = M.getError()) {
10574 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10578 loadOffloadInfoMetadata(*M.get());
10585bool OffloadEntriesInfoManager::empty()
const {
10586 return OffloadEntriesTargetRegion.empty() &&
10587 OffloadEntriesDeviceGlobalVar.empty();
10590unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10591 const TargetRegionEntryInfo &EntryInfo)
const {
10592 auto It = OffloadEntriesTargetRegionCount.find(
10593 getTargetRegionEntryCountKey(EntryInfo));
10594 if (It == OffloadEntriesTargetRegionCount.end())
10599void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10600 const TargetRegionEntryInfo &EntryInfo) {
10601 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10602 EntryInfo.Count + 1;
10606void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10607 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10608 OffloadEntriesTargetRegion[EntryInfo] =
10609 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10610 OMPTargetRegionEntryTargetRegion);
10611 ++OffloadingEntriesNum;
10614void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10616 OMPTargetRegionEntryKind Flags) {
10617 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10620 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10624 if (OMPBuilder->Config.isTargetDevice()) {
10626 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10629 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10630 Entry.setAddress(Addr);
10632 Entry.setFlags(Flags);
10634 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10635 hasTargetRegionEntryInfo(EntryInfo,
true))
10637 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10638 "Target region entry already registered!");
10639 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10640 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10641 ++OffloadingEntriesNum;
10643 incrementTargetRegionEntryInfoCount(EntryInfo);
10646bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10647 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10650 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10652 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10653 if (It == OffloadEntriesTargetRegion.end()) {
10657 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10662void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10663 const OffloadTargetRegionEntryInfoActTy &Action) {
10665 for (
const auto &It : OffloadEntriesTargetRegion) {
10666 Action(It.first, It.second);
10670void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10671 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10672 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10673 ++OffloadingEntriesNum;
10676void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10679 if (OMPBuilder->Config.isTargetDevice()) {
10681 if (!hasDeviceGlobalVarEntryInfo(VarName))
10683 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10684 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10685 if (Entry.getVarSize() == 0) {
10686 Entry.setVarSize(VarSize);
10691 Entry.setVarSize(VarSize);
10693 Entry.setAddress(Addr);
10695 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10696 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10697 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10698 "Entry not initialized!");
10699 if (Entry.getVarSize() == 0) {
10700 Entry.setVarSize(VarSize);
10705 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10706 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10707 Addr, VarSize, Flags,
Linkage,
10710 OffloadEntriesDeviceGlobalVar.try_emplace(
10711 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10712 ++OffloadingEntriesNum;
10716void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10717 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10719 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
10720 Action(
E.getKey(),
E.getValue());
10727void CanonicalLoopInfo::collectControlBlocks(
10734 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10737BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10746void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10758void CanonicalLoopInfo::mapIndVar(
10768 for (
Use &U : OldIV->
uses()) {
10772 if (
User->getParent() == getCond())
10774 if (
User->getParent() == getLatch())
10780 Value *NewIV = Updater(OldIV);
10783 for (
Use *U : ReplacableUses)
10791void CanonicalLoopInfo::assertOK()
const {
10804 "Preheader must terminate with unconditional branch");
10806 "Preheader must jump to header");
10810 "Header must terminate with unconditional branch");
10811 assert(Header->getSingleSuccessor() ==
Cond &&
10812 "Header must jump to exiting block");
10815 assert(
Cond->getSinglePredecessor() == Header &&
10816 "Exiting block only reachable from header");
10819 "Exiting block must terminate with conditional branch");
10821 "Exiting block must have two successors");
10823 "Exiting block's first successor jump to the body");
10825 "Exiting block's second successor must exit the loop");
10829 "Body only reachable from exiting block");
10834 "Latch must terminate with unconditional branch");
10843 "Exit block must terminate with unconditional branch");
10844 assert(
Exit->getSingleSuccessor() == After &&
10845 "Exit block must jump to after block");
10849 "After block only reachable from exit block");
10853 assert(IndVar &&
"Canonical induction variable not found?");
10855 "Induction variable must be an integer");
10857 "Induction variable must be a PHI in the loop header");
10863 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
10870 Value *TripCount = getTripCount();
10871 assert(TripCount &&
"Loop trip count not found?");
10873 "Trip count and induction variable must have the same type");
10877 "Exit condition must be a signed less-than comparison");
10879 "Exit condition must compare the induction variable");
10881 "Exit condition must compare with the trip count");
10885void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user-specified parameters.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...