65#define DEBUG_TYPE "openmp-ir-builder"
72 cl::desc(
"Use optimistic attributes describing "
73 "'as-if' properties of runtime calls."),
77 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
78 cl::desc(
"Factor for the unroll threshold to account for code "
79 "simplifications still taking place"),
90 if (!IP1.isSet() || !IP2.isSet())
92 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
97 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
98 case OMPScheduleType::UnorderedStaticChunked:
99 case OMPScheduleType::UnorderedStatic:
100 case OMPScheduleType::UnorderedDynamicChunked:
101 case OMPScheduleType::UnorderedGuidedChunked:
102 case OMPScheduleType::UnorderedRuntime:
103 case OMPScheduleType::UnorderedAuto:
104 case OMPScheduleType::UnorderedTrapezoidal:
105 case OMPScheduleType::UnorderedGreedy:
106 case OMPScheduleType::UnorderedBalanced:
107 case OMPScheduleType::UnorderedGuidedIterativeChunked:
108 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
109 case OMPScheduleType::UnorderedSteal:
110 case OMPScheduleType::UnorderedStaticBalancedChunked:
111 case OMPScheduleType::UnorderedGuidedSimd:
112 case OMPScheduleType::UnorderedRuntimeSimd:
113 case OMPScheduleType::OrderedStaticChunked:
114 case OMPScheduleType::OrderedStatic:
115 case OMPScheduleType::OrderedDynamicChunked:
116 case OMPScheduleType::OrderedGuidedChunked:
117 case OMPScheduleType::OrderedRuntime:
118 case OMPScheduleType::OrderedAuto:
119 case OMPScheduleType::OrderdTrapezoidal:
120 case OMPScheduleType::NomergeUnorderedStaticChunked:
121 case OMPScheduleType::NomergeUnorderedStatic:
122 case OMPScheduleType::NomergeUnorderedDynamicChunked:
123 case OMPScheduleType::NomergeUnorderedGuidedChunked:
124 case OMPScheduleType::NomergeUnorderedRuntime:
125 case OMPScheduleType::NomergeUnorderedAuto:
126 case OMPScheduleType::NomergeUnorderedTrapezoidal:
127 case OMPScheduleType::NomergeUnorderedGreedy:
128 case OMPScheduleType::NomergeUnorderedBalanced:
129 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
130 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
131 case OMPScheduleType::NomergeUnorderedSteal:
132 case OMPScheduleType::NomergeOrderedStaticChunked:
133 case OMPScheduleType::NomergeOrderedStatic:
134 case OMPScheduleType::NomergeOrderedDynamicChunked:
135 case OMPScheduleType::NomergeOrderedGuidedChunked:
136 case OMPScheduleType::NomergeOrderedRuntime:
137 case OMPScheduleType::NomergeOrderedAuto:
138 case OMPScheduleType::NomergeOrderedTrapezoidal:
146 SchedType & OMPScheduleType::MonotonicityMask;
147 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
159 Builder.restoreIP(IP);
169 Kernel->getFnAttribute(
"target-features").getValueAsString();
170 if (Features.
count(
"+wavefrontsize64"))
185 bool HasSimdModifier) {
187 switch (ClauseKind) {
188 case OMP_SCHEDULE_Default:
189 case OMP_SCHEDULE_Static:
190 return HasChunks ? OMPScheduleType::BaseStaticChunked
191 : OMPScheduleType::BaseStatic;
192 case OMP_SCHEDULE_Dynamic:
193 return OMPScheduleType::BaseDynamicChunked;
194 case OMP_SCHEDULE_Guided:
195 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
196 : OMPScheduleType::BaseGuidedChunked;
197 case OMP_SCHEDULE_Auto:
199 case OMP_SCHEDULE_Runtime:
200 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
201 : OMPScheduleType::BaseRuntime;
209 bool HasOrderedClause) {
210 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
211 OMPScheduleType::None &&
212 "Must not have ordering nor monotonicity flags already set");
215 ? OMPScheduleType::ModifierOrdered
216 : OMPScheduleType::ModifierUnordered;
217 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
220 if (OrderingScheduleType ==
221 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
222 return OMPScheduleType::OrderedGuidedChunked;
223 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
224 OMPScheduleType::ModifierOrdered))
225 return OMPScheduleType::OrderedRuntime;
227 return OrderingScheduleType;
233 bool HasSimdModifier,
bool HasMonotonic,
234 bool HasNonmonotonic,
bool HasOrderedClause) {
235 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
236 OMPScheduleType::None &&
237 "Must not have monotonicity flags already set");
238 assert((!HasMonotonic || !HasNonmonotonic) &&
239 "Monotonic and Nonmonotonic are contradicting each other");
242 return ScheduleType | OMPScheduleType::ModifierMonotonic;
243 }
else if (HasNonmonotonic) {
244 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
254 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
255 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
261 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
269 bool HasSimdModifier,
bool HasMonotonicModifier,
270 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
276 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
277 HasNonmonotonicModifier, HasOrderedClause);
292 assert(!Br->isConditional() &&
293 "BB's terminator must be an unconditional branch (or degenerate)");
296 Br->setSuccessor(0,
Target);
301 NewBr->setDebugLoc(
DL);
307 "Target BB must not have PHI nodes");
327 NewBr->setDebugLoc(
DL);
335 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
339 Builder.SetInsertPoint(Old);
343 Builder.SetCurrentDebugLocation(
DebugLoc);
352 spliceBB(IP, New, CreateBranch,
DL);
353 New->replaceSuccessorsPhiUsesWith(Old, New);
362 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
364 Builder.SetInsertPoint(Builder.GetInsertBlock());
367 Builder.SetCurrentDebugLocation(
DebugLoc);
376 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
378 Builder.SetInsertPoint(Builder.GetInsertBlock());
381 Builder.SetCurrentDebugLocation(
DebugLoc);
388 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
395 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
397 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
398 const Twine &Name =
"",
bool AsPtr =
true) {
399 Builder.restoreIP(OuterAllocaIP);
402 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
406 FakeVal = FakeValAddr;
409 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
414 Builder.restoreIP(InnerAllocaIP);
418 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
// Bit flags recording which clauses of the OpenMP `requires` directive have
// been seen; they are OR-ed into OpenMPIRBuilderConfig::RequiresFlags by the
// constructor and setHasRequires* setters below.
434enum OpenMPOffloadingRequiresDirFlags {
// Nothing recorded yet (initial state of RequiresFlags).
436 OMP_REQ_UNDEFINED = 0x000,
// NOTE(review): presumably "requires seen, no relevant clause" — used as the
// value reported by getRequiresFlags() when no bits were set; confirm.
438 OMP_REQ_NONE = 0x001,
// `requires reverse_offload` was specified.
440 OMP_REQ_REVERSE_OFFLOAD = 0x002,
// `requires unified_address` was specified.
442 OMP_REQ_UNIFIED_ADDRESS = 0x004,
// `requires unified_shared_memory` was specified.
444 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
// `requires dynamic_allocators` was specified.
446 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
452OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
453 : RequiresFlags(OMP_REQ_UNDEFINED) {}
/// Construct a config from the target kind plus the individual `requires`
/// clauses: each true HasRequires* argument sets the corresponding
/// OMP_REQ_* bit in RequiresFlags (which starts out OMP_REQ_UNDEFINED).
455OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
456 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
457 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
458 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
459 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
460 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
461 RequiresFlags(OMP_REQ_UNDEFINED) {
462 if (HasRequiresReverseOffload)
463 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
464 if (HasRequiresUnifiedAddress)
465 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
466 if (HasRequiresUnifiedSharedMemory)
467 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
468 if (HasRequiresDynamicAllocators)
469 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
/// True iff the OMP_REQ_REVERSE_OFFLOAD bit is set in RequiresFlags.
472bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
473 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
/// True iff the OMP_REQ_UNIFIED_ADDRESS bit is set in RequiresFlags.
476bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
477 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
/// True iff the OMP_REQ_UNIFIED_SHARED_MEMORY bit is set in RequiresFlags.
480bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
481 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
/// True iff the OMP_REQ_DYNAMIC_ALLOCATORS bit is set in RequiresFlags.
484bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
485 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
/// Return the raw requires-clause bit mask. When no flags were recorded
/// (hasRequiresFlags() is false — its body is not visible here; presumably
/// "RequiresFlags != OMP_REQ_UNDEFINED", confirm), report OMP_REQ_NONE
/// instead so callers never see the UNDEFINED sentinel.
488int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
489 return hasRequiresFlags() ? RequiresFlags
490 :
static_cast<int64_t
>(OMP_REQ_NONE);
/// Set (Value == true) or clear the OMP_REQ_REVERSE_OFFLOAD bit.
493void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
495 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
497 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
/// Set (Value == true) or clear the OMP_REQ_UNIFIED_ADDRESS bit.
500void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
502 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
504 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
/// Set (Value == true) or clear the OMP_REQ_UNIFIED_SHARED_MEMORY bit.
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
/// Set (Value == true) or clear the OMP_REQ_DYNAMIC_ALLOCATORS bit.
514void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
516 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
518 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
525void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
529 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
531 constexpr const size_t MaxDim = 3;
533 Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
535 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
538 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
539 Value *NumThreads3D =
540 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
542 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
544 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
546 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
548 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
550 ArgsVector = {Version,
552 KernelArgs.RTArgs.BasePointersArray,
553 KernelArgs.RTArgs.PointersArray,
554 KernelArgs.RTArgs.SizesArray,
555 KernelArgs.RTArgs.MapTypesArray,
556 KernelArgs.RTArgs.MapNamesArray,
557 KernelArgs.RTArgs.MappersArray,
558 KernelArgs.NumIterations,
562 KernelArgs.DynCGGroupMem};
570 auto FnAttrs =
Attrs.getFnAttrs();
571 auto RetAttrs =
Attrs.getRetAttrs();
573 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
578 bool Param =
true) ->
void {
579 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
580 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
581 if (HasSignExt || HasZeroExt) {
582 assert(AS.getNumAttributes() == 1 &&
583 "Currently not handling extension attr combined with others.");
585 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
588 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
595#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
596#include "llvm/Frontend/OpenMP/OMPKinds.def"
600#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
602 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
603 addAttrSet(RetAttrs, RetAttrSet, false); \
604 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
605 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
606 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
608#include "llvm/Frontend/OpenMP/OMPKinds.def"
622#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
624 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
626 Fn = M.getFunction(Str); \
628#include "llvm/Frontend/OpenMP/OMPKinds.def"
634#define OMP_RTL(Enum, Str, ...) \
636 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
638#include "llvm/Frontend/OpenMP/OMPKinds.def"
642 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
652 LLVMContext::MD_callback,
654 2, {-1, -1},
true)}));
660 addAttributes(FnID, *Fn);
667 assert(Fn &&
"Failed to create OpenMP runtime function");
675 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
679void OpenMPIRBuilder::initialize() { initializeTypes(M); }
690 for (
auto Inst =
Block->getReverseIterator()->begin();
691 Inst !=
Block->getReverseIterator()->end();) {
704void OpenMPIRBuilder::finalize(
Function *Fn) {
708 for (OutlineInfo &OI : OutlineInfos) {
711 if (Fn && OI.getFunction() != Fn) {
716 ParallelRegionBlockSet.
clear();
718 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
728 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
737 ".omp_par", ArgsInZeroAddressSpace);
741 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
742 assert(Extractor.isEligible() &&
743 "Expected OpenMP outlining to be possible!");
745 for (
auto *V : OI.ExcludeArgsFromAggregate)
746 Extractor.excludeArgFromAggregate(V);
748 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
752 if (TargetCpuAttr.isStringAttribute())
755 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
756 if (TargetFeaturesAttr.isStringAttribute())
757 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
760 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
762 "OpenMP outlined functions should not return a value!");
767 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
774 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
781 "Expected instructions to add in the outlined region entry");
783 End = ArtificialEntry.
rend();
788 if (
I.isTerminator()) {
790 if (OI.EntryBB->getTerminator())
791 OI.EntryBB->getTerminator()->adoptDbgRecords(
792 &ArtificialEntry,
I.getIterator(),
false);
796 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
799 OI.EntryBB->moveBefore(&ArtificialEntry);
806 if (OI.PostOutlineCB)
807 OI.PostOutlineCB(*OutlinedFn);
811 OutlineInfos = std::move(DeferredOutlines);
832 for (
Function *
F : ConstantAllocaRaiseCandidates)
835 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
836 [](EmitMetadataErrorKind Kind,
837 const TargetRegionEntryInfo &EntryInfo) ->
void {
838 errs() <<
"Error of kind: " << Kind
839 <<
" when emitting offload entries and metadata during "
840 "OMPIRBuilder finalization \n";
843 if (!OffloadInfoManager.empty())
844 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
846 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
847 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
848 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
849 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
855bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
// Destroying the builder while outlining work is still queued would silently
// drop it; finalize() must have consumed every OutlineInfo first.
857OpenMPIRBuilder::~OpenMPIRBuilder() {
858 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
866 ConstantInt::get(I32Ty,
Value), Name);
878 UsedArray.
resize(List.size());
879 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
883 if (UsedArray.
empty())
890 GV->setSection(
"llvm.metadata");
894OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
896 auto *Int8Ty = Builder.getInt8Ty();
899 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
907 unsigned Reserve2Flags) {
909 LocFlags |= OMP_IDENT_FLAG_KMPC;
912 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
917 ConstantInt::get(
Int32, Reserve2Flags),
918 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
920 size_t SrcLocStrArgIdx = 4;
921 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
925 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
932 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
933 if (
GV.getInitializer() == Initializer)
938 M, OpenMPIRBuilder::Ident,
941 M.getDataLayout().getDefaultGlobalsAddressSpace());
953 SrcLocStrSize = LocStr.
size();
954 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
962 if (
GV.isConstant() &&
GV.hasInitializer() &&
963 GV.getInitializer() == Initializer)
966 SrcLocStr = Builder.CreateGlobalString(
967 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
975 unsigned Line,
unsigned Column,
981 Buffer.
append(FunctionName);
983 Buffer.
append(std::to_string(Line));
985 Buffer.
append(std::to_string(Column));
988 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
992OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
993 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
994 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1002 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1004 if (
DIFile *DIF = DIL->getFile())
1005 if (std::optional<StringRef> Source = DIF->getSource())
1010 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1011 DIL->getColumn(), SrcLocStrSize);
1014Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1016 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1017 Loc.IP.getBlock()->getParent());
/// Emit, at the current insertion point, a call to
/// __kmpc_global_thread_num for the given source-location \p Ident and
/// return the resulting "omp_global_thread_num" value.
1020Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1021 return Builder.CreateCall(
1022 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1023 "omp_global_thread_num");
1026OpenMPIRBuilder::InsertPointOrErrorTy
1027OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1028 bool ForceSimpleCall,
bool CheckCancelFlag) {
1029 if (!updateToLocation(
Loc))
1038 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1041 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1044 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1047 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1050 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1055 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1057 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1058 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1063 bool UseCancelBarrier =
1064 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1067 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
1068 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1069 : OMPRTL___kmpc_barrier),
1072 if (UseCancelBarrier && CheckCancelFlag)
1073 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1076 return Builder.saveIP();
1079OpenMPIRBuilder::InsertPointOrErrorTy
1080OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1082 omp::Directive CanceledDirective) {
1083 if (!updateToLocation(
Loc))
1087 auto *UI = Builder.CreateUnreachable();
1092 Builder.SetInsertPoint(ThenTI);
1094 Value *CancelKind =
nullptr;
1095 switch (CanceledDirective) {
1096#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1097 case DirectiveEnum: \
1098 CancelKind = Builder.getInt32(Value); \
1100#include "llvm/Frontend/OpenMP/OMPKinds.def"
1106 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1107 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1108 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1110 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1111 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1112 if (CanceledDirective == OMPD_parallel) {
1114 Builder.restoreIP(IP);
1115 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1116 omp::Directive::OMPD_unknown,
1125 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1129 Builder.SetInsertPoint(UI->getParent());
1130 UI->eraseFromParent();
1132 return Builder.saveIP();
1135OpenMPIRBuilder::InsertPointOrErrorTy
1136OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1137 omp::Directive CanceledDirective) {
1138 if (!updateToLocation(
Loc))
1142 auto *UI = Builder.CreateUnreachable();
1143 Builder.SetInsertPoint(UI);
1145 Value *CancelKind =
nullptr;
1146 switch (CanceledDirective) {
1147#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1148 case DirectiveEnum: \
1149 CancelKind = Builder.getInt32(Value); \
1151#include "llvm/Frontend/OpenMP/OMPKinds.def"
1157 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1158 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1159 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1161 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1162 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1163 if (CanceledDirective == OMPD_parallel) {
1165 Builder.restoreIP(IP);
1166 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1167 omp::Directive::OMPD_unknown,
1176 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1180 Builder.SetInsertPoint(UI->getParent());
1181 UI->eraseFromParent();
1183 return Builder.saveIP();
1186OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1187 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1190 if (!updateToLocation(
Loc))
1193 Builder.restoreIP(AllocaIP);
1194 auto *KernelArgsPtr =
1195 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1196 updateToLocation(
Loc);
1200 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1201 Builder.CreateAlignedStore(
1203 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1207 NumThreads, HostPtr, KernelArgsPtr};
1209 Return = Builder.CreateCall(
1210 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1213 return Builder.saveIP();
1216OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1217 const LocationDescription &
Loc,
Value *OutlinedFnID,
1218 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1219 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1221 if (!updateToLocation(
Loc))
1234 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1238 Value *Return =
nullptr;
1242 getKernelArgsVector(Args, Builder, ArgsVector);
1257 Builder.restoreIP(emitTargetKernel(
1258 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1259 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1266 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1268 auto CurFn = Builder.GetInsertBlock()->getParent();
1269 emitBlock(OffloadFailedBlock, CurFn);
1270 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1272 return AfterIP.takeError();
1273 Builder.restoreIP(*AfterIP);
1274 emitBranch(OffloadContBlock);
1275 emitBlock(OffloadContBlock, CurFn,
true);
1276 return Builder.saveIP();
1279Error OpenMPIRBuilder::emitCancelationCheckImpl(
1280 Value *CancelFlag, omp::Directive CanceledDirective,
1281 FinalizeCallbackTy ExitCB) {
1282 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1283 "Unexpected cancellation!");
1288 if (Builder.GetInsertPoint() == BB->
end()) {
1294 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1296 Builder.SetInsertPoint(BB);
1302 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1303 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1308 Builder.SetInsertPoint(CancellationBlock);
1310 if (
Error Err = ExitCB(Builder.saveIP()))
1312 auto &FI = FinalizationStack.back();
1313 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1317 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1336 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1339 "Expected at least tid and bounded tid as arguments");
1340 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1343 assert(CI &&
"Expected call instruction to outlined function");
1344 CI->
getParent()->setName(
"omp_parallel");
1346 Builder.SetInsertPoint(CI);
1347 Type *PtrTy = OMPIRBuilder->VoidPtr;
1351 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1355 Value *Args = ArgsAlloca;
1359 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1360 Builder.restoreIP(CurrentIP);
1363 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1365 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1367 Builder.CreateStore(V, StoreAddress);
1371 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1372 : Builder.getInt32(1);
1375 Value *Parallel51CallArgs[] = {
1379 NumThreads ? NumThreads : Builder.getInt32(-1),
1380 Builder.getInt32(-1),
1384 Builder.getInt64(NumCapturedVars)};
1387 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1389 Builder.CreateCall(RTLFn, Parallel51CallArgs);
1392 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1395 Builder.SetInsertPoint(PrivTID);
1397 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1404 I->eraseFromParent();
1421 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1424 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1427 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1435 F->addMetadata(LLVMContext::MD_callback,
1444 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1447 "Expected at least tid and bounded tid as arguments");
1448 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1451 CI->
getParent()->setName(
"omp_parallel");
1452 Builder.SetInsertPoint(CI);
1455 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1459 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1461 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1468 auto PtrTy = OMPIRBuilder->VoidPtr;
1469 if (IfCondition && NumCapturedVars == 0) {
1474 Builder.CreateCall(RTLFn, RealArgs);
1477 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1480 Builder.SetInsertPoint(PrivTID);
1482 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1489 I->eraseFromParent();
1493OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1494 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1495 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1496 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1497 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1500 if (!updateToLocation(
Loc))
1504 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1505 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1506 Value *ThreadID = getOrCreateThreadID(Ident);
1512 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1516 if (NumThreads && !Config.isTargetDevice()) {
1519 Builder.CreateIntCast(NumThreads,
Int32,
false)};
1521 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1524 if (ProcBind != OMP_PROC_BIND_default) {
1528 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1530 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1533 BasicBlock *InsertBB = Builder.GetInsertBlock();
1538 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1546 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1547 Builder.restoreIP(NewOuter);
1548 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr");
1550 Builder.CreateAlloca(
Int32,
nullptr,
"zero.addr");
1553 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1556 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1560 PointerType ::get(M.getContext(), 0),
1561 "zero.addr.ascast");
1582 auto FiniCBWrapper = [&](InsertPointTy IP) {
1587 Builder.restoreIP(IP);
1589 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1593 "Unexpected insertion point for finalization call!");
1597 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1602 InsertPointTy InnerAllocaIP = Builder.saveIP();
1605 Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr.local");
1609 ToBeDeleted.
push_back(Builder.CreateLoad(
Int32, TIDAddr,
"tid.addr.use"));
1611 Builder.CreateLoad(
Int32, ZeroAddr,
"zero.addr.use");
1629 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1632 assert(BodyGenCB &&
"Expected body generation callback!");
1633 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1634 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1637 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1640 if (Config.isTargetDevice()) {
1642 OI.PostOutlineCB = [=, ToBeDeletedVec =
1643 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1645 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1646 ThreadID, ToBeDeletedVec);
1650 OI.PostOutlineCB = [=, ToBeDeletedVec =
1651 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1653 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1657 OI.OuterAllocaBB = OuterAllocaBlock;
1658 OI.EntryBB = PRegEntryBB;
1659 OI.ExitBB = PRegExitBB;
1663 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1674 ".omp_par", ArgsInZeroAddressSpace);
1679 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1681 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1686 return GV->getValueType() == OpenMPIRBuilder::Ident;
1691 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1694 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1697 if (&V == TIDAddr || &V == ZeroAddr) {
1698 OI.ExcludeArgsFromAggregate.push_back(&V);
1703 for (
Use &U : V.uses())
1705 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1715 if (!V.getType()->isPointerTy()) {
1719 Builder.restoreIP(OuterAllocaIP);
1721 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1725 Builder.SetInsertPoint(InsertBB,
1727 Builder.CreateStore(&V,
Ptr);
1730 Builder.restoreIP(InnerAllocaIP);
1731 Inner = Builder.CreateLoad(V.getType(),
Ptr);
1734 Value *ReplacementValue =
nullptr;
1737 ReplacementValue = PrivTID;
1739 InsertPointOrErrorTy AfterIP =
1740 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1742 return AfterIP.takeError();
1743 Builder.restoreIP(*AfterIP);
1745 InnerAllocaIP.getBlock(),
1746 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1748 assert(ReplacementValue &&
1749 "Expected copy/create callback to set replacement value!");
1750 if (ReplacementValue == &V)
1755 UPtr->set(ReplacementValue);
1780 for (
Value *Output : Outputs)
1783 assert(Outputs.empty() &&
1784 "OpenMP outlining should not produce live-out values!");
1786 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1788 for (
auto *BB : Blocks)
1795 auto FiniInfo = FinalizationStack.pop_back_val();
1797 assert(FiniInfo.DK == OMPD_parallel &&
1798 "Unexpected finalization stack state!");
1802 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1803 if (
Error Err = FiniCB(PreFiniIP))
1807 addOutlineInfo(std::move(OI));
1809 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1810 UI->eraseFromParent();
1815void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1818 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1819 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1821 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1824void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1825 if (!updateToLocation(
Loc))
1830void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1834 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1835 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1836 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1839 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1843void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1844 if (!updateToLocation(
Loc))
1846 emitTaskwaitImpl(
Loc);
1849void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1852 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1853 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1855 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1857 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1861void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1862 if (!updateToLocation(
Loc))
1864 emitTaskyieldImpl(
Loc);
// NOTE(review): the start of this function's signature (return type and name)
// was dropped by the extraction -- this appears to be the static helper that
// materializes the kmp_depend_info array for task dependencies; confirm name
// against upstream. Visible behavior: early-out on an empty dependency list,
// allocate ".dep.arr.addr" in the function entry block, then for each
// dependency store its base address (as i64), its type-store-size, and its
// dependence kind into the BaseAddr/Len/Flags fields of the array element.
1873    OpenMPIRBuilder &OMPBuilder,
1876 if (Dependencies.
empty())
1896 Type *DependInfo = OMPBuilder.DependInfo;
1897 Module &M = OMPBuilder.M;
1899 Value *DepArray =
nullptr;
1900 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
// The dependence array is allocated in the entry block so it dominates all
// uses; the previous insertion point is restored immediately afterwards.
1901 Builder.SetInsertPoint(
1902     OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1905 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1907 Builder.restoreIP(OldIP);
1909 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1911       Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
// deps[DepIdx].base_addr = (intptr_t)Dep.DepVal
1913   Value *Addr = Builder.CreateStructGEP(
1915       static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1916   Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1917   Builder.CreateStore(DepValPtr, Addr);
// deps[DepIdx].len = sizeof(Dep.DepValueType)
1920       DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1921   Builder.CreateStore(
1922       Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
// deps[DepIdx].flags = Dep.DepKind (stored as i8)
1925   Value *Flags = Builder.CreateStructGEP(
1927       static_cast<unsigned int>(RTLDependInfoFields::Flags));
1928   Builder.CreateStore(
1929       ConstantInt::get(Builder.getInt8Ty(),
1930                        static_cast<unsigned int>(Dep.DepKind)),
// Generates IR for an `omp task` construct. Visible structure: split the
// current block into task.alloca / task.body / task.exit, run BodyGenCB over
// the body region, then register an OutlineInfo whose PostOutlineCB replaces
// the outlined stale call with __kmpc_omp_task_alloc + (deps/if-clause
// dependent) task dispatch. NOTE(review): this extraction drops many lines
// (variable declarations, braces, else-arms); code text is untouched.
1936OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1937    const LocationDescription &
Loc, InsertPointTy AllocaIP,
1938    BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1942 if (!updateToLocation(
Loc))
1943   return InsertPointTy();
1946 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1947 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Carve the task region out of the current block: alloca -> body -> exit.
1964 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1965 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1967     splitBB(Builder,
true,
"task.alloca");
1969 InsertPointTy TaskAllocaIP =
1970     InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1971 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1972 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1976 OI.EntryBB = TaskAllocaBB;
1977 OI.OuterAllocaBB = AllocaIP.getBlock();
1978 OI.ExitBB = TaskExitBB;
1983     Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
// Deferred lowering: once the region has been outlined, replace the stale
// call with the real task-allocation and dispatch sequence.
1985 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1986                     Mergeable, Priority, EventHandle, TaskAllocaBB,
1987                     ToBeDeleted](
Function &OutlinedFn)
mutable {
1990        "there must be a single user for the outlined function");
1995   bool HasShareds = StaleCI->
arg_size() > 1;
1996   Builder.SetInsertPoint(StaleCI);
2001       getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2005   Value *ThreadID = getOrCreateThreadID(Ident);
// Task flags: bit 0 = tied; final/mergeable/priority contribute bits 1, 2
// and 5 respectively (values 2, 4, 32 OR-ed in below).
2017   Value *Flags = Builder.getInt32(Tied);
2020       Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2021   Flags = Builder.CreateOr(FinalFlag, Flags);
2025   Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2027   Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2033   Value *TaskSize = Builder.getInt64(
2034       divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2039   Value *SharedsSize = Builder.getInt64(0);
2043   assert(ArgStructAlloca &&
2044          "Unable to find the alloca instruction corresponding to arguments "
2045          "for extracted function");
2048   assert(ArgStructType &&
"Unable to find struct type corresponding to "
2049                           "arguments for extracted function");
2051       Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2056   CallInst *TaskData = Builder.CreateCall(
2057       TaskAllocFn, {Ident, ThreadID, Flags,
2058                     TaskSize, SharedsSize,
// Detachable task: create the allow-completion event and publish its
// address through the user-supplied event handle.
2065     Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2066         OMPRTL___kmpc_task_allow_completion_event);
2068         Builder.CreateCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2070         Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2071                                                     Builder.getPtrTy(0));
2072     EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2073     Builder.CreateStore(EventVal, EventHandleAddr);
// Copy the captured shareds into the runtime-allocated task storage.
2079     Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2080     Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2098         Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2101       VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2102     Value *PriorityData = Builder.CreateInBoundsGEP(
2103         TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2106     Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2108     Builder.CreateStore(Priority, CmplrData);
// if(IfCondition) lowering: undeferred "else" path runs the task inline
// between __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
2133       splitBB(Builder,
true,
"if.end");
2135         Builder.GetInsertPoint()->
getParent()->getTerminator();
2136     Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2137     Builder.SetInsertPoint(IfTerminator);
2140     Builder.SetInsertPoint(ElseTI);
2142     if (Dependencies.size()) {
2144         getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2147         {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2148          ConstantInt::get(Builder.getInt32Ty(), 0),
2152         getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2154         getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2155     Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2158       CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
2160       CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
2162     Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2163     Builder.SetInsertPoint(ThenTI);
// Deferred path: enqueue via __kmpc_omp_task_with_deps or __kmpc_omp_task.
2166   if (Dependencies.size()) {
2168         getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2171         {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2172          DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2177     Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2178     Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
// Rewire uses of the outlined function's shareds argument (arg 1) through a
// load of the task's shareds pointer, then delete dead instructions.
2183   Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2185     LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2187         Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2191     I->eraseFromParent();
2194 addOutlineInfo(std::move(OI));
2195 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2197 return Builder.saveIP();
// Generates IR for an `omp taskgroup` construct: emits __kmpc_taskgroup,
// runs the body callback, and emits __kmpc_end_taskgroup in the split-off
// exit block. NOTE(review): error-propagation lines for BodyGenCB are missing
// from this extraction; code text is untouched.
2200OpenMPIRBuilder::InsertPointOrErrorTy
2201OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2202                                 InsertPointTy AllocaIP,
2203                                 BodyGenCallbackTy BodyGenCB) {
2204 if (!updateToLocation(
Loc))
2205   return InsertPointTy();
2208 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2209 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2210 Value *ThreadID = getOrCreateThreadID(Ident);
// Open the taskgroup region in the runtime.
2214     getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2215 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2217 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2218 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
// Close the region; the runtime waits for child tasks here.
2221 Builder.SetInsertPoint(TaskgroupExitBB);
2224     getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2225 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2227 return Builder.saveIP();
// Generates IR for an `omp sections` construct by lowering it to a canonical
// loop over the section callbacks dispatched through a switch, then applying
// a static workshare-loop schedule. Cancellation is supported via dummy
// branches collected in CancellationBranches and fixed up at the end.
// NOTE(review): many lines (switch creation, loop creation, fixup bodies)
// are missing from this extraction; code text is untouched.
2230OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2231    const LocationDescription &
Loc, InsertPointTy AllocaIP,
2233    FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2236 if (!updateToLocation(
Loc))
// Wrap the user's finalization callback so cancellation branches can be
// recorded and redirected once the exit block is known.
2242 auto FiniCBWrapper = [&](InsertPointTy IP) {
2251   CancellationBranches.
push_back(DummyBranch);
2255 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
// Loop body: switch on the induction variable, one case per section CB.
2273 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2274   Builder.restoreIP(CodeGenIP);
2276       splitBBWithSuffix(Builder,
false,
".sections.after");
2280   unsigned CaseNumber = 0;
2281   for (
auto SectionCB : SectionCBs) {
2283         M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2284     SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2285     Builder.SetInsertPoint(CaseBB);
2287     if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
// Bounds: iterate [0, #sections) with stride 1 over i32.
2299 Value *LB = ConstantInt::get(I32Ty, 0);
2300 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2301 Value *
ST = ConstantInt::get(I32Ty, 1);
2303     Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2307 InsertPointOrErrorTy WsloopIP =
2308     applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2309                              WorksharingLoopType::ForStaticLoop, !IsNowait);
2311   return WsloopIP.takeError();
2312 InsertPointTy AfterIP = *WsloopIP;
2315 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
// Pop and run the finalization entry pushed above.
2318 auto FiniInfo = FinalizationStack.pop_back_val();
2319 assert(FiniInfo.DK == OMPD_sections &&
2320        "Unexpected finalization stack state!");
2321 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2322   Builder.restoreIP(AfterIP);
2324       splitBBWithSuffix(Builder,
true,
"sections.fini");
2325   if (
Error Err = CB(Builder.saveIP()))
2327   AfterIP = {FiniBB, FiniBB->
begin()};
// Redirect the recorded dummy cancellation branches to the real exit.
2331 for (
BranchInst *DummyBranch : CancellationBranches) {
// Generates IR for a single `omp section` inside a sections region, emitted
// as an inlined region (EmitOMPInlinedRegion) with a finalization wrapper
// that repositions the insertion point for the enclosing sections loop.
// NOTE(review): several lines of the wrapper body are missing from this
// extraction; code text is untouched.
2339OpenMPIRBuilder::InsertPointOrErrorTy
2340OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2341                               BodyGenCallbackTy BodyGenCB,
2342                               FinalizeCallbackTy FiniCB) {
2343 if (!updateToLocation(
Loc))
2346 auto FiniCBWrapper = [&](InsertPointTy IP) {
2357   Builder.restoreIP(IP);
2358   auto *CaseBB =
Loc.IP.getBlock();
2362   IP = InsertPointTy(
I->getParent(),
I->getIterator());
2366 Directive OMPD = Directive::OMPD_sections;
// Sections use OMPD_sections for the region kind; no entry/exit runtime
// calls are passed (the two nullptr arguments).
2369 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2377 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
// Returns the hardware thread id within the block via the
// __kmpc_get_hardware_thread_id_in_block device runtime call.
2380Value *OpenMPIRBuilder::getGPUThreadID() {
2381 return Builder.CreateCall(
2382     getOrCreateRuntimeFunction(M,
2383                                OMPRTL___kmpc_get_hardware_thread_id_in_block),
// Returns the target's warp size via the __kmpc_get_warp_size runtime call.
2387Value *OpenMPIRBuilder::getGPUWarpSize() {
2388 return Builder.CreateCall(
2389     getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
// Computes the warp id as thread-id >> log2(warp size); the warp size comes
// from the configured grid values.
2392Value *OpenMPIRBuilder::getNVPTXWarpID() {
2393 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2394 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
// Computes the lane id as thread-id & (warp-size - 1), via a mask of the low
// log2(warp size) bits.
2397Value *OpenMPIRBuilder::getNVPTXLaneID() {
2398 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2399 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2400 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2401 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
// Casts From to ToType: identity if types match, bitcast for equal store
// sizes, integer cast (sign-extending) where applicable, and otherwise a
// store/load round-trip through a temporary alloca placed at AllocaIP.
2405Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2408 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2409 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2410 assert(FromSize > 0 &&
"From size must be greater than zero");
2411 assert(ToSize > 0 &&
"To size must be greater than zero");
2412 if (FromType == ToType)
2414 if (FromSize == ToSize)
2415   return Builder.CreateBitCast(From, ToType);
2417   return Builder.CreateIntCast(From, ToType,
true);
// Fallback: spill to a stack slot of the destination type and reload.
2418 InsertPointTy SaveIP = Builder.saveIP();
2419 Builder.restoreIP(AllocaIP);
2420 Value *CastItem = Builder.CreateAlloca(ToType);
2421 Builder.restoreIP(SaveIP);
2423 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2424     CastItem, Builder.getPtrTy(0));
2425 Builder.CreateStore(From, ValCastItem);
2426 return Builder.CreateLoad(ToType, CastItem);
// Emits a cross-lane shuffle of Element through __kmpc_shuffle_int32 or
// __kmpc_shuffle_int64 (chosen by element size, max 8 bytes), casting the
// value to the shuffle-width integer type and back via castValueToType.
2429Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2433 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2434 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2438 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2440     Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2441 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2442     Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2443               : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2444 Value *WarpSizeCast =
2445     Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2446 Value *ShuffleCall =
2447     Builder.CreateCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2448 return castValueToType(AllocaIP, ShuffleCall, CastTy);
// Shuffles an element of ElemType from SrcAddr into DstAddr chunk-by-chunk,
// trying 8/4/2/1-byte integer widths. Multi-chunk elements are copied in a
// pointer-walking precond/then/exit loop; single-chunk elements use one
// shuffle + (possibly truncated) store. NOTE(review): several lines (loop
// block creation, PHI incoming edges, closing braces) are missing from this
// extraction; code text is untouched.
2451void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2454 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2465 Type *IndexTy = Builder.getIndexTy(
2466     M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2467 Value *ElemPtr = DstAddr;
2469 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2473   Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2474       Ptr, Builder.getPtrTy(0),
Ptr->getName() +
".ascast");
2476       Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2477   ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2478       ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
// More than one chunk of this width: loop while >= IntSize bytes remain.
2481   if ((
Size / IntSize) > 1) {
2482     Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2483         SrcAddrGEP, Builder.getPtrTy());
2488     BasicBlock *CurrentBB = Builder.GetInsertBlock();
2489     emitBlock(PreCondBB, CurFunc);
2491         Builder.CreatePHI(
Ptr->getType(), 2);
2494         Builder.CreatePHI(ElemPtr->
getType(), 2);
2498     Value *PtrDiff = Builder.CreatePtrDiff(
2499         Builder.getInt8Ty(), PtrEnd,
2500         Builder.CreatePointerBitCastOrAddrSpaceCast(
Ptr, Builder.getPtrTy()));
2501     Builder.CreateCondBr(
2502         Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2504     emitBlock(ThenBB, CurFunc);
// Shuffle one chunk and advance both source and destination pointers.
2505     Value *Res = createRuntimeShuffleFunction(
2507         Builder.CreateAlignedLoad(
2508             IntType,
Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2510     Builder.CreateAlignedStore(Res, ElemPtr,
2511                                M.getDataLayout().getPrefTypeAlign(ElemType));
2513         Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2514     Value *LocalElemPtr =
2515         Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2518     emitBranch(PreCondBB);
2519     emitBlock(ExitBB, CurFunc);
// Single chunk: one shuffle, truncating to ElemType when narrower.
2521     Value *Res = createRuntimeShuffleFunction(
2522         AllocaIP, Builder.CreateLoad(IntType,
Ptr), IntType,
Offset);
2525       Res = Builder.CreateTrunc(Res, ElemType);
2526     Builder.CreateStore(Res, ElemPtr);
2527     Ptr = Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2529         Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
// Copies a reduction list from SrcBase to DestBase, element by element.
// CopyAction::RemoteLaneToThread shuffles each element in from a remote lane
// (via shuffleAndStore) into a fresh alloca; CopyAction::ThreadCopy copies in
// place by evaluation kind (scalar / complex / aggregate). NOTE(review):
// several lines (switch statement header, break statements, closing braces)
// are missing from this extraction; code text is untouched.
2535void OpenMPIRBuilder::emitReductionListCopy(
2536    InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2538    CopyOptionsTy CopyOptions) {
2539 Type *IndexTy = Builder.getIndexTy(
2540     M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2541 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2545 for (
auto En :
enumerate(ReductionInfos)) {
2546   const ReductionInfo &RI = En.value();
2547   Value *SrcElementAddr =
nullptr;
2548   Value *DestElementAddr =
nullptr;
2549   Value *DestElementPtrAddr =
nullptr;
2551   bool ShuffleInElement =
false;
2554   bool UpdateDestListPtr =
false;
// Load the source element address out of the source reduction list.
2557   Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2558       ReductionArrayTy, SrcBase,
2559       {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2560   SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2564   DestElementPtrAddr = Builder.CreateInBoundsGEP(
2565       ReductionArrayTy, DestBase,
2566       {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2568   case CopyAction::RemoteLaneToThread: {
// Destination is a private stack element; remember to publish its address
// into the destination list after the copy.
2569     InsertPointTy CurIP = Builder.saveIP();
2570     Builder.restoreIP(AllocaIP);
2571     AllocaInst *DestAlloca = Builder.CreateAlloca(RI.ElementType,
nullptr,
2572                                                   ".omp.reduction.element");
2574         M.getDataLayout().getPrefTypeAlign(RI.ElementType));
2575     DestElementAddr = DestAlloca;
2577         Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2578                                     DestElementAddr->
getName() +
".ascast");
2579     Builder.restoreIP(CurIP);
2580     ShuffleInElement =
true;
2581     UpdateDestListPtr =
true;
2584   case CopyAction::ThreadCopy: {
2586         Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2593   if (ShuffleInElement) {
2594     shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2595                     RemoteLaneOffset, ReductionArrayTy);
2597     switch (RI.EvaluationKind) {
2598     case EvalKind::Scalar: {
2599       Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2601       Builder.CreateStore(Elem, DestElementAddr);
2604     case EvalKind::Complex: {
// Complex values are copied member-wise: real then imaginary part.
2605       Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2606           RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2607       Value *SrcReal = Builder.CreateLoad(
2608           RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2609       Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2610           RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2611       Value *SrcImg = Builder.CreateLoad(
2612           RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2614       Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2615           RI.ElementType, DestElementAddr, 0, 0,
".realp");
2616       Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2617           RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2618       Builder.CreateStore(SrcReal, DestRealPtr);
2619       Builder.CreateStore(SrcImg, DestImgPtr);
2622     case EvalKind::Aggregate: {
2623       Value *SizeVal = Builder.getInt64(
2624           M.getDataLayout().getTypeStoreSize(RI.ElementType));
2625       Builder.CreateMemCpy(
2626           DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2627           SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// Publish the private element's (addrspace-cast) address into the
// destination list when the copy targeted a fresh alloca.
2639   if (UpdateDestListPtr) {
2640     Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2641         DestElementAddr, Builder.getPtrTy(),
2642         DestElementAddr->
getName() +
".ascast");
2643     Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
// NOTE(review): the start of this function's signature was dropped by the
// extraction -- this appears to be the emitter of the
// "_omp_reduction_inter_warp_copy_func" helper; confirm name against
// upstream. Visible behavior: builds a (ptr, i32) function that moves each
// warp's partial reduction values through a shared transfer-medium global
// ("__openmp_nvptx_data_transfer_temporary_storage") in 4/2/1-byte chunks,
// with warp-master writes, barriers, and warp-0 reads back into the list.
2650                                                   AttributeList FuncAttrs) {
2651 InsertPointTy SavedIP = Builder.saveIP();
2654     Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2658                        "_omp_reduction_inter_warp_copy_func", &M);
2663 Builder.SetInsertPoint(EntryBB);
// Lazily create the module-level transfer-medium scratch global.
2681     "__openmp_nvptx_data_transfer_temporary_storage";
2682 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2683 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2685 if (!TransferMedium) {
2694 Value *GPUThreadID = getGPUThreadID();
2696 Value *LaneID = getNVPTXLaneID();
2698 Value *WarpID = getNVPTXWarpID();
2700 InsertPointTy AllocaIP =
2701     InsertPointTy(Builder.GetInsertBlock(),
2702                   Builder.GetInsertBlock()->getFirstInsertionPt());
2705 Builder.restoreIP(AllocaIP);
2706 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2707     Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2709     Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2710 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2711     ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2712 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2713     NumWarpsAlloca, Builder.getPtrTy(0),
2714     NumWarpsAlloca->
getName() +
".ascast");
2715 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2716 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2718 InsertPointTy CodeGenIP =
2720 Builder.restoreIP(CodeGenIP);
2723     Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
2725 for (
auto En :
enumerate(ReductionInfos)) {
2730   const ReductionInfo &RI = En.value();
2731   unsigned RealTySize = M.getDataLayout().getTypeAllocSize(RI.ElementType);
// Decompose the element into progressively smaller power-of-two chunks.
2732   for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2735     unsigned NumIters = RealTySize / TySize;
2738     Value *Cnt =
nullptr;
2739     Value *CntAddr =
nullptr;
2743       CodeGenIP = Builder.saveIP();
2744       Builder.restoreIP(AllocaIP);
2746           Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2748       CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2749                                             CntAddr->
getName() +
".ascast");
2750       Builder.restoreIP(CodeGenIP);
// Chunk-count loop: precond tests Cnt < NumIters.
2757       emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2758       Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2760       Value *
Cmp = Builder.CreateICmpULT(
2761           Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2762       Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2763       emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
// Barrier so all warps have produced their partial values before the
// masters write them to the transfer medium.
2767     InsertPointOrErrorTy BarrierIP1 =
2768         createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2769                       omp::Directive::OMPD_unknown,
2773       return BarrierIP1.takeError();
2779     Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2780     Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2781     emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
// Warp master copies its chunk into transfer_medium[warp_id].
2784     auto *RedListArrayTy =
2786     Type *IndexTy = Builder.getIndexTy(
2787         M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2789         Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2790                                   {ConstantInt::get(IndexTy, 0),
2791                                    ConstantInt::get(IndexTy, En.index())});
2793     Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2795       ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2799     Value *MediumPtr = Builder.CreateInBoundsGEP(
2800         ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2803     Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2805     Builder.CreateStore(Elem, MediumPtr,
2807     Builder.CreateBr(MergeBB);
2810     emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2811     Builder.CreateBr(MergeBB);
2814     emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
// Second barrier: medium is fully populated before warp 0 reads it.
2815     InsertPointOrErrorTy BarrierIP2 =
2816         createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2817                       omp::Directive::OMPD_unknown,
2821       return BarrierIP2.takeError();
2828     Value *NumWarpsVal =
2829         Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
// Threads 0..NumWarps-1 read one warp's value each back into the list.
2831     Value *IsActiveThread =
2832         Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2833     Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2835     emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2839     Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2840         ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2842     Value *TargetElemPtrPtr =
2843         Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2844                                   {ConstantInt::get(IndexTy, 0),
2845                                    ConstantInt::get(IndexTy, En.index())});
2846     Value *TargetElemPtrVal =
2847         Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2848     Value *TargetElemPtr = TargetElemPtrVal;
2851         Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2854     Value *SrcMediumValue =
2855         Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2856     Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2857     Builder.CreateBr(W0MergeBB);
2859     emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2860     Builder.CreateBr(W0MergeBB);
2862     emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2865       Cnt = Builder.CreateNSWAdd(
2866           Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2867       Builder.CreateStore(Cnt, CntAddr,
false);
2869       auto *CurFn = Builder.GetInsertBlock()->
getParent();
2870       emitBranch(PrecondBB);
2871       emitBlock(ExitBB, CurFn);
2873     RealTySize %= TySize;
2877 Builder.CreateRetVoid();
2878 Builder.restoreIP(SavedIP);
// Emits "_omp_reduction_shuffle_and_reduce_func" (ptr, i16 lane_id,
// i16 remote_lane_offset, i16 algo_version): shuffles a remote reduction
// list in, conditionally reduces it into the local list via ReduceFn, and
// (for algorithm 1) conditionally copies the remote list over the local one.
// NOTE(review): many lines (argument extraction, block creation, closing
// braces) are missing from this extraction; code text is untouched.
2883Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2885    AttributeList FuncAttrs) {
2889      {Builder.getPtrTy(), Builder.getInt16Ty(),
2890       Builder.getInt16Ty(), Builder.getInt16Ty()},
2894                        "_omp_reduction_shuffle_and_reduce_func", &M);
2904 Builder.SetInsertPoint(EntryBB);
2915 Type *ReduceListArgType = ReduceListArg->
getType();
2917 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
// Spill each argument to an alloca, addrspace-cast, and reload -- the usual
// device-codegen pattern for taking the address of parameters.
2918 Value *ReduceListAlloca = Builder.CreateAlloca(
2919     ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2920 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2921                                            LaneIDArg->
getName() +
".addr");
2922 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
2923     LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2924 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2925                                             AlgoVerArg->
getName() +
".addr");
2931 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
2932     RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2934 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2935     ReduceListAlloca, ReduceListArgType,
2936     ReduceListAlloca->
getName() +
".ascast");
2937 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2938     LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2939 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2940     RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2941     RemoteLaneOffsetAlloca->
getName() +
".ascast");
2942 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2943     AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2944 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2945     RemoteReductionListAlloca, Builder.getPtrTy(),
2946     RemoteReductionListAlloca->
getName() +
".ascast");
2948 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2949 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
2950 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
2951 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
2953 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
2954 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
2955 Value *RemoteLaneOffset =
2956     Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
2957 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
// Pull the remote lane's reduction list into private storage.
2964 emitReductionListCopy(
2965     AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
2966     ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
// Reduce condition: algo 0 always; algo 1 when lane < offset; algo 2 when
// lane is even and offset > 0.
2989 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
2990 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
2991 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
2992 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
2993 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
2994 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
2995 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
2996 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
2997 Value *RemoteOffsetComp =
2998     Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
2999 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3000 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3001 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3007 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3008 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3009 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3010     ReduceList, Builder.getPtrTy());
3011 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3012     RemoteListAddrCast, Builder.getPtrTy());
3013 Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3014     ->addFnAttr(Attribute::NoUnwind);
3015 Builder.CreateBr(MergeBB);
3017 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3018 Builder.CreateBr(MergeBB);
3020 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
// Algo 1 copy-back: lanes >= offset adopt the remote list wholesale.
3024 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3025 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3026 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3031 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3033 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3034 emitReductionListCopy(AllocaIP, CopyAction::ThreadCopy, RedListArrayTy,
3035                       ReductionInfos, RemoteListAddrCast, ReduceList);
3036 Builder.CreateBr(CpyMergeBB);
3038 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3039 Builder.CreateBr(CpyMergeBB);
3041 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3043 Builder.CreateRetVoid();
// Emits "_omp_reduction_list_to_global_copy_func" (buffer, idx, reduce_list):
// copies each element of a thread's reduction list into the corresponding
// slot of the global reductions buffer, dispatching on evaluation kind
// (scalar / complex member-wise / aggregate memcpy). NOTE(review): several
// lines (argument extraction, break statements, braces) are missing from
// this extraction; code text is untouched.
3048Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3050    AttributeList FuncAttrs) {
3051 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3054     Builder.getVoidTy(),
3055     {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3059                        "_omp_reduction_list_to_global_copy_func", &M);
3066 Builder.SetInsertPoint(EntryBlock);
// Spill arguments to allocas and addrspace-cast their addresses.
3075 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3076                                               BufferArg->
getName() +
".addr");
3077 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3079 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3080     Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3081 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3082     BufferArgAlloca, Builder.getPtrTy(),
3083     BufferArgAlloca->
getName() +
".ascast");
3084 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3085     IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3086 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3087     ReduceListArgAlloca, Builder.getPtrTy(),
3088     ReduceListArgAlloca->
getName() +
".ascast");
3090 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3091 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3092 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3094 Value *LocalReduceList =
3095     Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3096 Value *BufferArgVal =
3097     Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3098 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3099 Type *IndexTy = Builder.getIndexTy(
3100     M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3101 for (
auto En :
enumerate(ReductionInfos)) {
3102   const ReductionInfo &RI = En.value();
3103   auto *RedListArrayTy =
3106   Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3107       RedListArrayTy, LocalReduceList,
3108       {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3110   Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
// Destination slot: buffer[idx] field En.index() of the reductions struct.
3114       Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3115   Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3116       ReductionsBufferTy, BufferVD, 0, En.index());
3118   switch (RI.EvaluationKind) {
3119   case EvalKind::Scalar: {
3120     Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3121     Builder.CreateStore(TargetElement, GlobVal);
3124   case EvalKind::Complex: {
3125     Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3126         RI.ElementType, ElemPtr, 0, 0,
".realp");
3127     Value *SrcReal = Builder.CreateLoad(
3128         RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3129     Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3130         RI.ElementType, ElemPtr, 0, 1,
".imagp");
3131     Value *SrcImg = Builder.CreateLoad(
3132         RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3134     Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3135         RI.ElementType, GlobVal, 0, 0,
".realp");
3136     Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3137         RI.ElementType, GlobVal, 0, 1,
".imagp");
3138     Builder.CreateStore(SrcReal, DestRealPtr);
3139     Builder.CreateStore(SrcImg, DestImgPtr);
3142   case EvalKind::Aggregate: {
3144         Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3145     Builder.CreateMemCpy(
3146         GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3147         M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3153 Builder.CreateRetVoid();
3154 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_list_to_global_reduce_func" (buffer, idx,
// reduce_list): builds a local reduction list whose element pointers point
// into the global buffer row buffer[idx], then calls ReduceFn(local_list,
// reduce_list) to reduce into the buffer. NOTE(review): several lines are
// missing from this extraction; code text is untouched.
3158Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3160    Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3161 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3164     Builder.getVoidTy(),
3165     {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3169                        "_omp_reduction_list_to_global_reduce_func", &M);
3176 Builder.SetInsertPoint(EntryBlock);
// Spill arguments to allocas and addrspace-cast their addresses.
3185 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3186                                               BufferArg->
getName() +
".addr");
3187 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3189 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3190     Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3191 auto *RedListArrayTy =
// Scratch list that will alias the global buffer's element slots.
3196 Value *LocalReduceList =
3197     Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3199 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3200     BufferArgAlloca, Builder.getPtrTy(),
3201     BufferArgAlloca->
getName() +
".ascast");
3202 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3203     IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3204 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3205     ReduceListArgAlloca, Builder.getPtrTy(),
3206     ReduceListArgAlloca->
getName() +
".ascast");
3207 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3208     LocalReduceList, Builder.getPtrTy(),
3209     LocalReduceList->
getName() +
".ascast");
3211 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3212 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3213 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3215 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3216 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3217 Type *IndexTy = Builder.getIndexTy(
3218     M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3219 for (
auto En :
enumerate(ReductionInfos)) {
// local_list[i] = &buffer[idx].field_i
3220   Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3221       RedListArrayTy, LocalReduceListAddrCast,
3222       {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3224       Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3226   Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3227       ReductionsBufferTy, BufferVD, 0, En.index());
3228   Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3233     Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3234 Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3235     ->addFnAttr(Attribute::NoUnwind);
3236 Builder.CreateRetVoid();
3237 Builder.restoreIP(OldIP);
// Emits the device helper "_omp_reduction_global_to_list_copy_func".
// Signature (visible here): void(ptr Buffer, i32 Idx, ptr ReduceList).
// Copies each reduction element out of global buffer element `Idx` into
// the corresponding slot of the (team-local) reduce list, dispatching on
// the element's evaluation kind (scalar / complex / aggregate).
// NOTE(review): extraction gaps — some original lines (e.g. RedListArrayTy's
// initializer, the `break;`/closing braces of the switch cases) are missing
// from this chunk.
3241Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
 3243 AttributeList FuncAttrs) {
 // Remember the caller's insertion point; restored before returning.
 3244 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
 3247 Builder.getVoidTy(),
 3248 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
 3252 "_omp_reduction_global_to_list_copy_func", &M);
 3259 Builder.SetInsertPoint(EntryBlock);
 // Spill the three incoming arguments to ".addr" allocas, then
 // addrspace-cast each to a generic pointer.
 3268 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
 nullptr,
 3269 BufferArg->
 getName() +
 ".addr");
 3270 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
 nullptr,
 3272 Value *ReduceListArgAlloca = Builder.CreateAlloca(
 3273 Builder.getPtrTy(),
 nullptr, ReduceListArg->
 getName() +
 ".addr");
 3274 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
 3275 BufferArgAlloca, Builder.getPtrTy(),
 3276 BufferArgAlloca->
 getName() +
 ".ascast");
 3277 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
 3278 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
 getName() +
 ".ascast");
 3279 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
 3280 ReduceListArgAlloca, Builder.getPtrTy(),
 3281 ReduceListArgAlloca->
 getName() +
 ".ascast");
 3282 Builder.CreateStore(BufferArg, BufferArgAddrCast);
 3283 Builder.CreateStore(IdxArg, IdxArgAddrCast);
 3284 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
 // Reload list pointer, buffer pointer, and element index.
 3286 Value *LocalReduceList =
 3287 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
 3288 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
 3289 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
 3290 Type *IndexTy = Builder.getIndexTy(
 3291 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
 // Per reduction variable: locate source field in the buffer element and
 // the destination slot in the reduce list, then copy by kind.
 3292 for (
 auto En :
 enumerate(ReductionInfos)) {
 3293 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
 3294 auto *RedListArrayTy =
 3297 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
 3298 RedListArrayTy, LocalReduceList,
 3299 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
 3301 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
 3304 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
 3305 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
 3306 ReductionsBufferTy, BufferVD, 0, En.index());
 3308 switch (RI.EvaluationKind) {
 // Scalar: single load/store of the element type.
 3309 case EvalKind::Scalar: {
 3310 Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
 3311 Builder.CreateStore(TargetElement, ElemPtr);
 // Complex: copy real and imaginary struct fields separately.
 3314 case EvalKind::Complex: {
 3315 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
 3316 RI.ElementType, GlobValPtr, 0, 0,
 ".realp");
 3317 Value *SrcReal = Builder.CreateLoad(
 3318 RI.ElementType->getStructElementType(0), SrcRealPtr,
 ".real");
 3319 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
 3320 RI.ElementType, GlobValPtr, 0, 1,
 ".imagp");
 3321 Value *SrcImg = Builder.CreateLoad(
 3322 RI.ElementType->getStructElementType(1), SrcImgPtr,
 ".imag");
 3324 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
 3325 RI.ElementType, ElemPtr, 0, 0,
 ".realp");
 3326 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
 3327 RI.ElementType, ElemPtr, 0, 1,
 ".imagp");
 3328 Builder.CreateStore(SrcReal, DestRealPtr);
 3329 Builder.CreateStore(SrcImg, DestImgPtr);
 // Aggregate: memcpy of the whole store size with preferred alignment.
 3332 case EvalKind::Aggregate: {
 3334 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
 3335 Builder.CreateMemCpy(
 3336 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
 3337 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
 3344 Builder.CreateRetVoid();
 3345 Builder.restoreIP(OldIP);
// Emits the device helper "_omp_reduction_global_to_list_reduce_func".
// Signature (visible here): void(ptr Buffer, i32 Idx, ptr ReduceList).
// Mirror image of emitListToGlobalReduceFunction: builds a local list
// pointing into buffer element `Idx`, then calls
// ReduceFn(ReduceList, bufferList) so the buffer contents are reduced
// into the incoming list (note the swapped argument order vs. the
// list-to-global variant above).
// NOTE(review): extraction gaps — RedListArrayTy's initializer and the
// function's trailing return are not visible in this chunk.
3349Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
 3351 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
 // Remember the caller's insertion point; restored before returning.
 3352 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
 3355 Builder.getVoidTy(),
 3356 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
 3360 "_omp_reduction_global_to_list_reduce_func", &M);
 3367 Builder.SetInsertPoint(EntryBlock);
 // Spill the three incoming arguments to ".addr" allocas.
 3376 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
 nullptr,
 3377 BufferArg->
 getName() +
 ".addr");
 3378 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
 nullptr,
 3380 Value *ReduceListArgAlloca = Builder.CreateAlloca(
 3381 Builder.getPtrTy(),
 nullptr, ReduceListArg->
 getName() +
 ".addr");
 // Local array of element pointers backed by the global buffer element.
 3387 Value *LocalReduceList =
 3388 Builder.CreateAlloca(RedListArrayTy,
 nullptr,
 ".omp.reduction.red_list");
 // Addrspace-cast everything to generic pointers.
 3390 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
 3391 BufferArgAlloca, Builder.getPtrTy(),
 3392 BufferArgAlloca->
 getName() +
 ".ascast");
 3393 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
 3394 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
 getName() +
 ".ascast");
 3395 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
 3396 ReduceListArgAlloca, Builder.getPtrTy(),
 3397 ReduceListArgAlloca->
 getName() +
 ".ascast");
 3398 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
 3399 LocalReduceList, Builder.getPtrTy(),
 3400 LocalReduceList->
 getName() +
 ".ascast");
 3402 Builder.CreateStore(BufferArg, BufferArgAddrCast);
 3403 Builder.CreateStore(IdxArg, IdxArgAddrCast);
 3404 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
 3406 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
 3407 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
 3408 Type *IndexTy = Builder.getIndexTy(
 3409 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
 // Point each local-list slot at field En.index() of buffer element Idx.
 3410 for (
 auto En :
 enumerate(ReductionInfos)) {
 3411 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
 3412 RedListArrayTy, ReductionList,
 3413 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
 3416 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
 3417 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
 3418 ReductionsBufferTy, BufferVD, 0, En.index());
 3419 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
 // Reduce the buffer-backed list into the incoming reduce list.
 3424 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
 3425 Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
 3426 ->addFnAttr(Attribute::NoUnwind);
 3427 Builder.CreateRetVoid();
 3428 Builder.restoreIP(OldIP);
// Returns the mangled name for the per-construct reduction function:
// `Name` followed by the platform-specific suffix built from the parts
// {"omp", "reduction", "reduction_func"}.
3432std::string OpenMPIRBuilder::getReductionFuncName(
 StringRef Name)
 const {
 3433 std::string Suffix =
 3434 createPlatformSpecificName({
 "omp",
 "reduction",
 "reduction_func"});
 3435 return (Name + Suffix).
 str();
3440 ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
3442 {Builder.getPtrTy(), Builder.getPtrTy()},
3444 std::string
Name = getReductionFuncName(ReducerName);
3452 Builder.SetInsertPoint(EntryBB);
3456 Value *LHSArrayPtr =
nullptr;
3457 Value *RHSArrayPtr =
nullptr;
3464 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3466 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3467 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3468 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3469 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3470 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3471 Builder.CreateStore(Arg0, LHSAddrCast);
3472 Builder.CreateStore(Arg1, RHSAddrCast);
3473 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3474 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3477 Type *IndexTy = Builder.getIndexTy(
3478 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3480 for (
auto En :
enumerate(ReductionInfos)) {
3481 const ReductionInfo &RI = En.value();
3482 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3483 RedArrayTy, RHSArrayPtr,
3484 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3485 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3486 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3487 RHSI8Ptr, RI.PrivateVariable->getType(),
3488 RHSI8Ptr->
getName() +
".ascast");
3490 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3491 RedArrayTy, LHSArrayPtr,
3492 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3493 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3494 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3495 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3497 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3501 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3502 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3504 InsertPointOrErrorTy AfterIP =
3505 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3507 return AfterIP.takeError();
3508 if (!Builder.GetInsertBlock())
3509 return ReductionFunc;
3511 Builder.restoreIP(*AfterIP);
3512 Builder.CreateStore(Reduced, LHSPtr);
3516 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3517 for (
auto En :
enumerate(ReductionInfos)) {
3518 unsigned Index = En.index();
3519 const ReductionInfo &RI = En.value();
3520 Value *LHSFixupPtr, *RHSFixupPtr;
3521 Builder.restoreIP(RI.ReductionGenClang(
3522 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3527 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3532 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3538 Builder.CreateRetVoid();
3539 return ReductionFunc;
3545 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3547 assert(RI.Variable &&
"expected non-null variable");
3548 assert(RI.PrivateVariable &&
"expected non-null private variable");
3549 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3550 "expected non-null reduction generator callback");
3553 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3554 "expected variables and their private equivalents to have the same "
3557 assert(RI.Variable->getType()->isPointerTy() &&
3558 "expected variables to be pointers");
// GPU lowering of OpenMP reductions. Builds the reduction function plus the
// shuffle/inter-warp copy helpers, packs the private reduction variables
// into a pointer array, and calls either
// __kmpc_nvptx_parallel_reduce_nowait_v2 (parallel) or
// __kmpc_nvptx_teams_reduce_nowait_v2 (teams, with the four list<->global
// buffer helpers emitted above). On result == 1 the "then" block folds the
// team-local value into the original variable.
// NOTE(review): extraction gaps throughout — several declarations
// (e.g. ReduceList element setup, Args3 tail, ThenBB/ExitBB creation) are
// not visible in this chunk.
3562OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
 3563 const LocationDescription &
 Loc, InsertPointTy AllocaIP,
 3565 bool IsNoWait,
 bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind,
 3566 std::optional<omp::GV> GridValue,
 unsigned ReductionBufNum,
 3567 Value *SrcLocInfo) {
 3568 if (!updateToLocation(
 Loc))
 3569 return InsertPointTy();
 3570 Builder.restoreIP(CodeGenIP);
 // Build the source-location ident used by the runtime calls below.
 3577 Constant *SrcLocStr = getOrCreateSrcLocStr(
 Loc, SrcLocStrSize);
 3578 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 // Nothing to do for an empty reduction clause.
 3581 if (ReductionInfos.
 size() == 0)
 3582 return Builder.saveIP();
 3585 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
 3591 Builder.SetInsertPoint(InsertBlock, InsertBlock->
 end());
 // Attributes shared by all emitted helper functions; OptimizeNone is
 // stripped so the helpers can be optimized.
 3595 AttributeList FuncAttrs;
 3596 AttrBuilder AttrBldr(Ctx);
 3598 AttrBldr.addAttribute(Attr);
 3599 AttrBldr.removeAttribute(Attribute::OptimizeNone);
 3600 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
 3602 CodeGenIP = Builder.saveIP();
 3604 createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
 3605 ReductionInfos, ReductionGenCBKind, FuncAttrs);
 3606 if (!ReductionResult)
 3608 Function *ReductionFunc = *ReductionResult;
 3609 Builder.restoreIP(CodeGenIP);
 // Optional override of the target grid geometry.
 3612 if (GridValue.has_value())
 3613 Config.setGridValue(GridValue.value());
 // Allocate the pointer array holding one slot per reduction variable.
 3628 CodeGenIP = Builder.saveIP();
 3629 Builder.restoreIP(AllocaIP);
 3630 Value *ReductionListAlloca =
 3631 Builder.CreateAlloca(RedArrayTy,
 nullptr,
 ".omp.reduction.red_list");
 3632 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
 3633 ReductionListAlloca, PtrTy, ReductionListAlloca->
 getName() +
 ".ascast");
 3634 Builder.restoreIP(CodeGenIP);
 3635 Type *IndexTy = Builder.getIndexTy(
 3636 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
 // Store each private reduction variable's pointer into its slot.
 3637 for (
 auto En :
 enumerate(ReductionInfos)) {
 3638 const ReductionInfo &RI = En.value();
 3639 Value *ElemPtr = Builder.CreateInBoundsGEP(
 3640 RedArrayTy, ReductionList,
 3641 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
 3643 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
 3644 Builder.CreateStore(CastElem, ElemPtr);
 // Emit the shuffle-and-reduce and inter-warp copy helpers.
 3646 CodeGenIP = Builder.saveIP();
 3648 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
 3650 emitInterWarpCopyFunction(
 Loc, ReductionInfos, FuncAttrs);
 3654 Builder.restoreIP(CodeGenIP);
 3656 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
 // Compute the widest element size; the runtime sizes its staging by
 // MaxDataSize * number-of-variables.
 3658 unsigned MaxDataSize = 0;
 3660 for (
 auto En :
 enumerate(ReductionInfos)) {
 3661 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
 3662 if (
 Size > MaxDataSize)
 3664 ReductionTypeArgs.
 emplace_back(En.value().ElementType);
 3666 Value *ReductionDataSize =
 3667 Builder.getInt64(MaxDataSize * ReductionInfos.
 size());
 // Parallel (non-teams) path: single nowait runtime call.
 3668 if (!IsTeamsReduction) {
 3669 Value *SarFuncCast =
 3670 Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy);
 3672 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy);
 3673 Value *
 Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
 3675 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
 3676 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
 3677 Res = Builder.CreateCall(Pv2Ptr, Args);
 // Teams path: emit the four list<->global-buffer helpers and call the
 // teams-reduce entry point with the fixed reductions buffer.
 3679 CodeGenIP = Builder.saveIP();
 3681 Ctx, ReductionTypeArgs,
 "struct._globalized_locals_ty");
 3682 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
 3683 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
 3684 Function *LtGCFunc = emitListToGlobalCopyFunction(
 3685 ReductionInfos, ReductionsBufferTy, FuncAttrs);
 3686 Function *LtGRFunc = emitListToGlobalReduceFunction(
 3687 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
 3688 Function *GtLCFunc = emitGlobalToListCopyFunction(
 3689 ReductionInfos, ReductionsBufferTy, FuncAttrs);
 3690 Function *GtLRFunc = emitGlobalToListReduceFunction(
 3691 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
 3692 Builder.restoreIP(CodeGenIP);
 3694 Value *KernelTeamsReductionPtr = Builder.CreateCall(
 3695 RedFixedBuferFn, {},
 "_openmp_teams_reductions_buffer_$_$ptr");
 3697 Value *Args3[] = {SrcLocInfo,
 3698 KernelTeamsReductionPtr,
 3699 Builder.getInt32(ReductionBufNum),
 3709 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
 3710 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
 3711 Res = Builder.CreateCall(TeamsReduceFn, Args3);
 // The runtime returns 1 for the winning thread, which must fold the
 // reduced value back into the original variable.
 3717 Value *
 Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
 3718 Builder.CreateCondBr(
 Cond, ThenBB, ExitBB);
 3724 emitBlock(ThenBB, CurFunc);
 3727 for (
 auto En :
 enumerate(ReductionInfos)) {
 3728 const ReductionInfo &RI = En.value();
 3731 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
 // Clang-style callbacks hand back LHS/RHS fixup pointers...
 3733 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
 3734 Value *LHSPtr, *RHSPtr;
 3735 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
 3736 &LHSPtr, &RHSPtr, CurFunc));
 // ...otherwise load both sides and run the generic reduction callback.
 3749 Value *LHSValue = Builder.CreateLoad(RI.ElementType,
 LHS,
 "final.lhs");
 3750 Value *RHSValue = Builder.CreateLoad(RI.ElementType,
 RHS,
 "final.rhs");
 3752 InsertPointOrErrorTy AfterIP =
 3753 RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
 3755 return AfterIP.takeError();
 3756 Builder.restoreIP(*AfterIP);
 3757 Builder.CreateStore(Reduced,
 LHS,
 false);
 3760 emitBlock(ExitBB, CurFunc);
 3761 if (ContinuationBlock) {
 3762 Builder.CreateBr(ContinuationBlock);
 3763 Builder.SetInsertPoint(ContinuationBlock);
 // Keep emitted helpers alive via llvm.used.
 3765 Config.setEmitLLVMUsed();
 3767 return Builder.saveIP();
3776 ".omp.reduction.func", &M);
3786 Builder.SetInsertPoint(ReductionFuncBlock);
3787 Value *LHSArrayPtr =
nullptr;
3788 Value *RHSArrayPtr =
nullptr;
3799 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3801 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3802 Value *LHSAddrCast =
3803 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3804 Value *RHSAddrCast =
3805 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3806 Builder.CreateStore(Arg0, LHSAddrCast);
3807 Builder.CreateStore(Arg1, RHSAddrCast);
3808 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3809 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3811 LHSArrayPtr = ReductionFunc->
getArg(0);
3812 RHSArrayPtr = ReductionFunc->
getArg(1);
3815 unsigned NumReductions = ReductionInfos.
size();
3818 for (
auto En :
enumerate(ReductionInfos)) {
3819 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3820 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3821 RedArrayTy, LHSArrayPtr, 0, En.index());
3822 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3823 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3824 LHSI8Ptr, RI.Variable->
getType());
3825 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3826 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3827 RedArrayTy, RHSArrayPtr, 0, En.index());
3828 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3829 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3830 RHSI8Ptr, RI.PrivateVariable->
getType());
3831 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3833 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3834 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3836 return AfterIP.takeError();
3838 Builder.restoreIP(*AfterIP);
3840 if (!Builder.GetInsertBlock())
3844 if (!IsByRef[En.index()])
3845 Builder.CreateStore(Reduced, LHSPtr);
3847 Builder.CreateRetVoid();
// Host lowering of OpenMP reductions. Delegates to createReductionsGPU when
// on a device (visible via the early createReductionsGPU call); otherwise
// packs private variables into "red.array", calls
// __kmpc_reduce[_nowait], and switches on the result:
//   case 1 -> non-atomic reduction + __kmpc_end_reduce[_nowait];
//   case 2 -> atomic reduction (only if every info has AtomicReductionGen
//             and no variable is by-ref).
// NOTE(review): extraction gaps — some declarations (RedArrayTy,
// ContinuationBlock / NonAtomicRedBlock / AtomicRedBlock creation,
// ReductionFunc population) are not visible in this chunk.
3851OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
 3852 const LocationDescription &
 Loc, InsertPointTy AllocaIP,
 3854 bool IsNoWait,
 bool IsTeamsReduction) {
 // Device path: defer entirely to the GPU lowering.
 3857 return createReductionsGPU(
 Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
 3858 IsNoWait, IsTeamsReduction);
 3862 if (!updateToLocation(
 Loc))
 3863 return InsertPointTy();
 // Nothing to do for an empty reduction clause.
 3865 if (ReductionInfos.
 size() == 0)
 3866 return Builder.saveIP();
 3875 unsigned NumReductions = ReductionInfos.
 size();
 // Allocate the pointer array in the alloca block, then fill it at the
 // current code-gen position.
 3877 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
 3878 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
 nullptr,
 "red.array");
 3880 Builder.SetInsertPoint(InsertBlock, InsertBlock->
 end());
 3882 for (
 auto En :
 enumerate(ReductionInfos)) {
 3883 unsigned Index = En.index();
 3884 const ReductionInfo &RI = En.value();
 3885 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
 3886 RedArrayTy, RedArray, 0, Index,
 "red.array.elem." +
 Twine(Index));
 3887 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
 3892 Type *IndexTy = Builder.getIndexTy(
 3893 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
 3894 Function *
 Func = Builder.GetInsertBlock()->getParent();
 3897 Constant *SrcLocStr = getOrCreateSrcLocStr(
 Loc, SrcLocStrSize);
 // The ATOMIC_REDUCE ident flag is only set when every reduction has an
 // atomic generator.
 3898 bool CanGenerateAtomic =
 all_of(ReductionInfos, [](
 const ReductionInfo &RI) {
 3899 return RI.AtomicReductionGen;
 3901 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
 3903 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
 3905 Value *ThreadId = getOrCreateThreadID(Ident);
 3906 Constant *NumVariables = Builder.getInt32(NumReductions);
 3908 unsigned RedArrayByteSize =
 DL.getTypeStoreSize(RedArrayTy);
 3909 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
 3911 Value *Lock = getOMPCriticalRegionLock(
 ".reduction");
 3912 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
 3913 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
 3914 : RuntimeFunction::OMPRTL___kmpc_reduce);
 3916 Builder.CreateCall(ReduceFunc,
 3917 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
 3918 ReductionFunc, Lock},
 // Dispatch on the runtime's answer: 1 = non-atomic, 2 = atomic,
 // anything else falls through to the continuation.
 3929 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
 3930 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
 3931 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
 // Non-atomic path: run each ReductionGen and store the result back
 // (unless the variable is by-ref), then end the reduction.
 3936 Builder.SetInsertPoint(NonAtomicRedBlock);
 3937 for (
 auto En :
 enumerate(ReductionInfos)) {
 3938 const ReductionInfo &RI = En.value();
 3942 Value *RedValue = RI.Variable;
 3943 if (!IsByRef[En.index()]) {
 3944 RedValue = Builder.CreateLoad(
 ValueType, RI.Variable,
 3945 "red.value." +
 Twine(En.index()));
 3947 Value *PrivateRedValue =
 3948 Builder.CreateLoad(
 ValueType, RI.PrivateVariable,
 3949 "red.private.value." +
 Twine(En.index()));
 3951 InsertPointOrErrorTy AfterIP =
 3952 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
 3954 return AfterIP.takeError();
 3955 Builder.restoreIP(*AfterIP);
 3957 if (!Builder.GetInsertBlock())
 3958 return InsertPointTy();
 3960 if (!IsByRef[En.index()])
 3961 Builder.CreateStore(Reduced, RI.Variable);
 3963 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
 3964 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
 3965 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
 3966 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
 3967 Builder.CreateBr(ContinuationBlock);
 // Atomic path: only reachable work when atomics are possible and no
 // variable is by-ref; otherwise the block is unreachable.
 3972 Builder.SetInsertPoint(AtomicRedBlock);
 3973 if (CanGenerateAtomic &&
 llvm::none_of(IsByRef, [](
 bool P) {
 return P; })) {
 3974 for (
 const ReductionInfo &RI : ReductionInfos) {
 3975 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
 3976 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
 3978 return AfterIP.takeError();
 3979 Builder.restoreIP(*AfterIP);
 3980 if (!Builder.GetInsertBlock())
 3981 return InsertPointTy();
 3983 Builder.CreateBr(ContinuationBlock);
 3985 Builder.CreateUnreachable();
 3996 if (!Builder.GetInsertBlock())
 3997 return InsertPointTy();
 3999 Builder.SetInsertPoint(ContinuationBlock);
 4000 return Builder.saveIP();
// Emits an OpenMP `master` region: guards BodyGenCB between
// __kmpc_master / __kmpc_end_master runtime calls via
// EmitOMPInlinedRegion. FiniCB runs at region finalization.
4003OpenMPIRBuilder::InsertPointOrErrorTy
4004OpenMPIRBuilder::createMaster(
 const LocationDescription &
 Loc,
 4005 BodyGenCallbackTy BodyGenCB,
 4006 FinalizeCallbackTy FiniCB) {
 4007 if (!updateToLocation(
 Loc))
 4010 Directive OMPD = Directive::OMPD_master;
 // Ident + thread id are the arguments to both entry and exit calls.
 4012 Constant *SrcLocStr = getOrCreateSrcLocStr(
 Loc, SrcLocStrSize);
 4013 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 4014 Value *ThreadId = getOrCreateThreadID(Ident);
 4017 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
 4018 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
 4020 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
 4021 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
 4023 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Emits an OpenMP `masked` region: like createMaster but the entry call
// (__kmpc_masked) additionally takes the filter value (in Args, whose
// declaration is not visible in this chunk), while the exit call
// (__kmpc_end_masked) takes only {Ident, ThreadId}.
4027OpenMPIRBuilder::InsertPointOrErrorTy
4028OpenMPIRBuilder::createMasked(
 const LocationDescription &
 Loc,
 4029 BodyGenCallbackTy BodyGenCB,
 4031 if (!updateToLocation(
 Loc))
 4034 Directive OMPD = Directive::OMPD_masked;
 4036 Constant *SrcLocStr = getOrCreateSrcLocStr(
 Loc, SrcLocStrSize);
 4037 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 4038 Value *ThreadId = getOrCreateThreadID(Ident);
 // Exit call omits the filter argument.
 4040 Value *ArgsEnd[] = {Ident, ThreadId};
 4042 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
 4043 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
 4045 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
 4046 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
 4048 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4058 Call->setDoesNotThrow();
// Emits the `scan` directive's per-iteration logic. On the first (input)
// scan loop, copies each scan variable into its iteration slot of the
// runtime-allocated buffer; on the second (scan) loop, copies the buffered
// value back into the variable. Then branches to before/after-scan blocks
// depending on inclusivity and which loop this is.
// NOTE(review): extraction gaps — CmpI's definition and some control-flow
// block creation are not visible in this chunk.
4070OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
 4071 const LocationDescription &
 Loc, InsertPointTy AllocaIP,
 4073 bool IsInclusive, ScanInfo *ScanRedInfo) {
 // First loop: lazily create the buffer pointer allocas/mallocs.
 4074 if (ScanRedInfo->OMPFirstScanLoop) {
 4075 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
 4076 ScanVarsType, ScanRedInfo);
 4080 if (!updateToLocation(
 Loc))
 // Input phase: buffer[IV] = scan variable.
 4085 if (ScanRedInfo->OMPFirstScanLoop) {
 4087 for (
 size_t i = 0; i < ScanVars.
 size(); i++) {
 4088 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
 4089 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
 4090 Type *DestTy = ScanVarsType[i];
 4091 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
 IV,
 "arrayOffset");
 4092 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
 4094 Builder.CreateStore(Src, Val);
 4097 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
 4098 emitBlock(ScanRedInfo->OMPScanDispatch,
 4099 Builder.GetInsertBlock()->getParent());
 // Scan phase: scan variable = buffer[IV] (IV taken from ScanRedInfo).
 4101 if (!ScanRedInfo->OMPFirstScanLoop) {
 4102 IV = ScanRedInfo->IV;
 4105 for (
 size_t i = 0; i < ScanVars.
 size(); i++) {
 4106 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
 4107 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
 4108 Type *DestTy = ScanVarsType[i];
 4110 Builder.CreateInBoundsGEP(DestTy, Buff,
 IV,
 "arrayOffset");
 4111 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
 4112 Builder.CreateStore(Src, ScanVars[i]);
 // Branch order flips between inclusive/exclusive and first/second loop.
 4118 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
 4119 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
 4120 ScanRedInfo->OMPAfterScanBlock);
 4122 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
 4123 ScanRedInfo->OMPBeforeScanBlock);
 4125 emitBlock(ScanRedInfo->OMPAfterScanBlock,
 4126 Builder.GetInsertBlock()->getParent());
 4127 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
 4128 return Builder.saveIP();
// Allocates, for each scan variable, a pointer slot ("vla") in the alloca
// block and — inside a masked region executed by the filtered thread —
// mallocs a buffer of Span+1 elements that the slot points to. Ends with a
// barrier so all threads see the initialized buffers.
4131Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
 4135 Builder.restoreIP(AllocaIP);
 // One pointer alloca per scan variable, registered in ScanBuffPtrs.
 4137 for (
 size_t i = 0; i < ScanVars.
 size(); i++) {
 4139 Builder.CreateAlloca(Builder.getPtrTy(),
 nullptr,
 "vla");
 4140 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
 // Body of the masked region: malloc Span+1 elements per variable.
 4144 auto BodyGenCB = [&](InsertPointTy AllocaIP,
 4145 InsertPointTy CodeGenIP) ->
 Error {
 4146 Builder.restoreIP(CodeGenIP);
 4148 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
 4149 for (
 size_t i = 0; i < ScanVars.
 size(); i++) {
 4153 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
 4154 AllocSpan,
 nullptr,
 "arr");
 4155 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
 // Run the allocation under `masked`, then synchronize with a barrier.
 4163 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
 4165 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
 4166 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
 4169 return AfterIP.takeError();
 4170 Builder.restoreIP(*AfterIP);
 4171 BasicBlock *InputBB = Builder.GetInsertBlock();
 4173 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
 4174 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
 4176 return AfterIP.takeError();
 4177 Builder.restoreIP(*AfterIP);
// Finalization of a scan-based directive: inside a masked region, copies
// the last buffered value (index Span) of each reduction back into the
// original variable and frees the malloc'd buffer, then emits a barrier.
4182Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
 4184 auto BodyGenCB = [&](InsertPointTy AllocaIP,
 4185 InsertPointTy CodeGenIP) ->
 Error {
 4186 Builder.restoreIP(CodeGenIP);
 4187 for (ReductionInfo RedInfo : ReductionInfos) {
 4188 Value *PrivateVar = RedInfo.PrivateVariable;
 4189 Value *OrigVar = RedInfo.Variable;
 4190 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
 4191 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
 // Final value lives at offset Span of the buffer.
 4193 Type *SrcTy = RedInfo.ElementType;
 4194 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
 4196 Value *Src = Builder.CreateLoad(SrcTy, Val);
 4198 Builder.CreateStore(Src, OrigVar);
 4199 Builder.CreateFree(Buff);
 // Insert at (or before the terminator of) the scan-finish block.
 4207 if (ScanRedInfo->OMPScanFinish->getTerminator())
 4208 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator())
 4210 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
 4213 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
 4214 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
 4217 return AfterIP.takeError();
 4218 Builder.restoreIP(*AfterIP);
 4219 BasicBlock *InputBB = Builder.GetInsertBlock();
 4221 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
 4222 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
 4224 return AfterIP.takeError();
 4225 Builder.restoreIP(*AfterIP);
// Emits the log-time parallel prefix-scan over the per-iteration buffers.
// Inside a masked region it runs ceil(log2(Span)) outer passes; each pass
// k combines buffer[IV] with buffer[IV - 2^k] via ReductionGen. Finishes
// with a barrier and the finals copy-back.
// NOTE(review): extraction gaps — several block/value declarations
// (InputBB, LoopBB, InnerLoopBB, NMin1, Cmp, etc.) are not visible here.
4229OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
 4230 const LocationDescription &
 Loc,
 4232 ScanInfo *ScanRedInfo) {
 4234 if (!updateToLocation(
 Loc))
 4236 auto BodyGenCB = [&](InsertPointTy AllocaIP,
 4237 InsertPointTy CodeGenIP) ->
 Error {
 4238 Builder.restoreIP(CodeGenIP);
 4244 splitBB(Builder,
 false,
 "omp.outer.log.scan.exit");
 4246 Builder.GetInsertBlock()->getModule(),
 // Number of outer passes = ceil(log2(Span)), computed in FP then
 // truncated back to i32.
 4250 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
 4253 Builder.GetInsertBlock()->getModule(),
 4256 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
 4259 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
 4260 Builder.SetInsertPoint(InputBB);
 4261 Builder.CreateBr(LoopBB);
 4262 emitBlock(LoopBB, CurFn);
 4263 Builder.SetInsertPoint(LoopBB);
 // Outer loop state: pass counter and stride 2^k (Pow2K).
 4265 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
 4267 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
 4268 Counter->
 addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
 4270 Pow2K->
 addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
 // Inner loop runs while NMin1 >= Pow2K.
 4278 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
 4279 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
 4280 emitBlock(InnerLoopBB, CurFn);
 4281 Builder.SetInsertPoint(InnerLoopBB);
 4282 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
 // Combine buffer[IVal+1] with buffer[IVal+1 - Pow2K] for each
 // reduction variable; result is stored back into the LHS slot.
 4284 for (ReductionInfo RedInfo : ReductionInfos) {
 4285 Value *ReductionVal = RedInfo.PrivateVariable;
 4286 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
 4287 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
 4288 Type *DestTy = RedInfo.ElementType;
 4289 Value *
 IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
 4291 Builder.CreateInBoundsGEP(DestTy, Buff,
 IV,
 "arrayOffset");
 4292 Value *OffsetIval = Builder.CreateNUWSub(
 IV, Pow2K);
 4294 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
 "arrayOffset");
 4295 Value *
 LHS = Builder.CreateLoad(DestTy, LHSPtr);
 4296 Value *
 RHS = Builder.CreateLoad(DestTy, RHSPtr);
 4298 InsertPointOrErrorTy AfterIP =
 4299 RedInfo.ReductionGen(Builder.saveIP(),
 LHS,
 RHS, Result);
 4301 return AfterIP.takeError();
 4302 Builder.CreateStore(Result, LHSPtr);
 // Inner loop decrement and latch.
 4305 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
 4306 IVal->
 addIncoming(NextIVal, Builder.GetInsertBlock());
 4307 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
 4308 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
 4309 emitBlock(InnerExitBB, CurFn);
 4311 Counter, llvm::ConstantInt::get(Counter->
 getType(), 1));
 // Double the stride (NUW shl) for the next pass.
 4314 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
 "",
 true);
 4315 Pow2K->
 addIncoming(NextPow2K, Builder.GetInsertBlock());
 4317 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
 // Run the scan under `masked`, then barrier, then final copy-back.
 4327 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
 4328 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
 4331 return AfterIP.takeError();
 4332 Builder.restoreIP(*AfterIP);
 4333 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
 4336 return AfterIP.takeError();
 4337 Builder.restoreIP(*AfterIP);
 4338 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
// Drives the two-loop lowering of a scan directive: runs the input loop
// generator with OMPFirstScanLoop = true, then the scan loop generator
// with OMPFirstScanLoop = false.
4345Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
 4348 ScanInfo *ScanRedInfo) {
 4356 ScanRedInfo->OMPFirstScanLoop =
 true;
 4357 Error Err = InputLoopGen();
 4367 ScanRedInfo->OMPFirstScanLoop =
 false;
 4368 Error Err = ScanLoopGen(Builder.saveIP());
// Creates the four basic blocks used by the scan lowering (dispatch,
// after-scan, before-scan, loop-exit) in the current function and records
// them in ScanRedInfo.
4375void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
 4376 Function *
 Fun = Builder.GetInsertBlock()->getParent();
 4377 ScanRedInfo->OMPScanDispatch =
 4379 ScanRedInfo->OMPAfterScanBlock =
 4381 ScanRedInfo->OMPBeforeScanBlock =
 4383 ScanRedInfo->OMPScanLoopExit =
// Builds the CFG skeleton of a canonical loop:
//   Preheader -> Header(phi iv) -> Cond(iv < TripCount) -> Body -> Latch
//   (iv+1, back to Header) ; Cond-false -> Exit -> After.
// Registers and returns a CanonicalLoopInfo describing the blocks.
// NOTE(review): extraction gaps — block creation and most CL field
// assignments/return are not visible in this chunk.
4386CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
 4410 Builder.SetCurrentDebugLocation(
 DL);
 4412 Builder.SetInsertPoint(Preheader);
 4413 Builder.CreateBr(Header);
 // Header: induction variable phi, starting at 0 from the preheader.
 4415 Builder.SetInsertPoint(Header);
 4416 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
 "omp_" + Name +
 ".iv");
 4417 IndVarPHI->
 addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
 4418 Builder.CreateBr(
 Cond);
 // Cond: unsigned iv < TripCount chooses Body or Exit.
 4420 Builder.SetInsertPoint(
 Cond);
 4422 Builder.CreateICmpULT(IndVarPHI, TripCount,
 "omp_" + Name +
 ".cmp");
 4423 Builder.CreateCondBr(Cmp, Body, Exit);
 4425 Builder.SetInsertPoint(Body);
 4426 Builder.CreateBr(Latch);
 // Latch: iv+1 (NUW per the trailing `true`), back edge to Header.
 4428 Builder.SetInsertPoint(Latch);
 4429 Value *
 Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
 4430 "omp_" + Name +
 ".next",
 true);
 4431 Builder.CreateBr(Header);
 4434 Builder.SetInsertPoint(Exit);
 4435 Builder.CreateBr(After);
 // Register the loop in the builder-owned list; pointer stays stable.
 4438 LoopInfos.emplace_front();
 4439 CanonicalLoopInfo *CL = &LoopInfos.front();
 4441 CL->Header = Header;
4453OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4454 LoopBodyGenCallbackTy BodyGenCB,
4459 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4460 NextBB, NextBB, Name);
4464 if (updateToLocation(
Loc)) {
4468 spliceBB(Builder, After,
false);
4469 Builder.CreateBr(CL->getPreheader());
4474 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4484 ScanInfos.emplace_front();
4485 ScanInfo *
Result = &ScanInfos.front();
4490OpenMPIRBuilder::createCanonicalScanLoops(
4491 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4492 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4493 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
4494 LocationDescription ComputeLoc =
4495 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4496 updateToLocation(ComputeLoc);
4500 Value *TripCount = calculateCanonicalLoopTripCount(
4501 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4502 ScanRedInfo->Span = TripCount;
4503 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4504 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
4506 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4507 Builder.restoreIP(CodeGenIP);
4508 ScanRedInfo->IV =
IV;
4509 createScanBBs(ScanRedInfo);
4510 BasicBlock *InputBlock = Builder.GetInsertBlock();
4514 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4515 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4516 Builder.GetInsertBlock()->getParent());
4517 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4518 emitBlock(ScanRedInfo->OMPScanLoopExit,
4519 Builder.GetInsertBlock()->getParent());
4520 Builder.CreateBr(ContinueBlock);
4521 Builder.SetInsertPoint(
4522 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4523 return BodyGenCB(Builder.saveIP(),
IV);
4526 const auto &&InputLoopGen = [&]() ->
Error {
4528 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4529 ComputeIP, Name,
true, ScanRedInfo);
4533 Builder.restoreIP((*LoopInfo)->getAfterIP());
4536 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4538 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4539 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4543 Builder.restoreIP((*LoopInfo)->getAfterIP());
4544 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4547 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
4553Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4555 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
4565 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4566 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4568 updateToLocation(
Loc);
4585 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4586 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4587 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4588 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4589 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
4590 ZeroCmp = Builder.CreateICmp(
4593 Span = Builder.CreateSub(Stop, Start,
"",
true);
4594 ZeroCmp = Builder.CreateICmp(
4598 Value *CountIfLooping;
4599 if (InclusiveStop) {
4600 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
4603 Value *CountIfTwo = Builder.CreateAdd(
4604 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4606 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
4609 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4610 "omp_" + Name +
".tripcount");
4614 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4615 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4616 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4617 ScanInfo *ScanRedInfo) {
4618 LocationDescription ComputeLoc =
4619 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4621 Value *TripCount = calculateCanonicalLoopTripCount(
4622 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4624 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4625 Builder.restoreIP(CodeGenIP);
4626 Value *Span = Builder.CreateMul(
IV, Step);
4627 Value *IndVar = Builder.CreateAdd(Span, Start);
4629 ScanRedInfo->IV = IndVar;
4630 return BodyGenCB(Builder.saveIP(), IndVar);
4632 LocationDescription LoopLoc =
4635 : LocationDescription(Builder.saveIP(),
4636 Builder.getCurrentDebugLocation());
4637 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
4646 OpenMPIRBuilder &OMPBuilder) {
4647 unsigned Bitwidth = Ty->getIntegerBitWidth();
4649 return OMPBuilder.getOrCreateRuntimeFunction(
4650 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4652 return OMPBuilder.getOrCreateRuntimeFunction(
4653 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4662 OpenMPIRBuilder &OMPBuilder) {
4663 unsigned Bitwidth = Ty->getIntegerBitWidth();
4665 return OMPBuilder.getOrCreateRuntimeFunction(
4666 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4668 return OMPBuilder.getOrCreateRuntimeFunction(
4669 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
4673OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4674 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4676 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4678 "Require dedicated allocate IP");
4681 Builder.restoreIP(CLI->getPreheaderIP());
4682 Builder.SetCurrentDebugLocation(
DL);
4685 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4686 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4690 Type *IVTy =
IV->getType();
4692 LoopType == WorksharingLoopType::DistributeForStaticLoop
4696 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4699 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4702 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4703 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
4704 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
4705 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
4706 CLI->setLastIter(PLastIter);
4712 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4714 Constant *One = ConstantInt::get(IVTy, 1);
4715 Builder.CreateStore(Zero, PLowerBound);
4716 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4717 Builder.CreateStore(UpperBound, PUpperBound);
4718 Builder.CreateStore(One, PStride);
4720 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4723 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4724 ? OMPScheduleType::OrderedDistribute
4727 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4732 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4733 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4734 Value *PDistUpperBound =
4735 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4736 Args.push_back(PDistUpperBound);
4739 Builder.CreateCall(StaticInit, Args);
4740 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4741 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4742 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4743 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4744 CLI->setTripCount(TripCount);
4751 Builder.SetInsertPoint(CLI->getBody(),
4752 CLI->getBody()->getFirstInsertionPt());
4753 Builder.SetCurrentDebugLocation(
DL);
4754 return Builder.CreateAdd(OldIV, LowerBound);
4758 Builder.SetInsertPoint(CLI->getExit(),
4759 CLI->getExit()->getTerminator()->getIterator());
4760 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4764 InsertPointOrErrorTy BarrierIP =
4765 createBarrier(LocationDescription(Builder.saveIP(),
DL),
4766 omp::Directive::OMPD_for,
false,
4769 return BarrierIP.takeError();
4772 InsertPointTy AfterIP = CLI->getAfterIP();
4778OpenMPIRBuilder::InsertPointOrErrorTy
4779OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4780 CanonicalLoopInfo *CLI,
4781 InsertPointTy AllocaIP,
4784 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4785 assert(ChunkSize &&
"Chunk size is required");
4787 LLVMContext &Ctx = CLI->getFunction()->getContext();
4789 Value *OrigTripCount = CLI->getTripCount();
4790 Type *IVTy =
IV->getType();
4792 "Max supported tripcount bitwidth is 64 bits");
4794 :
Type::getInt64Ty(Ctx);
4797 Constant *One = ConstantInt::get(InternalIVTy, 1);
4803 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4806 Builder.restoreIP(AllocaIP);
4807 Builder.SetCurrentDebugLocation(
DL);
4808 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4809 Value *PLowerBound =
4810 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
4811 Value *PUpperBound =
4812 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
4813 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
4814 CLI->setLastIter(PLastIter);
4817 Builder.restoreIP(CLI->getPreheaderIP());
4818 Builder.SetCurrentDebugLocation(
DL);
4821 Value *CastedChunkSize =
4822 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
4823 Value *CastedTripCount =
4824 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
4826 Constant *SchedulingType = ConstantInt::get(
4827 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4828 Builder.CreateStore(Zero, PLowerBound);
4829 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
4830 Builder.CreateStore(OrigUpperBound, PUpperBound);
4831 Builder.CreateStore(One, PStride);
4836 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4837 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4838 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4839 Builder.CreateCall(StaticInit,
4841 SchedulingType, PLastIter,
4842 PLowerBound, PUpperBound,
4847 Value *FirstChunkStart =
4848 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
4849 Value *FirstChunkStop =
4850 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
4851 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
4853 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
4854 Value *NextChunkStride =
4855 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
4858 BasicBlock *DispatchEnter = splitBB(Builder,
true);
4859 Value *DispatchCounter;
4864 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
4865 {Builder.saveIP(),
DL},
4866 [&](InsertPointTy BodyIP,
Value *Counter) {
4867 DispatchCounter = Counter;
4870 FirstChunkStart, CastedTripCount, NextChunkStride,
4876 BasicBlock *DispatchBody = DispatchCLI->getBody();
4877 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4878 BasicBlock *DispatchExit = DispatchCLI->getExit();
4879 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4880 DispatchCLI->invalidate();
4888 Builder.restoreIP(CLI->getPreheaderIP());
4889 Builder.SetCurrentDebugLocation(
DL);
4892 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4893 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
4894 Value *IsLastChunk =
4895 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
4896 Value *CountUntilOrigTripCount =
4897 Builder.CreateSub(CastedTripCount, DispatchCounter);
4898 Value *ChunkTripCount = Builder.CreateSelect(
4899 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4900 Value *BackcastedChunkTC =
4901 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
4902 CLI->setTripCount(BackcastedChunkTC);
4907 Value *BackcastedDispatchCounter =
4908 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
4910 Builder.restoreIP(CLI->getBodyIP());
4911 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
4916 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4920 InsertPointOrErrorTy AfterIP =
4921 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
4924 return AfterIP.takeError();
4942 unsigned Bitwidth = Ty->getIntegerBitWidth();
4943 Module &M = OMPBuilder->M;
4945 case WorksharingLoopType::ForStaticLoop:
4947 return OMPBuilder->getOrCreateRuntimeFunction(
4948 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4950 return OMPBuilder->getOrCreateRuntimeFunction(
4951 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4953 case WorksharingLoopType::DistributeStaticLoop:
4955 return OMPBuilder->getOrCreateRuntimeFunction(
4956 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4958 return OMPBuilder->getOrCreateRuntimeFunction(
4959 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4961 case WorksharingLoopType::DistributeForStaticLoop:
4963 return OMPBuilder->getOrCreateRuntimeFunction(
4964 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4966 return OMPBuilder->getOrCreateRuntimeFunction(
4967 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4970 if (Bitwidth != 32 && Bitwidth != 64) {
4982 Function &LoopBodyFn,
bool NoLoop) {
4984 Module &M = OMPBuilder->M;
4993 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4994 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4995 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
4996 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4997 Builder.CreateCall(RTLFn, RealArgs);
5000 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5001 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5002 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5003 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
5006 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5007 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5008 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5009 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5010 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5012 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5015 Builder.CreateCall(RTLFn, RealArgs);
5019 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5024 Value *TripCount = CLI->getTripCount();
5030 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5031 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5036 Builder.restoreIP({Preheader, Preheader->
end()});
5039 Builder.CreateBr(CLI->getExit());
5042 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5045 CleanUpInfo.EntryBB = CLI->getHeader();
5046 CleanUpInfo.ExitBB = CLI->getExit();
5047 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5055 "Expected unique undroppable user of outlined function");
5057 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5059 "Expected outlined function call to be located in loop preheader");
5061 if (OutlinedFnCallInstruction->
arg_size() > 1)
5068 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5070 for (
auto &ToBeDeletedItem : ToBeDeleted)
5071 ToBeDeletedItem->eraseFromParent();
5075OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
5076 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5079 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5080 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5083 OI.OuterAllocaBB = CLI->getPreheader();
5089 OI.OuterAllocaBB = AllocaIP.getBlock();
5092 OI.EntryBB = CLI->getBody();
5093 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5094 "omp.prelatch",
true);
5097 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5101 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5103 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5114 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5125 CLI->getPreheader(),
5134 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5140 CLI->getIndVar()->user_end());
5143 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5144 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5150 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5157 OI.PostOutlineCB = [=, ToBeDeletedVec =
5158 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5162 addOutlineInfo(std::move(OI));
5163 return CLI->getAfterIP();
5166OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5167 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5168 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5169 bool HasSimdModifier,
bool HasMonotonicModifier,
5170 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5172 if (Config.isTargetDevice())
5173 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
5175 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5176 HasNonmonotonicModifier, HasOrderedClause);
5178 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5179 OMPScheduleType::ModifierOrdered;
5180 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5181 case OMPScheduleType::BaseStatic:
5182 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
5184 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5185 NeedsBarrier, ChunkSize);
5187 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5189 case OMPScheduleType::BaseStaticChunked:
5191 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5192 NeedsBarrier, ChunkSize);
5194 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
5197 case OMPScheduleType::BaseRuntime:
5198 case OMPScheduleType::BaseAuto:
5199 case OMPScheduleType::BaseGreedy:
5200 case OMPScheduleType::BaseBalanced:
5201 case OMPScheduleType::BaseSteal:
5202 case OMPScheduleType::BaseGuidedSimd:
5203 case OMPScheduleType::BaseRuntimeSimd:
5205 "schedule type does not support user-defined chunk sizes");
5207 case OMPScheduleType::BaseDynamicChunked:
5208 case OMPScheduleType::BaseGuidedChunked:
5209 case OMPScheduleType::BaseGuidedIterativeChunked:
5210 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5211 case OMPScheduleType::BaseStaticBalancedChunked:
5212 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5213 NeedsBarrier, ChunkSize);
5226 unsigned Bitwidth = Ty->getIntegerBitWidth();
5228 return OMPBuilder.getOrCreateRuntimeFunction(
5229 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5231 return OMPBuilder.getOrCreateRuntimeFunction(
5232 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5242 unsigned Bitwidth = Ty->getIntegerBitWidth();
5244 return OMPBuilder.getOrCreateRuntimeFunction(
5245 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5247 return OMPBuilder.getOrCreateRuntimeFunction(
5248 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5257 unsigned Bitwidth = Ty->getIntegerBitWidth();
5259 return OMPBuilder.getOrCreateRuntimeFunction(
5260 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5262 return OMPBuilder.getOrCreateRuntimeFunction(
5263 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5267OpenMPIRBuilder::InsertPointOrErrorTy
5268OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
5269 InsertPointTy AllocaIP,
5271 bool NeedsBarrier,
Value *Chunk) {
5272 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5274 "Require dedicated allocate IP");
5276 "Require valid schedule type");
5278 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5279 OMPScheduleType::ModifierOrdered;
5282 Builder.SetCurrentDebugLocation(
DL);
5285 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5286 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5290 Type *IVTy =
IV->getType();
5295 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5297 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5298 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5299 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5300 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5301 CLI->setLastIter(PLastIter);
5309 Constant *One = ConstantInt::get(IVTy, 1);
5310 Builder.CreateStore(One, PLowerBound);
5311 Value *UpperBound = CLI->getTripCount();
5312 Builder.CreateStore(UpperBound, PUpperBound);
5313 Builder.CreateStore(One, PStride);
5319 InsertPointTy AfterIP = CLI->getAfterIP();
5327 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5330 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5333 Builder.CreateCall(DynamicInit,
5334 {SrcLoc, ThreadNum, SchedulingType, One,
5335 UpperBound, One, Chunk});
5344 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
5345 PLowerBound, PUpperBound, PStride});
5346 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5349 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
5350 Builder.CreateCondBr(MoreWork, Header, Exit);
5356 PI->setIncomingBlock(0, OuterCond);
5357 PI->setIncomingValue(0, LowerBound);
5362 Br->setSuccessor(0, OuterCond);
5367 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
5368 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
5375 assert(BI->getSuccessor(1) == Exit);
5376 BI->setSuccessor(1, OuterCond);
5380 Builder.SetInsertPoint(&Latch->
back());
5382 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
5387 Builder.SetInsertPoint(&
Exit->back());
5388 InsertPointOrErrorTy BarrierIP =
5389 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5390 omp::Directive::OMPD_for,
false,
5393 return BarrierIP.takeError();
5412 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5417 if (BBsToErase.
count(UseInst->getParent()))
5424 while (BBsToErase.
remove_if(HasRemainingUses)) {
5434 InsertPointTy ComputeIP) {
5435 assert(
Loops.size() >= 1 &&
"At least one loop required");
5436 size_t NumLoops =
Loops.size();
5440 return Loops.front();
5442 CanonicalLoopInfo *Outermost =
Loops.front();
5443 CanonicalLoopInfo *Innermost =
Loops.back();
5444 BasicBlock *OrigPreheader = Outermost->getPreheader();
5445 BasicBlock *OrigAfter = Outermost->getAfter();
5452 Loop->collectControlBlocks(OldControlBBs);
5455 Builder.SetCurrentDebugLocation(
DL);
5456 if (ComputeIP.isSet())
5457 Builder.restoreIP(ComputeIP);
5459 Builder.restoreIP(Outermost->getPreheaderIP());
5463 Value *CollapsedTripCount =
nullptr;
5464 for (CanonicalLoopInfo *L :
Loops) {
5466 "All loops to collapse must be valid canonical loops");
5467 Value *OrigTripCount =
L->getTripCount();
5468 if (!CollapsedTripCount) {
5469 CollapsedTripCount = OrigTripCount;
5474 CollapsedTripCount = Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5478 CanonicalLoopInfo *
Result =
5479 createLoopSkeleton(
DL, CollapsedTripCount,
F,
5480 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5486 Builder.restoreIP(
Result->getBodyIP());
5490 NewIndVars.
resize(NumLoops);
5491 for (
int i = NumLoops - 1; i >= 1; --i) {
5492 Value *OrigTripCount =
Loops[i]->getTripCount();
5494 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5495 NewIndVars[i] = NewIndVar;
5497 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5500 NewIndVars[0] = Leftover;
5511 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5518 ContinueBlock =
nullptr;
5519 ContinuePred = NextSrc;
5526 for (
size_t i = 0; i < NumLoops - 1; ++i)
5527 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5530 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5533 for (
size_t i = NumLoops - 1; i > 0; --i)
5534 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5537 ContinueWith(
Result->getLatch(),
nullptr);
5544 for (
size_t i = 0; i < NumLoops; ++i)
5545 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5550 for (CanonicalLoopInfo *L :
Loops)
5559std::vector<CanonicalLoopInfo *>
5563 "Must pass as many tile sizes as there are loops");
5564 int NumLoops =
Loops.size();
5565 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5567 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5568 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5569 Function *
F = OutermostLoop->getBody()->getParent();
5570 BasicBlock *InnerEnter = InnermostLoop->getBody();
5571 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5577 Loop->collectControlBlocks(OldControlBBs);
5584 for (CanonicalLoopInfo *L :
Loops) {
5585 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5597 for (
int i = 0; i < NumLoops - 1; ++i) {
5598 CanonicalLoopInfo *Surrounding =
Loops[i];
5601 BasicBlock *EnterBB = Surrounding->getBody();
5607 Builder.SetCurrentDebugLocation(
DL);
5608 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5610 for (
int i = 0; i < NumLoops; ++i) {
5612 Value *OrigTripCount = OrigTripCounts[i];
5615 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5616 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5625 Value *FloorTripOverflow =
5626 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5628 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5629 Value *FloorTripCount =
5630 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5631 "omp_floor" +
Twine(i) +
".tripcount",
true);
5634 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5640 std::vector<CanonicalLoopInfo *>
Result;
5641 Result.reserve(NumLoops * 2);
5645 BasicBlock *Enter = OutermostLoop->getPreheader();
5652 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5654 auto EmbeddNewLoop =
5655 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5657 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5658 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
5663 Enter = EmbeddedLoop->getBody();
5664 Continue = EmbeddedLoop->getLatch();
5665 OutroInsertBefore = EmbeddedLoop->getLatch();
5666 return EmbeddedLoop;
5670 const Twine &NameBase) {
5672 CanonicalLoopInfo *EmbeddedLoop =
5673 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5674 Result.push_back(EmbeddedLoop);
5678 EmbeddNewLoops(FloorCount,
"floor");
5682 Builder.SetInsertPoint(Enter->getTerminator());
5684 for (
int i = 0; i < NumLoops; ++i) {
5685 CanonicalLoopInfo *FloorLoop =
Result[i];
5688 Value *FloorIsEpilogue =
5689 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5690 Value *TileTripCount =
5691 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
5697 EmbeddNewLoops(TileCounts,
"tile");
5702 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5711 BodyEnter =
nullptr;
5712 BodyEntered = ExitBB;
5724 Builder.restoreIP(
Result.back()->getBodyIP());
5725 for (
int i = 0; i < NumLoops; ++i) {
5726 CanonicalLoopInfo *FloorLoop =
Result[i];
5727 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
5728 Value *OrigIndVar = OrigIndVars[i];
5732 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
5734 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
5741 for (CanonicalLoopInfo *L :
Loops)
5745 for (CanonicalLoopInfo *GenL : Result)
5756 if (Properties.
empty())
5779 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5783 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5791 if (
I.mayReadOrWriteMemory()) {
5795 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5800void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
5807void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
5815void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
5818 const Twine &NamePrefix) {
5819 Function *
F = CanonicalLoop->getFunction();
5841 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
5847 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
5849 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
5852 Builder.SetInsertPoint(SplitBeforeIt);
5854 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
5857 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
5860 Builder.SetInsertPoint(ElseBlock);
5866 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
5868 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
5874 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
5876 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
5883 if (
Block == ThenBlock)
5884 NewBB->
setName(NamePrefix +
".if.else");
5887 VMap[
Block] = NewBB;
5891 Builder.CreateBr(NewBlocks.
front());
5895 L->getLoopLatch()->splitBasicBlock(
5896 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
5900 L->addBasicBlockToLoop(ThenBlock, LI);
5904OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
5906 if (TargetTriple.
isX86()) {
5907 if (Features.
lookup(
"avx512f"))
5909 else if (Features.
lookup(
"avx"))
5913 if (TargetTriple.
isPPC())
5915 if (TargetTriple.
isWasm())
5920void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
5922 Value *IfCond, OrderKind Order,
5926 Function *
F = CanonicalLoop->getFunction();
5941 if (AlignedVars.
size()) {
5942 InsertPointTy IP = Builder.saveIP();
5943 for (
auto &AlignedItem : AlignedVars) {
5944 Value *AlignedPtr = AlignedItem.first;
5945 Value *Alignment = AlignedItem.second;
5948 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
5951 Builder.restoreIP(IP);
5956 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
5966 if (
Block == CanonicalLoop->getCond() ||
5967 Block == CanonicalLoop->getHeader())
5969 Reachable.insert(
Block);
5979 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5987 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
6003 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6005 if (Simdlen || Safelen) {
6009 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6035static std::unique_ptr<TargetMachine>
6039 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6040 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6051 std::nullopt, OptLevel));
6075 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6076 FAM.registerPass([&]() {
return TIRA; });
6090 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6095 nullptr, ORE,
static_cast<int>(OptLevel),
6116 <<
" Threshold=" << UP.
Threshold <<
"\n"
6119 <<
" PartialOptSizeThreshold="
6139 Ptr = Load->getPointerOperand();
6141 Ptr = Store->getPointerOperand();
6145 Ptr =
Ptr->stripPointerCasts();
6148 if (Alloca->getParent() == &
F->getEntryBlock())
6168 int MaxTripCount = 0;
6169 bool MaxOrZero =
false;
6170 unsigned TripMultiple = 0;
6172 bool UseUpperBound =
false;
6174 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6176 unsigned Factor = UP.
Count;
6177 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6185void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6187 CanonicalLoopInfo **UnrolledCLI) {
6188 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6204 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6217 *UnrolledCLI =
Loop;
6222 "unrolling only makes sense with a factor of 2 or larger");
6224 Type *IndVarTy =
Loop->getIndVarType();
6231 std::vector<CanonicalLoopInfo *>
LoopNest =
6232 tileLoops(
DL, {
Loop}, {FactorVal});
6235 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6246 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6249 (*UnrolledCLI)->assertOK();
6253OpenMPIRBuilder::InsertPointTy
6254OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
6257 if (!updateToLocation(
Loc))
6261 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6262 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6263 Value *ThreadId = getOrCreateThreadID(Ident);
6265 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6267 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6269 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6270 Builder.CreateCall(Fn, Args);
6272 return Builder.saveIP();
6275OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6276 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6280 if (!updateToLocation(
Loc))
6286 if (!CPVars.
empty()) {
6288 Builder.CreateStore(Builder.getInt32(0), DidIt);
6291 Directive OMPD = Directive::OMPD_single;
6293 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6294 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6295 Value *ThreadId = getOrCreateThreadID(Ident);
6298 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6299 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6301 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6302 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6304 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6305 if (
Error Err = FiniCB(IP))
6312 Builder.CreateStore(Builder.getInt32(1), DidIt);
6325 InsertPointOrErrorTy AfterIP =
6326 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6330 return AfterIP.takeError();
6333 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
6335 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
6336 ConstantInt::get(
Int64, 0), CPVars[
I],
6339 }
else if (!IsNowait) {
6340 InsertPointOrErrorTy AfterIP =
6341 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
6342 omp::Directive::OMPD_unknown,
false,
6345 return AfterIP.takeError();
6347 return Builder.saveIP();
6350OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6351 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6352 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
6354 if (!updateToLocation(
Loc))
6357 Directive OMPD = Directive::OMPD_critical;
6359 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6360 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6361 Value *ThreadId = getOrCreateThreadID(Ident);
6362 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6363 Value *
Args[] = {Ident, ThreadId, LockVar};
6369 EnterArgs.push_back(HintInst);
6370 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6372 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6374 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
6377 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6378 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6380 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6384OpenMPIRBuilder::InsertPointTy
6385OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
6386 InsertPointTy AllocaIP,
unsigned NumLoops,
6388 const Twine &Name,
bool IsDependSource) {
6392 "OpenMP runtime requires depend vec with i64 type");
6394 if (!updateToLocation(
Loc))
6399 Builder.restoreIP(AllocaIP);
6400 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
6402 updateToLocation(
Loc);
6405 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6406 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6407 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
6408 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
6412 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6413 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6416 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6417 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6418 Value *ThreadId = getOrCreateThreadID(Ident);
6419 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6423 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6425 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6426 Builder.CreateCall(RTLFn, Args);
6428 return Builder.saveIP();
6431OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6432 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6433 FinalizeCallbackTy FiniCB,
bool IsThreads) {
6434 if (!updateToLocation(
Loc))
6437 Directive OMPD = Directive::OMPD_ordered;
6443 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6444 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6445 Value *ThreadId = getOrCreateThreadID(Ident);
6448 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6449 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6452 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6453 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6456 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6460OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6462 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6463 bool HasFinalize,
bool IsCancellable) {
6466 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6470 BasicBlock *EntryBB = Builder.GetInsertBlock();
6479 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6482 if (
Error Err = BodyGenCB( InsertPointTy(),
6490 "Unexpected control flow graph state!!");
6491 InsertPointOrErrorTy AfterIP =
6492 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6494 return AfterIP.takeError();
6496 "Unexpected Control Flow State!");
6502 "Unexpected Insertion point location!");
6505 auto InsertBB = merged ? ExitPredBB : ExitBB;
6508 Builder.SetInsertPoint(InsertBB);
6510 return Builder.saveIP();
6513OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
6516 if (!Conditional || !EntryCall)
6517 return Builder.saveIP();
6519 BasicBlock *EntryBB = Builder.GetInsertBlock();
6520 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6532 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
6534 Builder.SetInsertPoint(UI);
6535 Builder.Insert(EntryBBTI);
6536 UI->eraseFromParent();
6543OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6544 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6547 Builder.restoreIP(FinIP);
6551 assert(!FinalizationStack.empty() &&
6552 "Unexpected finalization stack state!");
6554 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6555 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6557 if (
Error Err = Fi.FiniCB(FinIP))
6564 Builder.SetInsertPoint(FiniBBTI);
6568 return Builder.saveIP();
6572 Builder.Insert(ExitCall);
6578OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6579 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
6608 "copyin.not.master.end");
6615 Builder.SetInsertPoint(OMP_Entry);
6616 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6617 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6618 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6619 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
6621 Builder.SetInsertPoint(CopyBegin);
6623 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6625 return Builder.saveIP();
6628CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
6632 updateToLocation(
Loc);
6635 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6636 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6637 Value *ThreadId = getOrCreateThreadID(Ident);
6640 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6642 return Builder.CreateCall(Fn, Args, Name);
6645CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
6649 updateToLocation(
Loc);
6652 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6653 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6654 Value *ThreadId = getOrCreateThreadID(Ident);
6656 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6657 return Builder.CreateCall(Fn, Args, Name);
6660CallInst *OpenMPIRBuilder::createOMPInteropInit(
6661 const LocationDescription &
Loc,
Value *InteropVar,
6663 Value *DependenceAddress,
bool HaveNowaitClause) {
6665 updateToLocation(
Loc);
6668 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6669 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6670 Value *ThreadId = getOrCreateThreadID(Ident);
6671 if (Device ==
nullptr)
6673 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
6674 if (NumDependences ==
nullptr) {
6675 NumDependences = ConstantInt::get(
Int32, 0);
6679 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6681 Ident, ThreadId, InteropVar, InteropTypeVal,
6682 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6684 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6686 return Builder.CreateCall(Fn, Args);
6689CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6690 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
6691 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6693 updateToLocation(
Loc);
6696 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6697 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6698 Value *ThreadId = getOrCreateThreadID(Ident);
6699 if (Device ==
nullptr)
6701 if (NumDependences ==
nullptr) {
6702 NumDependences = ConstantInt::get(
Int32, 0);
6706 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6708 Ident, ThreadId, InteropVar,
Device,
6709 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6711 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6713 return Builder.CreateCall(Fn, Args);
6716CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
6718 Value *NumDependences,
6719 Value *DependenceAddress,
6720 bool HaveNowaitClause) {
6722 updateToLocation(
Loc);
6724 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6725 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6726 Value *ThreadId = getOrCreateThreadID(Ident);
6727 if (Device ==
nullptr)
6729 if (NumDependences ==
nullptr) {
6730 NumDependences = ConstantInt::get(
Int32, 0);
6734 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6736 Ident, ThreadId, InteropVar,
Device,
6737 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6739 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6741 return Builder.CreateCall(Fn, Args);
6744CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6748 updateToLocation(
Loc);
6751 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6752 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6753 Value *ThreadId = getOrCreateThreadID(Ident);
6755 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
6759 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6761 return Builder.CreateCall(Fn, Args);
6764OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6765 const LocationDescription &
Loc,
6766 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6768 "expected num_threads and num_teams to be specified");
6770 if (!updateToLocation(
Loc))
6774 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6775 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6787 const std::string DebugPrefix =
"_debug__";
6788 if (KernelName.
ends_with(DebugPrefix)) {
6789 KernelName = KernelName.
drop_back(DebugPrefix.length());
6790 Kernel = M.getFunction(KernelName);
6796 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
6801 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
6802 if (MaxThreadsVal < 0)
6803 MaxThreadsVal = std::max(
6806 if (MaxThreadsVal > 0)
6807 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
6818 Function *Fn = getOrCreateRuntimeFunctionPtr(
6819 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6822 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6823 Constant *DynamicEnvironmentInitializer =
6827 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6829 DL.getDefaultGlobalsAddressSpace());
6833 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6834 ? DynamicEnvironmentGV
6836 DynamicEnvironmentPtr);
6839 ConfigurationEnvironment, {
6840 UseGenericStateMachineVal,
6841 MayUseNestedParallelismVal,
6848 ReductionBufferLength,
6851 KernelEnvironment, {
6852 ConfigurationEnvironmentInitializer,
6856 std::string KernelEnvironmentName =
6857 (KernelName +
"_kernel_environment").str();
6860 KernelEnvironmentInitializer, KernelEnvironmentName,
6862 DL.getDefaultGlobalsAddressSpace());
6866 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6867 ? KernelEnvironmentGV
6869 KernelEnvironmentPtr);
6870 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6872 KernelLaunchEnvironment =
6873 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
6874 ? KernelLaunchEnvironment
6875 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
6876 KernelLaunchEnvParamTy);
6878 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
6880 Value *ExecUserCode = Builder.CreateICmpEQ(
6890 auto *UI = Builder.CreateUnreachable();
6896 Builder.SetInsertPoint(WorkerExitBB);
6897 Builder.CreateRetVoid();
6900 Builder.SetInsertPoint(CheckBBTI);
6901 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6904 UI->eraseFromParent();
6911void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
6912 int32_t TeamsReductionDataSize,
6913 int32_t TeamsReductionBufferLength) {
6914 if (!updateToLocation(
Loc))
6917 Function *Fn = getOrCreateRuntimeFunctionPtr(
6918 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6920 Builder.CreateCall(Fn, {});
6922 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6928 const std::string DebugPrefix =
"_debug__";
6930 KernelName = KernelName.
drop_back(DebugPrefix.length());
6931 auto *KernelEnvironmentGV =
6932 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
6933 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6934 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
6936 KernelEnvironmentInitializer,
6937 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6939 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6946 if (
Kernel.hasFnAttribute(Name)) {
6947 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
6953std::pair<int32_t, int32_t>
6955 int32_t ThreadLimit =
6956 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
6959 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
6960 if (!Attr.isValid() || !Attr.isStringAttribute())
6961 return {0, ThreadLimit};
6962 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
6965 return {0, ThreadLimit};
6966 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6972 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
6973 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
6974 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6976 return {0, ThreadLimit};
6979void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
6982 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
6985 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
6993std::pair<int32_t, int32_t>
6996 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7000 int32_t LB, int32_t UB) {
7007 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7010void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7012 if (Config.isTargetDevice()) {
7019 else if (
T.isNVPTX())
7021 else if (
T.isSPIRV())
7028 if (Config.isTargetDevice()) {
7029 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7038Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7043 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7044 "Named kernel already exists?");
7050Error OpenMPIRBuilder::emitTargetRegionFunction(
7051 TargetRegionEntryInfo &EntryInfo,
7052 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7056 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7058 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7062 OutlinedFn = *CBResult;
7064 OutlinedFn =
nullptr;
7070 if (!IsOffloadEntry)
7073 std::string EntryFnIDName =
7074 Config.isTargetDevice()
7075 ? std::string(EntryFnName)
7076 : createPlatformSpecificName({EntryFnName,
"region_id"});
7078 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7079 EntryFnName, EntryFnIDName);
7083Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7084 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7087 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7088 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7089 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7090 OffloadInfoManager.registerTargetRegionEntryInfo(
7091 EntryInfo, EntryAddr, OutlinedFnID,
7092 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7093 return OutlinedFnID;
7096OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7097 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7098 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7099 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7101 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7102 BodyGenTy BodyGenType)>
7105 if (!updateToLocation(
Loc))
7106 return InsertPointTy();
7108 Builder.restoreIP(CodeGenIP);
7110 if (Config.IsTargetDevice.value_or(
false)) {
7112 InsertPointOrErrorTy AfterIP =
7113 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7115 return AfterIP.takeError();
7116 Builder.restoreIP(*AfterIP);
7118 return Builder.saveIP();
7121 bool IsStandAlone = !BodyGenCB;
7122 MapInfosTy *MapInfo;
7126 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7127 InsertPointTy CodeGenIP) ->
Error {
7128 MapInfo = &GenMapInfoCB(Builder.saveIP());
7129 if (
Error Err = emitOffloadingArrays(
7130 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7131 true, DeviceAddrCB))
7134 TargetDataRTArgs RTArgs;
7135 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7138 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7143 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7144 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7148 SrcLocInfo, DeviceID,
7149 PointerNum, RTArgs.BasePointersArray,
7150 RTArgs.PointersArray, RTArgs.SizesArray,
7151 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7152 RTArgs.MappersArray};
7155 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7159 if (
Info.HasNoWait) {
7166 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7169 if (
Info.HasNoWait) {
7173 emitBlock(OffloadContBlock, CurFn,
true);
7174 Builder.restoreIP(Builder.saveIP());
7179 bool RequiresOuterTargetTask =
Info.HasNoWait;
7180 if (!RequiresOuterTargetTask)
7181 cantFail(TaskBodyCB(
nullptr,
nullptr,
7184 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7185 {}, RTArgs,
Info.HasNoWait));
7187 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7188 omp::OMPRTL___tgt_target_data_begin_mapper);
7190 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
7192 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7195 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7196 Builder.CreateStore(LI, DeviceMap.second.second);
7203 InsertPointOrErrorTy AfterIP =
7204 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7206 return AfterIP.takeError();
7207 Builder.restoreIP(*AfterIP);
7215 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7216 InsertPointTy CodeGenIP) ->
Error {
7217 InsertPointOrErrorTy AfterIP =
7218 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7220 return AfterIP.takeError();
7221 Builder.restoreIP(*AfterIP);
7226 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7227 TargetDataRTArgs RTArgs;
7228 Info.EmitDebug = !MapInfo->Names.empty();
7229 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7232 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7237 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7238 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7241 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7242 PointerNum, RTArgs.BasePointersArray,
7243 RTArgs.PointersArray, RTArgs.SizesArray,
7244 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7245 RTArgs.MappersArray};
7247 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7249 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
7255 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7263 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7264 return BeginThenGen(AllocaIP, Builder.saveIP());
7272 InsertPointOrErrorTy AfterIP =
7273 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7275 return AfterIP.takeError();
7279 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7280 return EndThenGen(AllocaIP, Builder.saveIP());
7283 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7284 return BeginThenGen(AllocaIP, Builder.saveIP());
7290 return Builder.saveIP();
7294OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7295 bool IsGPUDistribute) {
7296 assert((IVSize == 32 || IVSize == 64) &&
7297 "IV size is not compatible with the omp runtime");
7299 if (IsGPUDistribute)
7301 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7302 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7303 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7304 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7306 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7307 : omp::OMPRTL___kmpc_for_static_init_4u)
7308 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7309 : omp::OMPRTL___kmpc_for_static_init_8u);
7311 return getOrCreateRuntimeFunction(M, Name);
7314FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
7316 assert((IVSize == 32 || IVSize == 64) &&
7317 "IV size is not compatible with the omp runtime");
7319 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7320 : omp::OMPRTL___kmpc_dispatch_init_4u)
7321 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7322 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7324 return getOrCreateRuntimeFunction(M, Name);
7327FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
7329 assert((IVSize == 32 || IVSize == 64) &&
7330 "IV size is not compatible with the omp runtime");
7332 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7333 : omp::OMPRTL___kmpc_dispatch_next_4u)
7334 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7335 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7337 return getOrCreateRuntimeFunction(M, Name);
7340FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
7342 assert((IVSize == 32 || IVSize == 64) &&
7343 "IV size is not compatible with the omp runtime");
7345 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7346 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7347 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7348 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7350 return getOrCreateRuntimeFunction(M, Name);
7354 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7359 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7367 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7371 if (NewVar && (arg == NewVar->
getArg()))
7381 auto UpdateDebugRecord = [&](
auto *DR) {
7384 for (
auto Loc : DR->location_ops()) {
7385 auto Iter = ValueReplacementMap.find(
Loc);
7386 if (Iter != ValueReplacementMap.end()) {
7387 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7388 ArgNo = std::get<1>(Iter->second) + 1;
7392 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7399 "Unexpected debug intrinsic");
7401 UpdateDebugRecord(&DVR);
7404 if (OMPBuilder.Config.isTargetDevice()) {
7406 Module *M = Func->getParent();
7409 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7411 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7412 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7414 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7427 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7429 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7430 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7432 if (OMPBuilder.Config.isTargetDevice()) {
7440 for (
auto &Arg : Inputs)
7445 for (
auto &Arg : Inputs)
7449 auto BB = Builder.GetInsertBlock();
7461 if (TargetCpuAttr.isStringAttribute())
7462 Func->addFnAttr(TargetCpuAttr);
7464 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7465 if (TargetFeaturesAttr.isStringAttribute())
7466 Func->addFnAttr(TargetFeaturesAttr);
7468 if (OMPBuilder.Config.isTargetDevice()) {
7470 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7471 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7482 Builder.SetInsertPoint(EntryBB);
7485 if (OMPBuilder.Config.isTargetDevice())
7486 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7488 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7493 if (OMPBuilder.Config.isTargetDevice())
7494 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7498 splitBB(Builder,
true,
"outlined.body");
7499 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7501 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7503 return AfterIP.takeError();
7504 Builder.restoreIP(*AfterIP);
7505 if (OMPBuilder.Config.isTargetDevice())
7506 OMPBuilder.createTargetDeinit(Builder);
7509 Builder.CreateRetVoid();
7513 auto AllocaIP = Builder.saveIP();
7518 const auto &ArgRange =
7519 OMPBuilder.Config.isTargetDevice()
7520 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7553 if (Instr->getFunction() == Func)
7554 Instr->replaceUsesOfWith(
Input, InputCopy);
7560 for (
auto InArg :
zip(Inputs, ArgRange)) {
7562 Argument &Arg = std::get<1>(InArg);
7563 Value *InputCopy =
nullptr;
7565 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7566 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7568 return AfterIP.takeError();
7569 Builder.restoreIP(*AfterIP);
7570 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7590 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7597 ReplaceValue(
Input, InputCopy, Func);
7601 for (
auto Deferred : DeferredReplacement)
7602 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7605 ValueReplacementMap);
7613 Value *TaskWithPrivates,
7614 Type *TaskWithPrivatesTy) {
7616 Type *TaskTy = OMPIRBuilder.Task;
7619 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7620 Value *Shareds = TaskT;
7630 if (TaskWithPrivatesTy != TaskTy)
7631 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7648 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7653 assert((!NumOffloadingArrays || PrivatesTy) &&
7654 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7657 Module &M = OMPBuilder.M;
7681 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7687 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7688 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7694 ".omp_target_task_proxy_func",
7695 Builder.GetInsertBlock()->getModule());
7696 Value *ThreadId = ProxyFn->getArg(0);
7697 Value *TaskWithPrivates = ProxyFn->getArg(1);
7698 ThreadId->
setName(
"thread.id");
7699 TaskWithPrivates->
setName(
"task");
7701 bool HasShareds = SharedArgsOperandNo > 0;
7702 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7705 Builder.SetInsertPoint(EntryBB);
7711 if (HasOffloadingArrays) {
7712 assert(TaskTy != TaskWithPrivatesTy &&
7713 "If there are offloading arrays to pass to the target"
7714 "TaskTy cannot be the same as TaskWithPrivatesTy");
7717 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7718 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7720 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7724 auto *ArgStructAlloca =
7726 assert(ArgStructAlloca &&
7727 "Unable to find the alloca instruction corresponding to arguments "
7728 "for extracted function");
7732 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
7734 Value *SharedsSize =
7735 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7738 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7740 Builder.CreateMemCpy(
7741 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7743 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7745 Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7746 Builder.CreateRetVoid();
7752 return GEP->getSourceElementType();
7754 return Alloca->getAllocatedType();
7777 if (OffloadingArraysToPrivatize.
empty())
7778 return OMPIRBuilder.Task;
7781 for (
Value *V : OffloadingArraysToPrivatize) {
7782 assert(V->getType()->isPointerTy() &&
7783 "Expected pointer to array to privatize. Got a non-pointer value "
7786 assert(ArrayTy &&
"ArrayType cannot be nullptr");
7792 "struct.task_with_privates");
7795 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
7796 TargetRegionEntryInfo &EntryInfo,
7797 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7800 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7801 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7803 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
7806 EntryFnName, Inputs, CBFunc,
7810 return OMPBuilder.emitTargetRegionFunction(
7811 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7815OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7816 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
7817 OpenMPIRBuilder::InsertPointTy AllocaIP,
7819 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
7943 splitBB(Builder,
true,
"target.task.body");
7945 splitBB(Builder,
true,
"target.task.alloca");
7947 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
7948 TargetTaskAllocaBB->
begin());
7949 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
7952 OI.EntryBB = TargetTaskAllocaBB;
7953 OI.OuterAllocaBB = AllocaIP.getBlock();
7958 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7961 Builder.restoreIP(TargetTaskBodyIP);
7962 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7976 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
7980 bool NeedsTargetTask = HasNoWait && DeviceID;
7981 if (NeedsTargetTask) {
7983 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
7984 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
7985 RTArgs.SizesArray}) {
7987 OffloadingArraysToPrivatize.
push_back(V);
7988 OI.ExcludeArgsFromAggregate.push_back(V);
7992 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
7993 DeviceID, OffloadingArraysToPrivatize](
7996 "there must be a single user for the outlined function");
8010 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8011 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8013 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8014 "Wrong number of arguments for StaleCI when shareds are present");
8015 int SharedArgOperandNo =
8016 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8022 if (!OffloadingArraysToPrivatize.
empty())
8027 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8028 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8030 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8033 Builder.SetInsertPoint(StaleCI);
8038 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8039 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8048 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8049 : getOrCreateRuntimeFunctionPtr(
8050 OMPRTL___kmpc_omp_target_task_alloc);
8054 Value *ThreadID = getOrCreateThreadID(Ident);
8061 Value *TaskSize = Builder.getInt64(
8062 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8067 Value *SharedsSize = Builder.getInt64(0);
8069 auto *ArgStructAlloca =
8071 assert(ArgStructAlloca &&
8072 "Unable to find the alloca instruction corresponding to arguments "
8073 "for extracted function");
8074 auto *ArgStructType =
8076 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8077 "arguments for extracted function");
8079 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8088 Value *Flags = Builder.getInt32(0);
8098 TaskSize, SharedsSize,
8101 if (NeedsTargetTask) {
8102 assert(DeviceID &&
"Expected non-empty device ID.");
8106 TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs);
8112 *
this, Builder, TaskData, TaskWithPrivatesTy);
8113 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8116 if (!OffloadingArraysToPrivatize.
empty()) {
8118 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8119 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8120 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8127 "ElementType should match ArrayType");
8130 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8131 Builder.CreateMemCpy(
8132 Dst, Alignment, PtrToPrivatize, Alignment,
8133 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8147 if (!NeedsTargetTask) {
8150 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8154 Builder.getInt32(Dependencies.size()),
8156 ConstantInt::get(Builder.getInt32Ty(), 0),
8162 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8164 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8165 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8166 CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
8168 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8169 }
else if (DepArray) {
8174 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8177 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8178 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8182 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8183 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
8188 I->eraseFromParent();
8190 addOutlineInfo(std::move(OI));
8193 << *(Builder.GetInsertBlock()) <<
"\n");
8195 << *(Builder.GetInsertBlock()->getParent()->getParent())
8197 return Builder.saveIP();
8200Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8201 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8202 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8203 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8206 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8207 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8209 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8215 OpenMPIRBuilder::InsertPointTy AllocaIP,
8216 OpenMPIRBuilder::TargetDataInfo &
Info,
8217 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8218 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8221 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8222 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8228 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8229 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8230 Builder.restoreIP(IP);
8231 Builder.CreateCall(OutlinedFn, Args);
8232 return Builder.saveIP();
8235 bool HasDependencies = Dependencies.
size() > 0;
8236 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8238 OpenMPIRBuilder::TargetKernelArgs KArgs;
8245 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8253 if (OutlinedFnID && DeviceID)
8254 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8255 EmitTargetCallFallbackCB, KArgs,
8256 DeviceID, RTLoc, TargetTaskAllocaIP);
8264 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8267 OMPBuilder.Builder.restoreIP(AfterIP);
8271 auto &&EmitTargetCallElse =
8272 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8273 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8276 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8277 if (RequiresOuterTargetTask) {
8281 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8282 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8284 Dependencies, EmptyRTArgs, HasNoWait);
8286 return EmitTargetCallFallbackCB(Builder.saveIP());
8289 Builder.restoreIP(AfterIP);
8293 auto &&EmitTargetCallThen =
8294 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8295 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8296 Info.HasNoWait = HasNoWait;
8297 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8298 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8299 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8300 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8307 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8308 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8313 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8315 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8319 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8322 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8330 Value *MaxThreadsClause =
8331 RuntimeAttrs.TeamsThreadLimit.size() == 1
8332 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8335 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8336 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8337 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8338 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8340 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8341 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8343 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8346 unsigned NumTargetItems =
Info.NumberOfPtrs;
8350 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8351 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8354 Value *TripCount = RuntimeAttrs.LoopTripCount
8355 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8356 Builder.getInt64Ty(),
8358 : Builder.getInt64(0);
8361 Value *DynCGGroupMem = Builder.getInt32(0);
8363 KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount,
8364 NumTeamsC, NumThreadsC,
8365 DynCGGroupMem, HasNoWait);
8369 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8372 if (RequiresOuterTargetTask)
8373 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8374 Dependencies, KArgs.RTArgs,
8377 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8378 EmitTargetCallFallbackCB, KArgs,
8379 DeviceID, RTLoc, AllocaIP);
8382 Builder.restoreIP(AfterIP);
8389 if (!OutlinedFnID) {
8390 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8396 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8400 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8401 EmitTargetCallElse, AllocaIP));
8404OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8405 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8406 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8407 TargetRegionEntryInfo &EntryInfo,
8408 const TargetKernelDefaultAttrs &DefaultAttrs,
8409 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8411 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8412 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8413 CustomMapperCallbackTy CustomMapperCB,
8416 if (!updateToLocation(
Loc))
8417 return InsertPointTy();
8419 Builder.restoreIP(CodeGenIP);
8427 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8428 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8434 if (!Config.isTargetDevice())
8436 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8437 CustomMapperCB, Dependencies, HasNowait);
8438 return Builder.saveIP();
8451 return OS.
str().str();
8456 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8457 Config.separator());
8461OpenMPIRBuilder::getOrCreateInternalVariable(
Type *Ty,
const StringRef &Name,
8463 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8465 assert(Elem.second->getValueType() == Ty &&
8466 "OMP internal variable has different type than requested");
8482 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8489Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8490 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8491 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8492 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8495Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8500 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8502 return SizePtrToInt;
8507 std::string VarName) {
8511 M, MaptypesArrayInit->
getType(),
8515 return MaptypesArrayGlobal;
8518void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8519 InsertPointTy AllocaIP,
8520 unsigned NumOperands,
8521 struct MapperAllocas &MapperAllocas) {
8522 if (!updateToLocation(
Loc))
8527 Builder.restoreIP(AllocaIP);
8529 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8533 ArrI64Ty,
nullptr,
".offload_sizes");
8534 updateToLocation(
Loc);
8535 MapperAllocas.ArgsBase = ArgsBase;
8536 MapperAllocas.Args =
Args;
8537 MapperAllocas.ArgSizes = ArgSizes;
8540void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8543 struct MapperAllocas &MapperAllocas,
8544 int64_t DeviceID,
unsigned NumOperands) {
8545 if (!updateToLocation(
Loc))
8550 Value *ArgsBaseGEP =
8551 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8552 {Builder.getInt32(0), Builder.getInt32(0)});
8554 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8555 {Builder.getInt32(0), Builder.getInt32(0)});
8556 Value *ArgSizesGEP =
8557 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8558 {Builder.getInt32(0), Builder.getInt32(0)});
8561 Builder.CreateCall(MapperFunc,
8562 {SrcLocInfo, Builder.getInt64(DeviceID),
8563 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
8564 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
8567void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8568 TargetDataRTArgs &RTArgs,
8569 TargetDataInfo &
Info,
8571 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8572 "expected region end call to runtime only when end call is separate");
8574 auto VoidPtrTy = UnqualPtrTy;
8575 auto VoidPtrPtrTy = UnqualPtrTy;
8577 auto Int64PtrTy = UnqualPtrTy;
8579 if (!
Info.NumberOfPtrs) {
8589 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8591 Info.RTArgs.BasePointersArray,
8593 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8597 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8600 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8602 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8603 :
Info.RTArgs.MapTypesArray,
8609 if (!
Info.EmitDebug)
8612 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8618 if (!
Info.HasMapper)
8621 RTArgs.MappersArray =
8622 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
8625void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8626 InsertPointTy CodeGenIP,
8627 MapInfosTy &CombinedInfo,
8628 TargetDataInfo &
Info) {
8629 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8630 CombinedInfo.NonContigInfo;
8643 "struct.descriptor_dim");
8645 enum { OffsetFD = 0, CountFD, StrideFD };
8649 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
8652 if (NonContigInfo.Dims[
I] == 1)
8654 Builder.restoreIP(AllocaIP);
8657 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
8658 Builder.restoreIP(CodeGenIP);
8659 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
8660 unsigned RevIdx = EE -
II - 1;
8661 Value *DimsLVal = Builder.CreateInBoundsGEP(
8663 {Builder.getInt64(0), Builder.getInt64(II)});
8665 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
8666 Builder.CreateAlignedStore(
8667 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
8668 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
8670 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
8671 Builder.CreateAlignedStore(
8672 NonContigInfo.Counts[L][RevIdx], CountLVal,
8673 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8675 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
8676 Builder.CreateAlignedStore(
8677 NonContigInfo.Strides[L][RevIdx], StrideLVal,
8678 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8681 Builder.restoreIP(CodeGenIP);
8682 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
8683 DimsAddr, Builder.getPtrTy());
8684 Value *
P = Builder.CreateConstInBoundsGEP2_32(
8686 Info.RTArgs.PointersArray, 0,
I);
8687 Builder.CreateAlignedStore(
8688 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
8693void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8701 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
8703 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8704 Value *DeleteBit = Builder.CreateAnd(
8707 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8708 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8713 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8715 Value *PtrAndObjBit = Builder.CreateAnd(
8718 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8719 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8720 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8721 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8722 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8723 DeleteCond = Builder.CreateIsNull(
8725 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8728 DeleteCond = Builder.CreateIsNotNull(
8730 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8732 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8733 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8735 emitBlock(BodyBB, MapperFn);
8738 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
8741 Value *MapTypeArg = Builder.CreateAnd(
8744 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8745 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8746 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8747 MapTypeArg = Builder.CreateOr(
8750 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8751 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8755 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8756 ArraySize, MapTypeArg, MapName};
8758 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8766 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
8782 MapperFn->
addFnAttr(Attribute::NoInline);
8783 MapperFn->
addFnAttr(Attribute::NoUnwind);
8793 auto SavedIP = Builder.saveIP();
8794 Builder.SetInsertPoint(EntryBB);
8806 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
8807 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
8808 Value *PtrBegin = BeginIn;
8809 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
8814 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8815 MapType, MapName, ElementSize, HeadBB,
8821 emitBlock(HeadBB, MapperFn);
8826 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
8827 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8830 emitBlock(BodyBB, MapperFn);
8833 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
8837 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
8839 return Info.takeError();
8843 Value *OffloadingArgs[] = {MapperHandle};
8844 Value *PreviousSize = Builder.CreateCall(
8845 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
8847 Value *ShiftedPreviousSize =
8848 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
8851 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
8860 Value *OriMapType = Builder.getInt64(
8861 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8863 Value *MemberMapType =
8864 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8878 Value *LeftToFrom = Builder.CreateAnd(
8881 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8882 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8883 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8892 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
8893 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
8895 emitBlock(AllocBB, MapperFn);
8896 Value *AllocMapType = Builder.CreateAnd(
8899 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8900 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8901 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8902 Builder.CreateBr(EndBB);
8903 emitBlock(AllocElseBB, MapperFn);
8904 Value *IsTo = Builder.CreateICmpEQ(
8907 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8908 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8909 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
8911 emitBlock(ToBB, MapperFn);
8912 Value *ToMapType = Builder.CreateAnd(
8915 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8916 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8917 Builder.CreateBr(EndBB);
8918 emitBlock(ToElseBB, MapperFn);
8919 Value *IsFrom = Builder.CreateICmpEQ(
8922 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8923 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8924 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
8926 emitBlock(FromBB, MapperFn);
8927 Value *FromMapType = Builder.CreateAnd(
8930 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8931 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8933 emitBlock(EndBB, MapperFn);
8936 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
8942 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8943 CurSizeArg, CurMapType, CurNameArg};
8945 auto ChildMapperFn = CustomMapperCB(
I);
8947 return ChildMapperFn.takeError();
8948 if (*ChildMapperFn) {
8950 Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow();
8955 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8962 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
8963 "omp.arraymap.next");
8965 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
8967 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
8969 emitBlock(ExitBB, MapperFn);
8972 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8973 MapType, MapName, ElementSize, DoneBB,
8977 emitBlock(DoneBB, MapperFn,
true);
8979 Builder.CreateRetVoid();
8980 Builder.restoreIP(SavedIP);
8984Error OpenMPIRBuilder::emitOffloadingArrays(
8985 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
8986 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
8987 bool IsNonContiguous,
8991 Info.clearArrayInfo();
8992 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8994 if (
Info.NumberOfPtrs == 0)
8997 Builder.restoreIP(AllocaIP);
9003 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9004 PointerArrayType,
nullptr,
".offload_baseptrs");
9006 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9007 PointerArrayType,
nullptr,
".offload_ptrs");
9008 AllocaInst *MappersArray = Builder.CreateAlloca(
9009 PointerArrayType,
nullptr,
".offload_mappers");
9010 Info.RTArgs.MappersArray = MappersArray;
9017 ConstantInt::get(Int64Ty, 0));
9019 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9022 if (IsNonContiguous &&
9023 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9024 CombinedInfo.Types[
I] &
9025 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9027 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9033 RuntimeSizes.set(
I);
9036 if (RuntimeSizes.all()) {
9038 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9039 SizeArrayType,
nullptr,
".offload_sizes");
9044 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9045 auto *SizesArrayGbl =
9050 if (!RuntimeSizes.any()) {
9051 Info.RTArgs.SizesArray = SizesArrayGbl;
9053 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9054 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9057 SizeArrayType,
nullptr,
".offload_sizes");
9060 Builder.CreateMemCpy(
9061 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9062 SizesArrayGbl, OffloadSizeAlign,
9067 Info.RTArgs.SizesArray = Buffer;
9075 for (
auto mapFlag : CombinedInfo.Types)
9077 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9079 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9080 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9081 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9084 if (!CombinedInfo.Names.empty()) {
9085 auto *MapNamesArrayGbl = createOffloadMapnames(
9086 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9087 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9088 Info.EmitDebug =
true;
9090 Info.RTArgs.MapNamesArray =
9092 Info.EmitDebug =
false;
9097 if (
Info.separateBeginEndCalls()) {
9098 bool EndMapTypesDiffer =
false;
9100 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9101 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9102 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9103 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9104 EndMapTypesDiffer =
true;
9107 if (EndMapTypesDiffer) {
9108 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9109 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9114 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9115 Value *BPVal = CombinedInfo.BasePointers[
I];
9116 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9119 Builder.CreateAlignedStore(BPVal, BP,
9120 M.getDataLayout().getPrefTypeAlign(PtrTy));
9122 if (
Info.requiresDevicePointerInfo()) {
9123 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9124 CodeGenIP = Builder.saveIP();
9125 Builder.restoreIP(AllocaIP);
9126 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9127 Builder.restoreIP(CodeGenIP);
9129 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9130 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9131 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9133 DeviceAddrCB(
I, BP);
9137 Value *PVal = CombinedInfo.Pointers[
I];
9138 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9142 Builder.CreateAlignedStore(PVal,
P,
9143 M.getDataLayout().getPrefTypeAlign(PtrTy));
9145 if (RuntimeSizes.test(
I)) {
9146 Value *S = Builder.CreateConstInBoundsGEP2_32(
9150 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9153 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9156 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9159 auto CustomMFunc = CustomMapperCB(
I);
9161 return CustomMFunc.takeError();
9163 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9165 Value *MAddr = Builder.CreateInBoundsGEP(
9167 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9168 Builder.CreateAlignedStore(
9169 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9172 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9173 Info.NumberOfPtrs == 0)
9175 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9180 BasicBlock *CurBB = Builder.GetInsertBlock();
9187 Builder.CreateBr(
Target);
9190 Builder.ClearInsertionPoint();
9195 BasicBlock *CurBB = Builder.GetInsertBlock();
9211 Builder.SetInsertPoint(BB);
9214Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9215 BodyGenCallbackTy ElseGen,
9216 InsertPointTy AllocaIP) {
9220 auto CondConstant = CI->getSExtValue();
9222 return ThenGen(AllocaIP, Builder.saveIP());
9224 return ElseGen(AllocaIP, Builder.saveIP());
9234 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9236 emitBlock(ThenBlock, CurFn);
9237 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9239 emitBranch(ContBlock);
9242 emitBlock(ElseBlock, CurFn);
9243 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9246 emitBranch(ContBlock);
9248 emitBlock(ContBlock, CurFn,
true);
9252bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9256 "Unexpected Atomic Ordering.");
9313OpenMPIRBuilder::InsertPointTy
9314OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9315 AtomicOpValue &
X, AtomicOpValue &V,
9317 if (!updateToLocation(
Loc))
9320 assert(
X.Var->getType()->isPointerTy() &&
9321 "OMP Atomic expects a pointer to target memory");
9322 Type *XElemTy =
X.ElemTy;
9325 "OMP atomic read expected a scalar type");
9327 Value *XRead =
nullptr;
9331 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9337 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9342 OpenMPIRBuilder::AtomicInfo atomicInfo(
9343 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9344 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9345 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9346 XRead = AtomicLoadRes.first;
9353 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9356 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9358 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9361 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9362 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9363 return Builder.saveIP();
9366OpenMPIRBuilder::InsertPointTy
9367OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9368 AtomicOpValue &
X,
Value *Expr,
9370 if (!updateToLocation(
Loc))
9373 assert(
X.Var->getType()->isPointerTy() &&
9374 "OMP Atomic expects a pointer to target memory");
9375 Type *XElemTy =
X.ElemTy;
9378 "OMP atomic write expected a scalar type");
9381 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9384 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9388 OpenMPIRBuilder::AtomicInfo atomicInfo(
9389 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9390 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9391 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9398 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9399 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9403 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9404 return Builder.saveIP();
9407OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9408 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9410 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9411 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9413 if (!updateToLocation(
Loc))
9417 Type *XTy =
X.Var->getType();
9419 "OMP Atomic expects a pointer to target memory");
9420 Type *XElemTy =
X.ElemTy;
9423 "OMP atomic update expected a scalar type");
9426 "OpenMP atomic does not support LT or GT operations");
9430 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9431 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9433 return AtomicResult.takeError();
9434 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9435 return Builder.saveIP();
9439Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9443 return Builder.CreateAdd(Src1, Src2);
9445 return Builder.CreateSub(Src1, Src2);
9447 return Builder.CreateAnd(Src1, Src2);
9449 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9451 return Builder.CreateOr(Src1, Src2);
9453 return Builder.CreateXor(Src1, Src2);
9478 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9479 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9482 bool emitRMWOp =
false;
9490 emitRMWOp = XElemTy;
9493 emitRMWOp = (IsXBinopExpr && XElemTy);
9500 std::pair<Value *, Value *> Res;
9505 if (IsIgnoreDenormalMode)
9506 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9508 if (!IsFineGrainedMemory)
9509 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9511 if (!IsRemoteMemory)
9515 Res.first = RMWInst;
9520 Res.second = Res.first;
9522 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9526 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9532 OpenMPIRBuilder::AtomicInfo atomicInfo(
9533 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9534 OldVal->
getAlign(),
true , AllocaIP,
X);
9535 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9536 BasicBlock *CurBB = Builder.GetInsertBlock();
9538 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9542 X->getName() +
".atomic.cont");
9544 Builder.restoreIP(AllocaIP);
9545 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9546 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9547 Builder.SetInsertPoint(ContBB);
9549 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9554 Value *Upd = *CBResult;
9555 Builder.CreateStore(Upd, NewAtomicAddr);
9558 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9559 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9561 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9562 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9564 Res.first = OldExprVal;
9570 Builder.SetInsertPoint(ExitBB);
9572 Builder.SetInsertPoint(ExitTI);
9578 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9585 BasicBlock *CurBB = Builder.GetInsertBlock();
9587 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9591 X->getName() +
".atomic.cont");
9593 Builder.restoreIP(AllocaIP);
9594 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9595 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9596 Builder.SetInsertPoint(ContBB);
9598 PHI->addIncoming(OldVal, CurBB);
9603 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9604 X->getName() +
".atomic.fltCast");
9606 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9607 X->getName() +
".atomic.ptrCast");
9614 Value *Upd = *CBResult;
9615 Builder.CreateStore(Upd, NewAtomicAddr);
9616 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9621 Result->setVolatile(VolatileX);
9622 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9623 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9624 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9625 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9627 Res.first = OldExprVal;
9634 Builder.SetInsertPoint(ExitBB);
9636 Builder.SetInsertPoint(ExitTI);
9643OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9644 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9647 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9648 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9649 if (!updateToLocation(
Loc))
9653 Type *XTy =
X.Var->getType();
9655 "OMP Atomic expects a pointer to target memory");
9656 Type *XElemTy =
X.ElemTy;
9659 "OMP atomic capture expected a scalar type");
9661 "OpenMP atomic does not support LT or GT operations");
9668 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9669 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9672 Value *CapturedVal =
9673 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9674 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9676 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9677 return Builder.saveIP();
9680OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9681 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9687 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9688 IsPostfixUpdate, IsFailOnly, Failure);
9691OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9692 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9697 if (!updateToLocation(
Loc))
9700 assert(
X.Var->getType()->isPointerTy() &&
9701 "OMP atomic expects a pointer to target memory");
9704 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9705 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9708 bool IsInteger =
E->getType()->isIntegerTy();
9710 if (
Op == OMPAtomicCompareOp::EQ) {
9715 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9716 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9721 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9725 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9727 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9729 "OldValue and V must be of same type");
9730 if (IsPostfixUpdate) {
9731 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9733 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9744 BasicBlock *CurBB = Builder.GetInsertBlock();
9746 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9748 CurBBTI,
X.Var->getName() +
".atomic.exit");
9754 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9756 Builder.SetInsertPoint(ContBB);
9757 Builder.CreateStore(OldValue, V.Var);
9758 Builder.CreateBr(ExitBB);
9763 Builder.SetInsertPoint(ExitBB);
9765 Builder.SetInsertPoint(ExitTI);
9768 Value *CapturedValue =
9769 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
9770 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9776 assert(
R.Var->getType()->isPointerTy() &&
9777 "r.var must be of pointer type");
9778 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9780 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9781 Value *ResultCast =
R.IsSigned
9782 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
9783 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
9784 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
9787 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9788 "Op should be either max or min at this point");
9789 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9827 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
9829 Value *CapturedValue =
nullptr;
9830 if (IsPostfixUpdate) {
9831 CapturedValue = OldValue;
9856 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
9857 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
9859 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9863 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
9865 return Builder.saveIP();
9868OpenMPIRBuilder::InsertPointOrErrorTy
9869OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
9870 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
9873 if (!updateToLocation(
Loc))
9874 return InsertPointTy();
9877 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
9878 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
9883 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
9884 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
9885 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
9905 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
9906 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
9908 splitBB(Builder,
true,
"teams.alloca");
9910 bool SubClausesPresent =
9911 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9913 if (!Config.isTargetDevice() && SubClausesPresent) {
9914 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9915 "if lowerbound is non-null, then upperbound must also be non-null "
9916 "for bounds on num_teams");
9918 if (NumTeamsUpper ==
nullptr)
9919 NumTeamsUpper = Builder.getInt32(0);
9921 if (NumTeamsLower ==
nullptr)
9922 NumTeamsLower = NumTeamsUpper;
9926 "argument to if clause must be an integer value");
9930 IfExpr = Builder.CreateICmpNE(IfExpr,
9931 ConstantInt::get(IfExpr->
getType(), 0));
9932 NumTeamsUpper = Builder.CreateSelect(
9933 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
9936 NumTeamsLower = Builder.CreateSelect(
9937 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
9940 if (ThreadLimit ==
nullptr)
9941 ThreadLimit = Builder.getInt32(0);
9943 Value *ThreadNum = getOrCreateThreadID(Ident);
9945 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
9946 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9949 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
9950 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
9951 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9955 OI.EntryBB = AllocaBB;
9957 OI.OuterAllocaBB = &OuterAllocaBB;
9961 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
9963 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9965 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9967 auto HostPostOutlineCB = [
this, Ident,
9968 ToBeDeleted](
Function &OutlinedFn)
mutable {
9973 "there must be a single user for the outlined function");
9978 "Outlined function must have two or three arguments only");
9980 bool HasShared = OutlinedFn.
arg_size() == 3;
9988 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9989 "outlined function.");
9990 Builder.SetInsertPoint(StaleCI);
9992 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
9995 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
9996 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10000 I->eraseFromParent();
10003 if (!Config.isTargetDevice())
10004 OI.PostOutlineCB = HostPostOutlineCB;
10006 addOutlineInfo(std::move(OI));
10008 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10010 return Builder.saveIP();
10013OpenMPIRBuilder::InsertPointOrErrorTy
10014OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10015 InsertPointTy OuterAllocaIP,
10016 BodyGenCallbackTy BodyGenCB) {
10017 if (!updateToLocation(
Loc))
10018 return InsertPointTy();
10020 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10022 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10024 splitBB(Builder,
true,
"distribute.entry");
10025 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10028 splitBB(Builder,
true,
"distribute.exit");
10030 splitBB(Builder,
true,
"distribute.body");
10032 splitBB(Builder,
true,
"distribute.alloca");
10035 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10036 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10037 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10042 if (Config.isTargetDevice()) {
10044 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10045 OI.EntryBB = AllocaBB;
10046 OI.ExitBB = ExitBB;
10048 addOutlineInfo(std::move(OI));
10050 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10052 return Builder.saveIP();
10057 std::string VarName) {
10063 M, MapNamesArrayInit->
getType(),
10066 return MapNamesArrayGlobal;
10071void OpenMPIRBuilder::initializeTypes(
Module &M) {
10074 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10075#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10076#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10077 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10078 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10079#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10080 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10081 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10082#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10083 T = StructType::getTypeByName(Ctx, StructName); \
10085 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10087 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10088#include "llvm/Frontend/OpenMP/OMPKinds.def"
10091void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10099 while (!Worklist.
empty()) {
10103 if (
BlockSet.insert(SuccBB).second)
10112 if (!Config.isGPU()) {
10127 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10128 Fn->
addFnAttr(Attribute::MustProgress);
10132void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10133 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10136 if (OffloadInfoManager.empty())
10140 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10141 TargetRegionEntryInfo>,
10143 OrderedEntries(OffloadInfoManager.size());
10146 auto &&GetMDInt = [
this](
unsigned V) {
10153 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10154 auto &&TargetRegionMetadataEmitter =
10155 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10156 const TargetRegionEntryInfo &EntryInfo,
10157 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10170 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10171 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10172 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10173 GetMDInt(
E.getOrder())};
10176 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10182 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10185 auto &&DeviceGlobalVarMetadataEmitter =
10186 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10188 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10196 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10197 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10200 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10201 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10207 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10208 DeviceGlobalVarMetadataEmitter);
10210 for (
const auto &
E : OrderedEntries) {
10211 assert(
E.first &&
"All ordered entries must exist!");
10212 if (
const auto *CE =
10215 if (!
CE->getID() || !
CE->getAddress()) {
10217 TargetRegionEntryInfo EntryInfo =
E.second;
10218 StringRef FnName = EntryInfo.ParentName;
10219 if (!M.getNamedValue(FnName))
10221 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10224 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10227 }
else if (
const auto *CE =
dyn_cast<
10228 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10230 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10231 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10234 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10235 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10236 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10238 if (!
CE->getAddress()) {
10239 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10243 if (
CE->getVarSize() == 0)
10246 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10247 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10248 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10249 "Declaret target link address is set.");
10250 if (Config.isTargetDevice())
10252 if (!
CE->getAddress()) {
10253 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10265 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10266 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10271 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10272 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10273 Flags,
CE->getLinkage(),
CE->getVarName());
10275 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10276 Flags,
CE->getLinkage());
10287 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10292 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10293 Config.getRequiresFlags());
10296void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10298 unsigned FileID,
unsigned Line,
unsigned Count) {
10300 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10301 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10303 OS <<
"_" <<
Count;
10306void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10308 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10309 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10310 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10311 EntryInfo.Line, NewCount);
10314TargetRegionEntryInfo
10315OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10319 auto FileIDInfo = CallBack();
10323 FileID =
Status->getUniqueID().getFile();
10327 FileID =
hash_value(std::get<0>(FileIDInfo));
10330 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10331 std::get<1>(FileIDInfo));
10334unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10337 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10339 !(Remain & 1); Remain = Remain >> 1)
10345OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10348 << getFlagMemberOffset());
10351void OpenMPIRBuilder::setCorrectMemberOfFlag(
10357 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10359 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10366 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10367 Flags |= MemberOfFlag;
10370Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10371 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10372 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10373 bool IsDeclaration,
bool IsExternallyVisible,
10374 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10375 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10376 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10377 std::function<
Constant *()> GlobalInitializer,
10384 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10385 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10387 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10388 Config.hasRequiresUnifiedSharedMemory())) {
10393 if (!IsExternallyVisible)
10394 OS <<
format(
"_%x", EntryInfo.FileID);
10395 OS <<
"_decl_tgt_ref_ptr";
10398 Value *
Ptr = M.getNamedValue(PtrName);
10402 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10407 if (!Config.isTargetDevice()) {
10408 if (GlobalInitializer)
10409 GV->setInitializer(GlobalInitializer());
10414 registerTargetGlobalVariable(
10415 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10416 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10426void OpenMPIRBuilder::registerTargetGlobalVariable(
10427 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10428 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10429 bool IsDeclaration,
bool IsExternallyVisible,
10430 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10431 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10432 std::vector<Triple> TargetTriple,
10433 std::function<
Constant *()> GlobalInitializer,
10436 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10437 (TargetTriple.empty() && !Config.isTargetDevice()))
10440 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10445 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10447 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10448 !Config.hasRequiresUnifiedSharedMemory()) {
10449 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10453 if (!IsDeclaration)
10455 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10462 if (Config.isTargetDevice() &&
10466 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10469 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10471 if (!M.getNamedValue(RefName)) {
10473 getOrCreateInternalVariable(Addr->
getType(), RefName);
10475 GvAddrRef->setConstant(
true);
10477 GvAddrRef->setInitializer(Addr);
10478 GeneratedRefs.push_back(GvAddrRef);
10482 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10483 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10485 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10487 if (Config.isTargetDevice()) {
10491 Addr = getAddrOfDeclareTargetVar(
10492 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10493 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10494 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10497 VarSize = M.getDataLayout().getPointerSize();
10501 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10507void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10511 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10516 auto &&GetMDInt = [MN](
unsigned Idx) {
10521 auto &&GetMDString = [MN](
unsigned Idx) {
10523 return V->getString();
10526 switch (GetMDInt(0)) {
10530 case OffloadEntriesInfoManager::OffloadEntryInfo::
10531 OffloadingEntryInfoTargetRegion: {
10532 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10537 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10541 case OffloadEntriesInfoManager::OffloadEntryInfo::
10542 OffloadingEntryInfoDeviceGlobalVar:
10543 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10545 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10555 if (HostFilePath.
empty())
10559 if (std::error_code Err = Buf.getError()) {
10561 "OpenMPIRBuilder: " +
10569 if (std::error_code Err = M.getError()) {
10571 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10575 loadOffloadInfoMetadata(*M.get());
10582bool OffloadEntriesInfoManager::empty()
const {
10583 return OffloadEntriesTargetRegion.empty() &&
10584 OffloadEntriesDeviceGlobalVar.empty();
10587unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10588 const TargetRegionEntryInfo &EntryInfo)
const {
10589 auto It = OffloadEntriesTargetRegionCount.find(
10590 getTargetRegionEntryCountKey(EntryInfo));
10591 if (It == OffloadEntriesTargetRegionCount.end())
10596void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10597 const TargetRegionEntryInfo &EntryInfo) {
10598 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10599 EntryInfo.Count + 1;
10603void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10604 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10605 OffloadEntriesTargetRegion[EntryInfo] =
10606 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10607 OMPTargetRegionEntryTargetRegion);
10608 ++OffloadingEntriesNum;
10611void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10613 OMPTargetRegionEntryKind Flags) {
10614 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10617 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10621 if (OMPBuilder->Config.isTargetDevice()) {
10623 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10626 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10627 Entry.setAddress(Addr);
10629 Entry.setFlags(Flags);
10631 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10632 hasTargetRegionEntryInfo(EntryInfo,
true))
10634 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10635 "Target region entry already registered!");
10636 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10637 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10638 ++OffloadingEntriesNum;
10640 incrementTargetRegionEntryInfoCount(EntryInfo);
10643bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10644 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10647 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10649 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10650 if (It == OffloadEntriesTargetRegion.end()) {
10654 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10659void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10660 const OffloadTargetRegionEntryInfoActTy &Action) {
10662 for (
const auto &It : OffloadEntriesTargetRegion) {
10663 Action(It.first, It.second);
10667void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10668 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10669 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10670 ++OffloadingEntriesNum;
10673void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10676 if (OMPBuilder->Config.isTargetDevice()) {
10678 if (!hasDeviceGlobalVarEntryInfo(VarName))
10680 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10681 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10682 if (Entry.getVarSize() == 0) {
10683 Entry.setVarSize(VarSize);
10688 Entry.setVarSize(VarSize);
10690 Entry.setAddress(Addr);
10692 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10693 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10694 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10695 "Entry not initialized!");
10696 if (Entry.getVarSize() == 0) {
10697 Entry.setVarSize(VarSize);
10702 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10703 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10704 Addr, VarSize, Flags,
Linkage,
10707 OffloadEntriesDeviceGlobalVar.try_emplace(
10708 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10709 ++OffloadingEntriesNum;
10713void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10714 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10716 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
10717 Action(
E.getKey(),
E.getValue());
10724void CanonicalLoopInfo::collectControlBlocks(
10731 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10734BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10743void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10755void CanonicalLoopInfo::mapIndVar(
10765 for (
Use &U : OldIV->
uses()) {
10769 if (
User->getParent() == getCond())
10771 if (
User->getParent() == getLatch())
10777 Value *NewIV = Updater(OldIV);
10780 for (
Use *U : ReplacableUses)
10788void CanonicalLoopInfo::assertOK()
const {
10801 "Preheader must terminate with unconditional branch");
10803 "Preheader must jump to header");
10807 "Header must terminate with unconditional branch");
10808 assert(Header->getSingleSuccessor() ==
Cond &&
10809 "Header must jump to exiting block");
10812 assert(
Cond->getSinglePredecessor() == Header &&
10813 "Exiting block only reachable from header");
10816 "Exiting block must terminate with conditional branch");
10818 "Exiting block must have two successors");
10820 "Exiting block's first successor jump to the body");
10822 "Exiting block's second successor must exit the loop");
10826 "Body only reachable from exiting block");
10831 "Latch must terminate with unconditional branch");
10840 "Exit block must terminate with unconditional branch");
10841 assert(
Exit->getSingleSuccessor() == After &&
10842 "Exit block must jump to after block");
10846 "After block only reachable from exit block");
10850 assert(IndVar &&
"Canonical induction variable not found?");
10852 "Induction variable must be an integer");
10854 "Induction variable must be a PHI in the loop header");
10860 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
10867 Value *TripCount = getTripCount();
10868 assert(TripCount &&
"Loop trip count not found?");
10870 "Trip count and induction variable must have the same type");
10874 "Exit condition must be a signed less-than comparison");
10876 "Exit condition must compare the induction variable");
10878 "Exit condition must compare with the trip count");
10882void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing shared data.
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable from the function.
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic block Old.
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does not have a module.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp.h).
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...